contrib/kube-prometheus: Run jb update and generate all manifests
This commit is contained in:
@@ -8,7 +8,7 @@
|
|||||||
"subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus"
|
"subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "ce4ab08d6791161267204d9a61588e64f1b57e05"
|
"version": "00c64bc438d2acf9c808388fe1e5d733e92b0c3b"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "ksonnet",
|
"name": "ksonnet",
|
||||||
@@ -28,7 +28,7 @@
|
|||||||
"subdir": ""
|
"subdir": ""
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "d445c4d98fdf88fd3c59bb34ca4b0f82536f878c"
|
"version": "c70814dcafce1b51357938e09ee1192998a95706"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "grafonnet",
|
"name": "grafonnet",
|
||||||
@@ -78,7 +78,7 @@
|
|||||||
"subdir": "Documentation/etcd-mixin"
|
"subdir": "Documentation/etcd-mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "1df1ddff4361ed7f2c0f33571923511889a115ce"
|
"version": "a7b1306ecfefeabe48286403b260513786289922"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4707,7 +4707,7 @@ items:
|
|||||||
"step": 10
|
"step": 10
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"expr": "sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=\"$pod\", container_name!=\"\"}) by (container)",
|
"expr": "sum(kube_pod_container_resource_limits_memory_bytes{namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container)",
|
||||||
"format": "table",
|
"format": "table",
|
||||||
"instant": true,
|
"instant": true,
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
|
|||||||
@@ -838,7 +838,7 @@ spec:
|
|||||||
the limit of 110.
|
the limit of 110.
|
||||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
|
||||||
expr: |
|
expr: |
|
||||||
kubelet_running_pod_count{job="kubelet"} > 100
|
kubelet_running_pod_count{job="kubelet"} > 110 * 0.9
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
@@ -914,8 +914,8 @@ spec:
|
|||||||
severity: critical
|
severity: critical
|
||||||
- alert: AlertmanagerDownOrMissing
|
- alert: AlertmanagerDownOrMissing
|
||||||
annotations:
|
annotations:
|
||||||
description: An unexpected number of Alertmanagers are scraped or Alertmanagers
|
description: An unexpected number of Alertmanagers were scraped or disappeared
|
||||||
disappeared from discovery.
|
from discovery.
|
||||||
summary: Alertmanager down or missing
|
summary: Alertmanager down or missing
|
||||||
expr: |
|
expr: |
|
||||||
label_replace(prometheus_operator_alertmanager_spec_replicas{job="prometheus-operator"}, "job", "alertmanager-$1", "alertmanager", "(.*)") / ON(job) GROUP_RIGHT() sum(up{job="alertmanager-main"}) BY (job) != 1
|
label_replace(prometheus_operator_alertmanager_spec_replicas{job="prometheus-operator"}, "job", "alertmanager-$1", "alertmanager", "(.*)") / ON(job) GROUP_RIGHT() sum(up{job="alertmanager-main"}) BY (job) != 1
|
||||||
@@ -936,7 +936,7 @@ spec:
|
|||||||
rules:
|
rules:
|
||||||
- alert: TargetDown
|
- alert: TargetDown
|
||||||
annotations:
|
annotations:
|
||||||
description: '{{ $value }}% of {{ $labels.job }} targets are down.'
|
description: '{{ $value }}% of the {{ $labels.job }} targets are down.'
|
||||||
summary: Targets are down
|
summary: Targets are down
|
||||||
expr: 100 * (count(up == 0) BY (job) / count(up) BY (job)) > 10
|
expr: 100 * (count(up == 0) BY (job) / count(up) BY (job)) > 10
|
||||||
for: 10m
|
for: 10m
|
||||||
@@ -944,7 +944,7 @@ spec:
|
|||||||
severity: warning
|
severity: warning
|
||||||
- alert: DeadMansSwitch
|
- alert: DeadMansSwitch
|
||||||
annotations:
|
annotations:
|
||||||
description: This is a DeadMansSwitch meant to ensure that the entire Alerting
|
description: This is a DeadMansSwitch meant to ensure that the entire alerting
|
||||||
pipeline is functional.
|
pipeline is functional.
|
||||||
summary: Alerting DeadMansSwitch
|
summary: Alerting DeadMansSwitch
|
||||||
expr: vector(1)
|
expr: vector(1)
|
||||||
@@ -955,7 +955,7 @@ spec:
|
|||||||
- alert: NodeDiskRunningFull
|
- alert: NodeDiskRunningFull
|
||||||
annotations:
|
annotations:
|
||||||
message: Device {{ $labels.device }} of node-exporter {{ $labels.namespace
|
message: Device {{ $labels.device }} of node-exporter {{ $labels.namespace
|
||||||
}}/{{ $labels.pod }} is running full within the next 24 hours.
|
}}/{{ $labels.pod }} will be full within the next 24 hours.
|
||||||
expr: |
|
expr: |
|
||||||
(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[6h], 3600 * 24) < 0)
|
(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[6h], 3600 * 24) < 0)
|
||||||
for: 30m
|
for: 30m
|
||||||
@@ -964,7 +964,7 @@ spec:
|
|||||||
- alert: NodeDiskRunningFull
|
- alert: NodeDiskRunningFull
|
||||||
annotations:
|
annotations:
|
||||||
message: Device {{ $labels.device }} of node-exporter {{ $labels.namespace
|
message: Device {{ $labels.device }} of node-exporter {{ $labels.namespace
|
||||||
}}/{{ $labels.pod }} is running full within the next 2 hours.
|
}}/{{ $labels.pod }} will be full within the next 2 hours.
|
||||||
expr: |
|
expr: |
|
||||||
(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[30m], 3600 * 2) < 0)
|
(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[30m], 3600 * 2) < 0)
|
||||||
for: 10m
|
for: 10m
|
||||||
@@ -1071,3 +1071,29 @@ spec:
|
|||||||
for: 10m
|
for: 10m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
|
- name: prometheus-operator
|
||||||
|
rules:
|
||||||
|
- alert: PrometheusOperatorAlertmanagerReconcileErrors
|
||||||
|
annotations:
|
||||||
|
message: Errors while reconciling Alertmanager in {{$labels.namespace}} namespace.
|
||||||
|
expr: |
|
||||||
|
rate(prometheus_operator_alertmanager_reconcile_errors_total{job="prometheus-operator"}[5m]) > 0.01
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: PrometheusOperatorPrometheusReconcileErrors
|
||||||
|
annotations:
|
||||||
|
message: Errors while reconciling Prometheus in {{$labels.namespace}} namespace.
|
||||||
|
expr: |
|
||||||
|
rate(prometheus_operator_prometheus_reconcile_errors_total{job="prometheus-operator"}[5m]) > 0.01
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: PrometheusOperatorNodeLookupErrors
|
||||||
|
annotations:
|
||||||
|
message: Errors while looking up node addresses in {{$labels.namespace}} namespace.
|
||||||
|
expr: |
|
||||||
|
rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator"}[5m]) > 0.01
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
|||||||
Reference in New Issue
Block a user