manifests: regenerate

This commit is contained in:
paulfantom
2021-01-21 10:34:21 +01:00
parent 6d8e56727c
commit 9bba0e2a2a
8 changed files with 241 additions and 77 deletions

View File

@@ -55,17 +55,31 @@ spec:
# Critical variant: evaluates failed/total notification rates over 5m, takes the
# minimum across the grouped series, and fires when that minimum stays above
# 0.01 (1%) for 5 minutes.
# NOTE(review): the description references {{$labels.job}}, but the aggregation
# `min by (namespace,service, integration)` does not keep the `job` label, so
# that placeholder will presumably render empty - confirm and consider using a
# label that survives the aggregation.
- alert: AlertmanagerClusterFailedToSendAlerts
annotations:
description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}.
summary: All Alertmanager instances in a cluster failed to send notifications.
summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration.
expr: |
min by (namespace,service) (
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring"}[5m])
min by (namespace,service, integration) (
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring", integration=~`.*`}[5m])
/
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring"}[5m])
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring", integration=~`.*`}[5m])
)
> 0.01
for: 5m
labels:
severity: critical
# Warning variant of the rule with the same name: same failed/total ratio and
# 1% threshold over 5m, but restricted to integrations that do NOT match the
# "critical" regex.
# NOTE(review): the regex here is `.*`, which matches every label value, so the
# negative matcher integration!~`.*` selects no series and this rule cannot
# fire as rendered. Presumably the regex is a generator-level setting meant to
# be narrowed to the truly critical integrations - confirm.
# NOTE(review): {{$labels.job}} in the description is not among the labels kept
# by `min by (namespace,service, integration)` - confirm it renders as intended.
- alert: AlertmanagerClusterFailedToSendAlerts
annotations:
description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}.
summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration.
expr: |
min by (namespace,service, integration) (
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring", integration!~`.*`}[5m])
/
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring", integration!~`.*`}[5m])
)
> 0.01
for: 5m
labels:
severity: warning
- alert: AlertmanagerConfigInconsistent
annotations:
description: Alertmanager instances within the {{$labels.job}} cluster have different configurations.

View File

@@ -3,6 +3,11 @@ data:
datasources.yaml: ewogICAgImFwaVZlcnNpb24iOiAxLAogICAgImRhdGFzb3VyY2VzIjogWwogICAgICAgIHsKICAgICAgICAgICAgImFjY2VzcyI6ICJwcm94eSIsCiAgICAgICAgICAgICJlZGl0YWJsZSI6IGZhbHNlLAogICAgICAgICAgICAibmFtZSI6ICJwcm9tZXRoZXVzIiwKICAgICAgICAgICAgIm9yZ0lkIjogMSwKICAgICAgICAgICAgInR5cGUiOiAicHJvbWV0aGV1cyIsCiAgICAgICAgICAgICJ1cmwiOiAiaHR0cDovL3Byb21ldGhldXMtazhzLm1vbml0b3Jpbmcuc3ZjOjkwOTAiLAogICAgICAgICAgICAidmVyc2lvbiI6IDEKICAgICAgICB9CiAgICBdCn0=
kind: Secret
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-datasources
namespace: monitoring
type: Opaque

View File

@@ -1729,6 +1729,11 @@ items:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-apiserver
namespace: monitoring
- apiVersion: v1
@@ -3595,6 +3600,11 @@ items:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-cluster-total
namespace: monitoring
- apiVersion: v1
@@ -4730,6 +4740,11 @@ items:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-controller-manager
namespace: monitoring
- apiVersion: v1
@@ -7296,6 +7311,11 @@ items:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-k8s-resources-cluster
namespace: monitoring
- apiVersion: v1
@@ -9566,6 +9586,11 @@ items:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-k8s-resources-namespace
namespace: monitoring
- apiVersion: v1
@@ -10528,6 +10553,11 @@ items:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-k8s-resources-node
namespace: monitoring
- apiVersion: v1
@@ -12284,6 +12314,11 @@ items:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-k8s-resources-pod
namespace: monitoring
- apiVersion: v1
@@ -14302,6 +14337,11 @@ items:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-k8s-resources-workload
namespace: monitoring
- apiVersion: v1
@@ -16481,6 +16521,11 @@ items:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-k8s-resources-workloads-namespace
namespace: monitoring
- apiVersion: v1
@@ -18998,6 +19043,11 @@ items:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-kubelet
namespace: monitoring
- apiVersion: v1
@@ -20446,6 +20496,11 @@ items:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-namespace-by-pod
namespace: monitoring
- apiVersion: v1
@@ -22166,6 +22221,11 @@ items:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-namespace-by-workload
namespace: monitoring
- apiVersion: v1
@@ -23114,6 +23174,11 @@ items:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-node-cluster-rsrc-use
namespace: monitoring
- apiVersion: v1
@@ -24089,6 +24154,11 @@ items:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-node-rsrc-use
namespace: monitoring
- apiVersion: v1
@@ -25070,6 +25140,11 @@ items:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-nodes
namespace: monitoring
- apiVersion: v1
@@ -25262,7 +25337,7 @@ items:
"tableColumn": "",
"targets": [
{
"expr": "(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n",
"expr": "max without(instance,node) (\n(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@@ -25459,7 +25534,7 @@ items:
"tableColumn": "",
"targets": [
{
"expr": "kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n",
"expr": "max without(instance,node) (\nkubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "",
@@ -25631,6 +25706,11 @@ items:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-persistentvolumesusage
namespace: monitoring
- apiVersion: v1
@@ -26843,6 +26923,11 @@ items:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-pod-total
namespace: monitoring
- apiVersion: v1
@@ -26868,7 +26953,7 @@ items:
"links": [
],
"refresh": "",
"refresh": "60s",
"rows": [
{
"collapse": false,
@@ -27119,7 +27204,7 @@ items:
"steppedLine": false,
"targets": [
{
"expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(remote_name, url) group_right(instance) rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n",
"expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(remote_name, url) group_right(instance) (rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]))\n- \n (rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]))\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
@@ -27704,7 +27789,7 @@ items:
"steppedLine": false,
"targets": [
{
"expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\"}",
"expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\"} or prometheus_remote_storage_samples_pending{cluster=~\"$cluster\", instance=~\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
@@ -28009,7 +28094,7 @@ items:
"steppedLine": false,
"targets": [
{
"expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
"expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
@@ -28102,7 +28187,7 @@ items:
"steppedLine": false,
"targets": [
{
"expr": "rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
"expr": "rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
@@ -28195,7 +28280,7 @@ items:
"steppedLine": false,
"targets": [
{
"expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
"expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_retried_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
@@ -28348,7 +28433,7 @@ items:
"schemaVersion": 14,
"style": "dark",
"tags": [
"prometheus-mixin"
],
"templating": {
"list": [
@@ -28492,11 +28577,16 @@ items:
]
},
"timezone": "browser",
"title": "Prometheus Remote Write",
"title": "Prometheus / Remote Write",
"version": 0
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-prometheus-remote-write
namespace: monitoring
- apiVersion: v1
@@ -28515,7 +28605,7 @@ items:
"links": [
],
"refresh": "10s",
"refresh": "60s",
"rows": [
{
"collapse": false,
@@ -29594,7 +29684,7 @@ items:
"schemaVersion": 14,
"style": "dark",
"tags": [
"prometheus-mixin"
],
"templating": {
"list": [
@@ -29702,12 +29792,17 @@ items:
]
},
"timezone": "utc",
"title": "Prometheus Overview",
"title": "Prometheus / Overview",
"uid": "",
"version": 0
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-prometheus
namespace: monitoring
- apiVersion: v1
@@ -30923,6 +31018,11 @@ items:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-proxy
namespace: monitoring
- apiVersion: v1
@@ -31981,6 +32081,11 @@ items:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-scheduler
namespace: monitoring
- apiVersion: v1
@@ -32893,6 +32998,11 @@ items:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-statefulset
namespace: monitoring
- apiVersion: v1
@@ -34315,6 +34425,11 @@ items:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboard-workload-total
namespace: monitoring
kind: ConfigMapList

View File

@@ -17,5 +17,10 @@ data:
}
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana-dashboards
namespace: monitoring

View File

@@ -2,21 +2,28 @@ apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: grafana
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
app: grafana
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
template:
metadata:
annotations:
checksum/grafana-dashboards: b02ae450c84445cbaca8c685eefaec6c
checksum/grafana-datasources: 48faab41f579fc8efde6034391496f6a
checksum/grafana-datasources: a77789e5440a1e51e204e99e2f0f480a
labels:
app: grafana
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
spec:
containers:
- env: []

View File

@@ -2,7 +2,10 @@ apiVersion: v1
kind: Service
metadata:
labels:
app: grafana
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 7.3.5
name: grafana
namespace: monitoring
spec:
@@ -11,5 +14,7 @@ spec:
port: 3000
targetPort: http
selector:
app: grafana
app.kubernetes.io/component: grafana
app.kubernetes.io/name: grafana
app.kubernetes.io/part-of: kube-prometheus
type: NodePort

View File

@@ -15,11 +15,11 @@ spec:
rules:
# Warns when a container's restart rate stays above zero for 15 minutes.
# rate() yields restarts per second; the updated expression multiplies by
# 60 * 10 so that $value is "restarts per 10 minutes", matching both the 10m
# range window and the wording of the updated description. The `> 0` firing
# condition is unaffected by the scale factor.
- alert: KubePodCrashLooping
annotations:
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is restarting {{ printf "%.2f" $value }} times / 10 minutes.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubepodcrashlooping
summary: Pod is crash looping.
expr: |
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[5m]) * 60 * 5 > 0
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[10m]) * 60 * 10 > 0
for: 15m
labels:
severity: warning
@@ -499,11 +499,11 @@ spec:
severity: critical
- alert: AggregatedAPIErrors
annotations:
description: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. The number of errors have increased for it in the past five minutes. High values indicate that the availability of the service changes too often.
description: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/aggregatedapierrors
summary: An aggregated API has reported errors.
expr: |
sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2
sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[10m])) > 4
labels:
severity: warning
- alert: AggregatedAPIDown
@@ -526,6 +526,16 @@ spec:
for: 15m
labels:
severity: critical
# Warns when the share of terminated apiserver requests exceeds 20%.
# The expression divides the 10m rate of terminated requests by the sum of the
# 10m rates of apiserver_request_total and terminated requests, and the
# condition must hold for 5 minutes before the alert fires.
- alert: KubeAPITerminatedRequests
annotations:
description: The apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeapiterminatedrequests
summary: The apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.
expr: |
sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) / ( sum(rate(apiserver_request_total{job="apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20
for: 5m
labels:
severity: warning
- name: kubernetes-system-kubelet
rules:
- alert: KubeNodeNotReady
@@ -1102,77 +1112,80 @@ spec:
verb: write
record: apiserver_request:availability30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[30d]))
avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30
record: code_verb:apiserver_request_total:increase30d
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[30d]))
record: code_verb:apiserver_request_total:increase30d
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[1h]))
record: code_verb:apiserver_request_total:increase1h
- expr: |
sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
labels:

View File

@@ -202,9 +202,9 @@ spec:
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
expr: |
min without (alertmanager) (
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring",alertmanager!~``}[5m])
/
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring",alertmanager!~``}[5m])
)
* 100
> 3