docs, manifests: Regenerate files
This commit is contained in:
@@ -252,7 +252,8 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
|||||||
_config+:: {
|
_config+:: {
|
||||||
namespace: 'monitoring',
|
namespace: 'monitoring',
|
||||||
},
|
},
|
||||||
grafanaDashboards+:: {
|
grafana+:: {
|
||||||
|
dashboards+:: {
|
||||||
'my-dashboard.json':
|
'my-dashboard.json':
|
||||||
dashboard.new('My Dashboard')
|
dashboard.new('My Dashboard')
|
||||||
.addTemplate(
|
.addTemplate(
|
||||||
@@ -277,6 +278,7 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
|||||||
.addTarget(prometheus.target('vector(1)')))
|
.addTarget(prometheus.target('vector(1)')))
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||||
@@ -298,9 +300,14 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
|||||||
_config+:: {
|
_config+:: {
|
||||||
namespace: 'monitoring',
|
namespace: 'monitoring',
|
||||||
},
|
},
|
||||||
grafanaDashboards+:: {
|
grafanaDashboards+:: { // monitoring-mixin compatibility
|
||||||
'my-dashboard.json': (import 'example-grafana-dashboard.json'),
|
'my-dashboard.json': (import 'example-grafana-dashboard.json'),
|
||||||
},
|
},
|
||||||
|
grafana+:: {
|
||||||
|
dashboards+:: { // use this method to import your dashboards to Grafana
|
||||||
|
'my-dashboard.json': (import 'example-grafana-dashboard.json'),
|
||||||
|
},
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||||
@@ -319,9 +326,11 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
|||||||
_config+:: {
|
_config+:: {
|
||||||
namespace: 'monitoring',
|
namespace: 'monitoring',
|
||||||
},
|
},
|
||||||
rawGrafanaDashboards+:: {
|
grafana+:: {
|
||||||
|
rawDashboards+:: {
|
||||||
'my-dashboard.json': (importstr 'example-grafana-dashboard.json'),
|
'my-dashboard.json': (importstr 'example-grafana-dashboard.json'),
|
||||||
},
|
},
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -4,7 +4,7 @@ metadata:
|
|||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.39.0
|
app.kubernetes.io/version: v0.40.0
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
spec:
|
spec:
|
||||||
@@ -19,4 +19,4 @@ spec:
|
|||||||
matchLabels:
|
matchLabels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.39.0
|
app.kubernetes.io/version: v0.40.0
|
||||||
|
|||||||
@@ -74,7 +74,7 @@ spec:
|
|||||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d]))
|
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d]))
|
||||||
-
|
-
|
||||||
(
|
(
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[1d])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1d])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1d])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1d])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1d]))
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1d]))
|
||||||
)
|
)
|
||||||
@@ -95,7 +95,7 @@ spec:
|
|||||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h]))
|
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h]))
|
||||||
-
|
-
|
||||||
(
|
(
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[1h])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1h])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1h])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1h])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1h]))
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1h]))
|
||||||
)
|
)
|
||||||
@@ -116,7 +116,7 @@ spec:
|
|||||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h]))
|
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h]))
|
||||||
-
|
-
|
||||||
(
|
(
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[2h])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[2h])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[2h])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[2h])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[2h]))
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[2h]))
|
||||||
)
|
)
|
||||||
@@ -137,7 +137,7 @@ spec:
|
|||||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m]))
|
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m]))
|
||||||
-
|
-
|
||||||
(
|
(
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[30m])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30m])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30m])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30m])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30m]))
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30m]))
|
||||||
)
|
)
|
||||||
@@ -158,7 +158,7 @@ spec:
|
|||||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d]))
|
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d]))
|
||||||
-
|
-
|
||||||
(
|
(
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[3d])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[3d])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[3d])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[3d])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[3d]))
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[3d]))
|
||||||
)
|
)
|
||||||
@@ -179,7 +179,7 @@ spec:
|
|||||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m]))
|
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m]))
|
||||||
-
|
-
|
||||||
(
|
(
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[5m])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[5m])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[5m])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[5m])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[5m]))
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[5m]))
|
||||||
)
|
)
|
||||||
@@ -200,7 +200,7 @@ spec:
|
|||||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h]))
|
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h]))
|
||||||
-
|
-
|
||||||
(
|
(
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[6h])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[6h])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[6h])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[6h])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[6h]))
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[6h]))
|
||||||
)
|
)
|
||||||
@@ -326,81 +326,6 @@ spec:
|
|||||||
labels:
|
labels:
|
||||||
verb: write
|
verb: write
|
||||||
record: apiserver_request:burnrate6h
|
record: apiserver_request:burnrate6h
|
||||||
- expr: |
|
|
||||||
1 - (
|
|
||||||
(
|
|
||||||
# write too slow
|
|
||||||
sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
|
|
||||||
-
|
|
||||||
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
|
|
||||||
) +
|
|
||||||
(
|
|
||||||
# read too slow
|
|
||||||
sum(increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET"}[30d]))
|
|
||||||
-
|
|
||||||
(
|
|
||||||
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="resource",le="0.1"}[30d])) +
|
|
||||||
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) +
|
|
||||||
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
|
|
||||||
)
|
|
||||||
) +
|
|
||||||
# errors
|
|
||||||
sum(code:apiserver_request_total:increase30d{code=~"5.."})
|
|
||||||
)
|
|
||||||
/
|
|
||||||
sum(code:apiserver_request_total:increase30d)
|
|
||||||
labels:
|
|
||||||
verb: all
|
|
||||||
record: apiserver_request:availability30d
|
|
||||||
- expr: |
|
|
||||||
1 - (
|
|
||||||
sum(increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30d]))
|
|
||||||
-
|
|
||||||
(
|
|
||||||
# too slow
|
|
||||||
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[30d])) +
|
|
||||||
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) +
|
|
||||||
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
|
|
||||||
)
|
|
||||||
+
|
|
||||||
# errors
|
|
||||||
sum(code:apiserver_request_total:increase30d{verb="read",code=~"5.."})
|
|
||||||
)
|
|
||||||
/
|
|
||||||
sum(code:apiserver_request_total:increase30d{verb="read"})
|
|
||||||
labels:
|
|
||||||
verb: read
|
|
||||||
record: apiserver_request:availability30d
|
|
||||||
- expr: |
|
|
||||||
1 - (
|
|
||||||
(
|
|
||||||
# too slow
|
|
||||||
sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
|
|
||||||
-
|
|
||||||
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
|
|
||||||
)
|
|
||||||
+
|
|
||||||
# errors
|
|
||||||
sum(code:apiserver_request_total:increase30d{verb="write",code=~"5.."})
|
|
||||||
)
|
|
||||||
/
|
|
||||||
sum(code:apiserver_request_total:increase30d{verb="write"})
|
|
||||||
labels:
|
|
||||||
verb: write
|
|
||||||
record: apiserver_request:availability30d
|
|
||||||
- expr: |
|
|
||||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver"}[30d]))
|
|
||||||
record: code_verb:apiserver_request_total:increase30d
|
|
||||||
- expr: |
|
|
||||||
sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
|
|
||||||
labels:
|
|
||||||
verb: read
|
|
||||||
record: code:apiserver_request_total:increase30d
|
|
||||||
- expr: |
|
|
||||||
sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
|
|
||||||
labels:
|
|
||||||
verb: write
|
|
||||||
record: code:apiserver_request_total:increase30d
|
|
||||||
- expr: |
|
- expr: |
|
||||||
sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
|
sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
|
||||||
labels:
|
labels:
|
||||||
@@ -443,6 +368,153 @@ spec:
|
|||||||
labels:
|
labels:
|
||||||
quantile: "0.5"
|
quantile: "0.5"
|
||||||
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
|
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
|
||||||
|
- interval: 3m
|
||||||
|
name: kube-apiserver-availability.rules
|
||||||
|
rules:
|
||||||
|
- expr: |
|
||||||
|
1 - (
|
||||||
|
(
|
||||||
|
# write too slow
|
||||||
|
sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
|
||||||
|
-
|
||||||
|
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
|
||||||
|
) +
|
||||||
|
(
|
||||||
|
# read too slow
|
||||||
|
sum(increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET"}[30d]))
|
||||||
|
-
|
||||||
|
(
|
||||||
|
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) +
|
||||||
|
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) +
|
||||||
|
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
|
||||||
|
)
|
||||||
|
) +
|
||||||
|
# errors
|
||||||
|
sum(code:apiserver_request_total:increase30d{code=~"5.."} or vector(0))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
sum(code:apiserver_request_total:increase30d)
|
||||||
|
labels:
|
||||||
|
verb: all
|
||||||
|
record: apiserver_request:availability30d
|
||||||
|
- expr: |
|
||||||
|
1 - (
|
||||||
|
sum(increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30d]))
|
||||||
|
-
|
||||||
|
(
|
||||||
|
# too slow
|
||||||
|
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) +
|
||||||
|
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) +
|
||||||
|
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
|
||||||
|
)
|
||||||
|
+
|
||||||
|
# errors
|
||||||
|
sum(code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
sum(code:apiserver_request_total:increase30d{verb="read"})
|
||||||
|
labels:
|
||||||
|
verb: read
|
||||||
|
record: apiserver_request:availability30d
|
||||||
|
- expr: |
|
||||||
|
1 - (
|
||||||
|
(
|
||||||
|
# too slow
|
||||||
|
sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
|
||||||
|
-
|
||||||
|
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
|
||||||
|
)
|
||||||
|
+
|
||||||
|
# errors
|
||||||
|
sum(code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
sum(code:apiserver_request_total:increase30d{verb="write"})
|
||||||
|
labels:
|
||||||
|
verb: write
|
||||||
|
record: apiserver_request:availability30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
|
||||||
|
labels:
|
||||||
|
verb: read
|
||||||
|
record: code:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
|
||||||
|
labels:
|
||||||
|
verb: write
|
||||||
|
record: code:apiserver_request_total:increase30d
|
||||||
- name: k8s.rules
|
- name: k8s.rules
|
||||||
rules:
|
rules:
|
||||||
- expr: |
|
- expr: |
|
||||||
@@ -452,31 +524,31 @@ spec:
|
|||||||
sum by (cluster, namespace, pod, container) (
|
sum by (cluster, namespace, pod, container) (
|
||||||
rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])
|
rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])
|
||||||
) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (
|
) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (
|
||||||
1, max by(cluster, namespace, pod, node) (kube_pod_info)
|
1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
|
||||||
)
|
)
|
||||||
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
|
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
|
||||||
- expr: |
|
- expr: |
|
||||||
container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
||||||
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||||
max by(namespace, pod, node) (kube_pod_info)
|
max by(namespace, pod, node) (kube_pod_info{node!=""})
|
||||||
)
|
)
|
||||||
record: node_namespace_pod_container:container_memory_working_set_bytes
|
record: node_namespace_pod_container:container_memory_working_set_bytes
|
||||||
- expr: |
|
- expr: |
|
||||||
container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
||||||
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||||
max by(namespace, pod, node) (kube_pod_info)
|
max by(namespace, pod, node) (kube_pod_info{node!=""})
|
||||||
)
|
)
|
||||||
record: node_namespace_pod_container:container_memory_rss
|
record: node_namespace_pod_container:container_memory_rss
|
||||||
- expr: |
|
- expr: |
|
||||||
container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
||||||
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||||
max by(namespace, pod, node) (kube_pod_info)
|
max by(namespace, pod, node) (kube_pod_info{node!=""})
|
||||||
)
|
)
|
||||||
record: node_namespace_pod_container:container_memory_cache
|
record: node_namespace_pod_container:container_memory_cache
|
||||||
- expr: |
|
- expr: |
|
||||||
container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
||||||
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||||
max by(namespace, pod, node) (kube_pod_info)
|
max by(namespace, pod, node) (kube_pod_info{node!=""})
|
||||||
)
|
)
|
||||||
record: node_namespace_pod_container:container_memory_swap
|
record: node_namespace_pod_container:container_memory_swap
|
||||||
- expr: |
|
- expr: |
|
||||||
@@ -591,12 +663,12 @@ spec:
|
|||||||
- name: node.rules
|
- name: node.rules
|
||||||
rules:
|
rules:
|
||||||
- expr: |
|
- expr: |
|
||||||
sum(min(kube_pod_info) by (cluster, node))
|
sum(min(kube_pod_info{node!=""}) by (cluster, node))
|
||||||
record: ':kube_pod_info_node_count:'
|
record: ':kube_pod_info_node_count:'
|
||||||
- expr: |
|
- expr: |
|
||||||
topk by(namespace, pod) (1,
|
topk by(namespace, pod) (1,
|
||||||
max by (node, namespace, pod) (
|
max by (node, namespace, pod) (
|
||||||
label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")
|
label_replace(kube_pod_info{job="kube-state-metrics",node!=""}, "pod", "$1", "pod", "(.*)")
|
||||||
))
|
))
|
||||||
record: 'node_namespace_pod:kube_pod_info:'
|
record: 'node_namespace_pod:kube_pod_info:'
|
||||||
- expr: |
|
- expr: |
|
||||||
@@ -849,13 +921,22 @@ spec:
|
|||||||
severity: warning
|
severity: warning
|
||||||
- alert: NodeHighNumberConntrackEntriesUsed
|
- alert: NodeHighNumberConntrackEntriesUsed
|
||||||
annotations:
|
annotations:
|
||||||
description: '{{ $value | humanizePercentage }} of conntrack entries are used'
|
description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
|
||||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodehighnumberconntrackentriesused
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodehighnumberconntrackentriesused
|
||||||
summary: Number of conntrack are getting close to the limit
|
summary: Number of conntrack are getting close to the limit.
|
||||||
expr: |
|
expr: |
|
||||||
(node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
|
(node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
|
- alert: NodeTextFileCollectorScrapeError
|
||||||
|
annotations:
|
||||||
|
description: Node Exporter text file collector failed to scrape.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodetextfilecollectorscrapeerror
|
||||||
|
summary: Node Exporter text file collector failed to scrape.
|
||||||
|
expr: |
|
||||||
|
node_textfile_scrape_error{job="node-exporter"} == 1
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
- alert: NodeClockSkewDetected
|
- alert: NodeClockSkewDetected
|
||||||
annotations:
|
annotations:
|
||||||
message: Clock on {{ $labels.instance }} is out of sync by more than 300s.
|
message: Clock on {{ $labels.instance }} is out of sync by more than 300s.
|
||||||
@@ -896,20 +977,26 @@ spec:
|
|||||||
}}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
|
}}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
|
||||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping
|
||||||
expr: |
|
expr: |
|
||||||
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[15m]) * 60 * 5 > 0
|
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[5m]) * 60 * 5 > 0
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: warning
|
||||||
- alert: KubePodNotReady
|
- alert: KubePodNotReady
|
||||||
annotations:
|
annotations:
|
||||||
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
|
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
|
||||||
state for longer than 15 minutes.
|
state for longer than 15 minutes.
|
||||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
|
||||||
expr: |
|
expr: |
|
||||||
sum by (namespace, pod) (max by(namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"}) * on(namespace, pod) group_left(owner_kind) max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!="Job"})) > 0
|
sum by (namespace, pod) (
|
||||||
|
max by(namespace, pod) (
|
||||||
|
kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"}
|
||||||
|
) * on(namespace, pod) group_left(owner_kind) topk by(namespace, pod) (
|
||||||
|
1, max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!="Job"})
|
||||||
|
)
|
||||||
|
) > 0
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: warning
|
||||||
- alert: KubeDeploymentGenerationMismatch
|
- alert: KubeDeploymentGenerationMismatch
|
||||||
annotations:
|
annotations:
|
||||||
message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
|
message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
|
||||||
@@ -922,7 +1009,7 @@ spec:
|
|||||||
kube_deployment_metadata_generation{job="kube-state-metrics"}
|
kube_deployment_metadata_generation{job="kube-state-metrics"}
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: warning
|
||||||
- alert: KubeDeploymentReplicasMismatch
|
- alert: KubeDeploymentReplicasMismatch
|
||||||
annotations:
|
annotations:
|
||||||
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not
|
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not
|
||||||
@@ -940,7 +1027,7 @@ spec:
|
|||||||
)
|
)
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: warning
|
||||||
- alert: KubeStatefulSetReplicasMismatch
|
- alert: KubeStatefulSetReplicasMismatch
|
||||||
annotations:
|
annotations:
|
||||||
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has
|
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has
|
||||||
@@ -958,7 +1045,7 @@ spec:
|
|||||||
)
|
)
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: warning
|
||||||
- alert: KubeStatefulSetGenerationMismatch
|
- alert: KubeStatefulSetGenerationMismatch
|
||||||
annotations:
|
annotations:
|
||||||
message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
|
message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
|
||||||
@@ -971,13 +1058,14 @@ spec:
|
|||||||
kube_statefulset_metadata_generation{job="kube-state-metrics"}
|
kube_statefulset_metadata_generation{job="kube-state-metrics"}
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: warning
|
||||||
- alert: KubeStatefulSetUpdateNotRolledOut
|
- alert: KubeStatefulSetUpdateNotRolledOut
|
||||||
annotations:
|
annotations:
|
||||||
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update
|
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update
|
||||||
has not been rolled out.
|
has not been rolled out.
|
||||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout
|
||||||
expr: |
|
expr: |
|
||||||
|
(
|
||||||
max without (revision) (
|
max without (revision) (
|
||||||
kube_statefulset_status_current_revision{job="kube-state-metrics"}
|
kube_statefulset_status_current_revision{job="kube-state-metrics"}
|
||||||
unless
|
unless
|
||||||
@@ -989,9 +1077,14 @@ spec:
|
|||||||
!=
|
!=
|
||||||
kube_statefulset_status_replicas_updated{job="kube-state-metrics"}
|
kube_statefulset_status_replicas_updated{job="kube-state-metrics"}
|
||||||
)
|
)
|
||||||
|
) and (
|
||||||
|
changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics"}[5m])
|
||||||
|
==
|
||||||
|
0
|
||||||
|
)
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: warning
|
||||||
- alert: KubeDaemonSetRolloutStuck
|
- alert: KubeDaemonSetRolloutStuck
|
||||||
annotations:
|
annotations:
|
||||||
message: Only {{ $value | humanizePercentage }} of the desired Pods of DaemonSet
|
message: Only {{ $value | humanizePercentage }} of the desired Pods of DaemonSet
|
||||||
@@ -1003,7 +1096,7 @@ spec:
|
|||||||
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} < 1.00
|
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} < 1.00
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: warning
|
||||||
- alert: KubeContainerWaiting
|
- alert: KubeContainerWaiting
|
||||||
annotations:
|
annotations:
|
||||||
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}}
|
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}}
|
||||||
@@ -1254,7 +1347,9 @@ spec:
|
|||||||
sum(apiserver_request:burnrate5m) > (14.40 * 0.01000)
|
sum(apiserver_request:burnrate5m) > (14.40 * 0.01000)
|
||||||
for: 2m
|
for: 2m
|
||||||
labels:
|
labels:
|
||||||
|
long: 1h
|
||||||
severity: critical
|
severity: critical
|
||||||
|
short: 5m
|
||||||
- alert: KubeAPIErrorBudgetBurn
|
- alert: KubeAPIErrorBudgetBurn
|
||||||
annotations:
|
annotations:
|
||||||
message: The API server is burning too much error budget
|
message: The API server is burning too much error budget
|
||||||
@@ -1265,7 +1360,9 @@ spec:
|
|||||||
sum(apiserver_request:burnrate30m) > (6.00 * 0.01000)
|
sum(apiserver_request:burnrate30m) > (6.00 * 0.01000)
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
|
long: 6h
|
||||||
severity: critical
|
severity: critical
|
||||||
|
short: 30m
|
||||||
- alert: KubeAPIErrorBudgetBurn
|
- alert: KubeAPIErrorBudgetBurn
|
||||||
annotations:
|
annotations:
|
||||||
message: The API server is burning too much error budget
|
message: The API server is burning too much error budget
|
||||||
@@ -1276,7 +1373,9 @@ spec:
|
|||||||
sum(apiserver_request:burnrate2h) > (3.00 * 0.01000)
|
sum(apiserver_request:burnrate2h) > (3.00 * 0.01000)
|
||||||
for: 1h
|
for: 1h
|
||||||
labels:
|
labels:
|
||||||
|
long: 1d
|
||||||
severity: warning
|
severity: warning
|
||||||
|
short: 2h
|
||||||
- alert: KubeAPIErrorBudgetBurn
|
- alert: KubeAPIErrorBudgetBurn
|
||||||
annotations:
|
annotations:
|
||||||
message: The API server is burning too much error budget
|
message: The API server is burning too much error budget
|
||||||
@@ -1287,7 +1386,9 @@ spec:
|
|||||||
sum(apiserver_request:burnrate6h) > (1.00 * 0.01000)
|
sum(apiserver_request:burnrate6h) > (1.00 * 0.01000)
|
||||||
for: 3h
|
for: 3h
|
||||||
labels:
|
labels:
|
||||||
|
long: 3d
|
||||||
severity: warning
|
severity: warning
|
||||||
|
short: 6h
|
||||||
- name: kubernetes-system-apiserver
|
- name: kubernetes-system-apiserver
|
||||||
rules:
|
rules:
|
||||||
- alert: KubeAPILatencyHigh
|
- alert: KubeAPILatencyHigh
|
||||||
@@ -1296,6 +1397,10 @@ spec:
|
|||||||
{{ $labels.verb }} {{ $labels.resource }}.
|
{{ $labels.verb }} {{ $labels.resource }}.
|
||||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
|
||||||
expr: |
|
expr: |
|
||||||
|
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"}
|
||||||
|
>
|
||||||
|
1
|
||||||
|
and on (verb,resource)
|
||||||
(
|
(
|
||||||
cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"}
|
cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"}
|
||||||
>
|
>
|
||||||
@@ -1307,10 +1412,6 @@ spec:
|
|||||||
)
|
)
|
||||||
) > on (verb) group_left()
|
) > on (verb) group_left()
|
||||||
1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
|
1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
|
||||||
and on (verb,resource)
|
|
||||||
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"}
|
|
||||||
>
|
|
||||||
1
|
|
||||||
for: 5m
|
for: 5m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
@@ -1391,8 +1492,7 @@ spec:
|
|||||||
message: '{{ $labels.node }} is unreachable and some workloads may be rescheduled.'
|
message: '{{ $labels.node }} is unreachable and some workloads may be rescheduled.'
|
||||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodeunreachable
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodeunreachable
|
||||||
expr: |
|
expr: |
|
||||||
kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} == 1
|
(kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{job="kube-state-metrics",key="ToBeDeletedByClusterAutoscaler"}) == 1
|
||||||
for: 2m
|
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
- alert: KubeletTooManyPods
|
- alert: KubeletTooManyPods
|
||||||
|
|||||||
@@ -2177,6 +2177,15 @@ spec:
|
|||||||
of origin for each alert and metric that is user created. The label
|
of origin for each alert and metric that is user created. The label
|
||||||
value will always be the namespace of the object that is being created.
|
value will always be the namespace of the object that is being created.
|
||||||
type: string
|
type: string
|
||||||
|
enforcedSampleLimit:
|
||||||
|
description: EnforcedSampleLimit defines global limit on number of
|
||||||
|
scraped samples that will be accepted. This overrides any SampleLimit
|
||||||
|
set per ServiceMonitor or/and PodMonitor. It is meant to be used
|
||||||
|
by admins to enforce the SampleLimit to keep overall number of samples/series
|
||||||
|
under the desired limit. Note that if SampleLimit is lower, that
|
||||||
|
value will be taken instead.
|
||||||
|
format: int64
|
||||||
|
type: integer
|
||||||
evaluationInterval:
|
evaluationInterval:
|
||||||
description: Interval between consecutive evaluations.
|
description: Interval between consecutive evaluations.
|
||||||
type: string
|
type: string
|
||||||
@@ -3428,6 +3437,27 @@ spec:
|
|||||||
instance name. Defaults to the value of `prometheus`. External label
|
instance name. Defaults to the value of `prometheus`. External label
|
||||||
will _not_ be added when value is set to empty string (`""`).
|
will _not_ be added when value is set to empty string (`""`).
|
||||||
type: string
|
type: string
|
||||||
|
prometheusRulesExcludedFromEnforce:
|
||||||
|
description: PrometheusRulesExcludedFromEnforce - list of prometheus
|
||||||
|
rules to be excluded from enforcing of adding namespace labels.
|
||||||
|
Works only if enforcedNamespaceLabel set to true. Make sure both
|
||||||
|
ruleNamespace and ruleName are set for each pair
|
||||||
|
items:
|
||||||
|
description: PrometheusRuleExcludeConfig enables users to configure
|
||||||
|
excluded PrometheusRule names and their namespaces to be ignored
|
||||||
|
while enforcing namespace label for alerts and metrics.
|
||||||
|
properties:
|
||||||
|
ruleName:
|
||||||
|
description: RuleName - name of excluded rule
|
||||||
|
type: string
|
||||||
|
ruleNamespace:
|
||||||
|
description: RuleNamespace - namespace of excluded rule
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- ruleName
|
||||||
|
- ruleNamespace
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
query:
|
query:
|
||||||
description: QuerySpec defines the query command line flags when starting
|
description: QuerySpec defines the query command line flags when starting
|
||||||
Prometheus.
|
Prometheus.
|
||||||
@@ -4114,6 +4144,10 @@ spec:
|
|||||||
scrapeInterval:
|
scrapeInterval:
|
||||||
description: Interval between consecutive scrapes.
|
description: Interval between consecutive scrapes.
|
||||||
type: string
|
type: string
|
||||||
|
scrapeTimeout:
|
||||||
|
description: Number of seconds to wait for target to respond before
|
||||||
|
erroring.
|
||||||
|
type: string
|
||||||
secrets:
|
secrets:
|
||||||
description: Secrets is a list of Secrets in the same namespace as
|
description: Secrets is a list of Secrets in the same namespace as
|
||||||
the Prometheus object, which shall be mounted into the Prometheus
|
the Prometheus object, which shall be mounted into the Prometheus
|
||||||
@@ -4762,6 +4796,12 @@ spec:
|
|||||||
logLevel:
|
logLevel:
|
||||||
description: LogLevel for Thanos sidecar to be configured with.
|
description: LogLevel for Thanos sidecar to be configured with.
|
||||||
type: string
|
type: string
|
||||||
|
minTime:
|
||||||
|
description: MinTime for Thanos sidecar to be configured with.
|
||||||
|
Option can be a constant time in RFC3339 format or time duration
|
||||||
|
relative to current time, such as -1d or 2h45m. Valid duration
|
||||||
|
units are ms, s, m, h, d, w, y.
|
||||||
|
type: string
|
||||||
objectStorageConfig:
|
objectStorageConfig:
|
||||||
description: ObjectStorageConfig configures object storage in
|
description: ObjectStorageConfig configures object storage in
|
||||||
Thanos.
|
Thanos.
|
||||||
|
|||||||
@@ -2998,6 +2998,27 @@ spec:
|
|||||||
priorityClassName:
|
priorityClassName:
|
||||||
description: Priority class assigned to the Pods
|
description: Priority class assigned to the Pods
|
||||||
type: string
|
type: string
|
||||||
|
prometheusRulesExcludedFromEnforce:
|
||||||
|
description: PrometheusRulesExcludedFromEnforce - list of Prometheus
|
||||||
|
rules to be excluded from enforcing of adding namespace labels.
|
||||||
|
Works only if enforcedNamespaceLabel set to true. Make sure both
|
||||||
|
ruleNamespace and ruleName are set for each pair
|
||||||
|
items:
|
||||||
|
description: PrometheusRuleExcludeConfig enables users to configure
|
||||||
|
excluded PrometheusRule names and their namespaces to be ignored
|
||||||
|
while enforcing namespace label for alerts and metrics.
|
||||||
|
properties:
|
||||||
|
ruleName:
|
||||||
|
description: RuleName - name of excluded rule
|
||||||
|
type: string
|
||||||
|
ruleNamespace:
|
||||||
|
description: RuleNamespace - namespace of excluded rule
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- ruleName
|
||||||
|
- ruleNamespace
|
||||||
|
type: object
|
||||||
|
type: array
|
||||||
queryConfig:
|
queryConfig:
|
||||||
description: Define configuration for connecting to thanos query instances.
|
description: Define configuration for connecting to thanos query instances.
|
||||||
If this is defined, the QueryEndpoints field will be ignored. Maps
|
If this is defined, the QueryEndpoints field will be ignored. Maps
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ metadata:
|
|||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.39.0
|
app.kubernetes.io/version: v0.40.0
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
rules:
|
rules:
|
||||||
- apiGroups:
|
- apiGroups:
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ metadata:
|
|||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.39.0
|
app.kubernetes.io/version: v0.40.0
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
roleRef:
|
roleRef:
|
||||||
apiGroup: rbac.authorization.k8s.io
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ metadata:
|
|||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.39.0
|
app.kubernetes.io/version: v0.40.0
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
spec:
|
spec:
|
||||||
@@ -18,15 +18,15 @@ spec:
|
|||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.39.0
|
app.kubernetes.io/version: v0.40.0
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- args:
|
- args:
|
||||||
- --kubelet-service=kube-system/kubelet
|
- --kubelet-service=kube-system/kubelet
|
||||||
- --logtostderr=true
|
- --logtostderr=true
|
||||||
- --config-reloader-image=jimmidyson/configmap-reload:v0.3.0
|
- --config-reloader-image=jimmidyson/configmap-reload:v0.3.0
|
||||||
- --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.39.0
|
- --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.40.0
|
||||||
image: quay.io/coreos/prometheus-operator:v0.39.0
|
image: quay.io/coreos/prometheus-operator:v0.40.0
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8080
|
- containerPort: 8080
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ metadata:
|
|||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.39.0
|
app.kubernetes.io/version: v0.40.0
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
spec:
|
spec:
|
||||||
|
|||||||
@@ -4,6 +4,6 @@ metadata:
|
|||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.39.0
|
app.kubernetes.io/version: v0.40.0
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
|
|||||||
Reference in New Issue
Block a user