Merge branch 'master' into make-folder
This commit is contained in:
@@ -1,73 +0,0 @@
|
|||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: Prometheus
|
|
||||||
metadata:
|
|
||||||
name: self
|
|
||||||
labels:
|
|
||||||
prometheus: self
|
|
||||||
spec:
|
|
||||||
podMetadata:
|
|
||||||
labels:
|
|
||||||
thanos-peer: 'true'
|
|
||||||
replicas: 2
|
|
||||||
version: v2.2.1
|
|
||||||
serviceAccountName: prometheus-k8s
|
|
||||||
serviceMonitorSelector:
|
|
||||||
matchLabels:
|
|
||||||
app: prometheus
|
|
||||||
ruleSelector:
|
|
||||||
matchLabels:
|
|
||||||
role: prometheus-rulefiles
|
|
||||||
prometheus: k8s
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
# 2Gi is default, but won't schedule if you don't have a node with >2Gi
|
|
||||||
# memory. Modify based on your target and time-series count for
|
|
||||||
# production use. This value is mainly meant for demonstration/testing
|
|
||||||
# purposes.
|
|
||||||
memory: 400Mi
|
|
||||||
containers:
|
|
||||||
- name: thanos
|
|
||||||
image: improbable/thanos:latest
|
|
||||||
args:
|
|
||||||
- "sidecar"
|
|
||||||
- "--log.level=debug"
|
|
||||||
- "--cluster.peers=thanos-peers.default.svc:10900"
|
|
||||||
ports:
|
|
||||||
- name: http
|
|
||||||
containerPort: 10902
|
|
||||||
- name: grpc
|
|
||||||
containerPort: 10901
|
|
||||||
- name: cluster
|
|
||||||
containerPort: 10900
|
|
||||||
---
|
|
||||||
apiVersion: monitoring.coreos.com/v1
|
|
||||||
kind: ServiceMonitor
|
|
||||||
metadata:
|
|
||||||
name: prometheus
|
|
||||||
labels:
|
|
||||||
app: prometheus
|
|
||||||
spec:
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: prometheus
|
|
||||||
endpoints:
|
|
||||||
- port: web
|
|
||||||
interval: 30s
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: prometheus
|
|
||||||
prometheus: self
|
|
||||||
name: prometheus-self
|
|
||||||
spec:
|
|
||||||
type: NodePort
|
|
||||||
ports:
|
|
||||||
- name: web
|
|
||||||
nodePort: 30900
|
|
||||||
port: 9090
|
|
||||||
protocol: TCP
|
|
||||||
targetPort: web
|
|
||||||
selector:
|
|
||||||
prometheus: self
|
|
@@ -1,51 +0,0 @@
|
|||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: thanos-query
|
|
||||||
labels:
|
|
||||||
app: thanos-query
|
|
||||||
thanos-peer: "true"
|
|
||||||
spec:
|
|
||||||
replicas: 2
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: thanos-query
|
|
||||||
thanos-peer: "true"
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: thanos-query
|
|
||||||
thanos-peer: "true"
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: thanos-query
|
|
||||||
image: improbable/thanos:latest
|
|
||||||
args:
|
|
||||||
- "query"
|
|
||||||
- "--log.level=debug"
|
|
||||||
- "--query.replica-label=prometheus_replica"
|
|
||||||
- "--cluster.peers=thanos-peers.default.svc:10900"
|
|
||||||
ports:
|
|
||||||
- name: http
|
|
||||||
containerPort: 10902
|
|
||||||
- name: grpc
|
|
||||||
containerPort: 10901
|
|
||||||
- name: cluster
|
|
||||||
containerPort: 10900
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: thanos-query
|
|
||||||
name: thanos-query
|
|
||||||
spec:
|
|
||||||
type: NodePort
|
|
||||||
selector:
|
|
||||||
app: thanos-query
|
|
||||||
ports:
|
|
||||||
- port: 9090
|
|
||||||
protocol: TCP
|
|
||||||
targetPort: http
|
|
||||||
name: http-query
|
|
||||||
nodePort: 31111
|
|
@@ -1,14 +0,0 @@
|
|||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: thanos-peers
|
|
||||||
spec:
|
|
||||||
type: ClusterIP
|
|
||||||
clusterIP: None
|
|
||||||
ports:
|
|
||||||
- name: cluster
|
|
||||||
port: 10900
|
|
||||||
targetPort: cluster
|
|
||||||
selector:
|
|
||||||
# Useful endpoint for gathering all thanos components for common gossip cluster.
|
|
||||||
thanos-peer: "true"
|
|
@@ -5,7 +5,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
|||||||
namespace: 'default',
|
namespace: 'default',
|
||||||
|
|
||||||
versions+:: {
|
versions+:: {
|
||||||
alertmanager: 'v0.14.0',
|
alertmanager: 'v0.15.0',
|
||||||
},
|
},
|
||||||
|
|
||||||
imageRepos+:: {
|
imageRepos+:: {
|
||||||
|
@@ -5,8 +5,8 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
|||||||
namespace: 'default',
|
namespace: 'default',
|
||||||
|
|
||||||
versions+:: {
|
versions+:: {
|
||||||
kubeStateMetrics: 'v1.3.0',
|
kubeStateMetrics: 'v1.3.1',
|
||||||
kubeRbacProxy: 'v0.3.0',
|
kubeRbacProxy: 'v0.3.1',
|
||||||
addonResizer: '1.0',
|
addonResizer: '1.0',
|
||||||
},
|
},
|
||||||
|
|
||||||
|
@@ -6,7 +6,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
|||||||
|
|
||||||
versions+:: {
|
versions+:: {
|
||||||
nodeExporter: 'v0.15.2',
|
nodeExporter: 'v0.15.2',
|
||||||
kubeRbacProxy: 'v0.3.0',
|
kubeRbacProxy: 'v0.3.1',
|
||||||
},
|
},
|
||||||
|
|
||||||
imageRepos+:: {
|
imageRepos+:: {
|
||||||
|
@@ -5,7 +5,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
|||||||
namespace: 'default',
|
namespace: 'default',
|
||||||
|
|
||||||
versions+:: {
|
versions+:: {
|
||||||
prometheus: 'v2.2.1',
|
prometheus: 'v2.3.1',
|
||||||
},
|
},
|
||||||
|
|
||||||
imageRepos+:: {
|
imageRepos+:: {
|
||||||
|
@@ -2672,6 +2672,77 @@ spec:
|
|||||||
phase:
|
phase:
|
||||||
description: Phase represents the current phase of PersistentVolumeClaim.
|
description: Phase represents the current phase of PersistentVolumeClaim.
|
||||||
type: string
|
type: string
|
||||||
|
thanos:
|
||||||
|
description: ThanosSpec defines parameters for a Prometheus server within
|
||||||
|
a Thanos deployment.
|
||||||
|
properties:
|
||||||
|
baseImage:
|
||||||
|
description: Thanos base image if other than default.
|
||||||
|
type: string
|
||||||
|
gcs:
|
||||||
|
description: ThanosGCSSpec defines parameters for use of Google
|
||||||
|
Cloud Storage (GCS) with Thanos.
|
||||||
|
properties:
|
||||||
|
bucket:
|
||||||
|
description: Google Cloud Storage bucket name for stored blocks.
|
||||||
|
If empty it won't store any block inside Google Cloud Storage.
|
||||||
|
type: string
|
||||||
|
peers:
|
||||||
|
description: Peers is a DNS name for Thanos to discover peers through.
|
||||||
|
type: string
|
||||||
|
s3:
|
||||||
|
description: ThanosSpec defines parameters for use of AWS Simple Storage
|
||||||
|
Service (S3) with Thanos. (S3 compatible services apply as well)
|
||||||
|
properties:
|
||||||
|
accessKey:
|
||||||
|
description: SecretKeySelector selects a key of a Secret.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
description: The key of the secret to select from. Must
|
||||||
|
be a valid secret key.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
description: Specify whether the Secret or its key must
|
||||||
|
be defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
bucket:
|
||||||
|
description: S3-Compatible API bucket name for stored blocks.
|
||||||
|
type: string
|
||||||
|
endpoint:
|
||||||
|
description: S3-Compatible API endpoint for stored blocks.
|
||||||
|
type: string
|
||||||
|
insecure:
|
||||||
|
description: Whether to use an insecure connection with an S3-Compatible
|
||||||
|
API.
|
||||||
|
type: boolean
|
||||||
|
secretKey:
|
||||||
|
description: SecretKeySelector selects a key of a Secret.
|
||||||
|
properties:
|
||||||
|
key:
|
||||||
|
description: The key of the secret to select from. Must
|
||||||
|
be a valid secret key.
|
||||||
|
type: string
|
||||||
|
name:
|
||||||
|
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
|
||||||
|
type: string
|
||||||
|
optional:
|
||||||
|
description: Specify whether the Secret or its key must
|
||||||
|
be defined
|
||||||
|
type: boolean
|
||||||
|
required:
|
||||||
|
- key
|
||||||
|
signatureVersion2:
|
||||||
|
description: Whether to use S3 Signature Version 2; otherwise
|
||||||
|
Signature Version 4 will be used.
|
||||||
|
type: boolean
|
||||||
|
version:
|
||||||
|
description: Version describes the version of Thanos to use.
|
||||||
|
type: string
|
||||||
tolerations:
|
tolerations:
|
||||||
description: If specified, the pod's tolerations.
|
description: If specified, the pod's tolerations.
|
||||||
items:
|
items:
|
||||||
|
@@ -11,4 +11,4 @@ spec:
|
|||||||
beta.kubernetes.io/os: linux
|
beta.kubernetes.io/os: linux
|
||||||
replicas: 3
|
replicas: 3
|
||||||
serviceAccountName: alertmanager-main
|
serviceAccountName: alertmanager-main
|
||||||
version: v0.14.0
|
version: v0.15.0
|
||||||
|
@@ -64,7 +64,7 @@ items:
|
|||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{node}}",
|
"legendFormat": "{{node}}",
|
||||||
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
|
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
|
||||||
"step": 10
|
"step": 10
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -150,7 +150,7 @@ items:
|
|||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{node}}",
|
"legendFormat": "{{node}}",
|
||||||
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
|
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
|
||||||
"step": 10
|
"step": 10
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -248,7 +248,7 @@ items:
|
|||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{node}}",
|
"legendFormat": "{{node}}",
|
||||||
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
|
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
|
||||||
"step": 10
|
"step": 10
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -334,7 +334,7 @@ items:
|
|||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{node}}",
|
"legendFormat": "{{node}}",
|
||||||
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
|
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
|
||||||
"step": 10
|
"step": 10
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -432,7 +432,7 @@ items:
|
|||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{node}}",
|
"legendFormat": "{{node}}",
|
||||||
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
|
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
|
||||||
"step": 10
|
"step": 10
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -518,7 +518,7 @@ items:
|
|||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{node}}",
|
"legendFormat": "{{node}}",
|
||||||
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
|
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
|
||||||
"step": 10
|
"step": 10
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -616,7 +616,7 @@ items:
|
|||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{node}}",
|
"legendFormat": "{{node}}",
|
||||||
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
|
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
|
||||||
"step": 10
|
"step": 10
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -702,7 +702,7 @@ items:
|
|||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{node}}",
|
"legendFormat": "{{node}}",
|
||||||
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
|
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
|
||||||
"step": 10
|
"step": 10
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -800,7 +800,7 @@ items:
|
|||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": "{{node}}",
|
"legendFormat": "{{node}}",
|
||||||
"legendLink": "/dashboard/file/k8s-node-rsrc-use.json",
|
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
|
||||||
"step": 10
|
"step": 10
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -909,6 +909,7 @@ items:
|
|||||||
},
|
},
|
||||||
"timezone": "utc",
|
"timezone": "utc",
|
||||||
"title": "K8s / USE Method / Cluster",
|
"title": "K8s / USE Method / Cluster",
|
||||||
|
"uid": "a6e7d1362e1ddbb79db21d5bb40d7137",
|
||||||
"version": 0
|
"version": 0
|
||||||
}
|
}
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
@@ -1851,6 +1852,7 @@ items:
|
|||||||
},
|
},
|
||||||
"timezone": "utc",
|
"timezone": "utc",
|
||||||
"title": "K8s / USE Method / Node",
|
"title": "K8s / USE Method / Node",
|
||||||
|
"uid": "4ac4f123aae0ff6dbaf4f4f66120033b",
|
||||||
"version": 0
|
"version": 0
|
||||||
}
|
}
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
@@ -2468,7 +2470,7 @@ items:
|
|||||||
"decimals": 2,
|
"decimals": 2,
|
||||||
"link": true,
|
"link": true,
|
||||||
"linkTooltip": "Drill down",
|
"linkTooltip": "Drill down",
|
||||||
"linkUrl": "/dashboard/file/k8s-resources-namespace.json?var-datasource=$datasource&var-namespace=$__cell",
|
"linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-namespace=$__cell",
|
||||||
"pattern": "namespace",
|
"pattern": "namespace",
|
||||||
"thresholds": [
|
"thresholds": [
|
||||||
|
|
||||||
@@ -2828,7 +2830,7 @@ items:
|
|||||||
"decimals": 2,
|
"decimals": 2,
|
||||||
"link": true,
|
"link": true,
|
||||||
"linkTooltip": "Drill down",
|
"linkTooltip": "Drill down",
|
||||||
"linkUrl": "/dashboard/file/k8s-resources-namespace.json?var-datasource=$datasource&var-namespace=$__cell",
|
"linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-namespace=$__cell",
|
||||||
"pattern": "namespace",
|
"pattern": "namespace",
|
||||||
"thresholds": [
|
"thresholds": [
|
||||||
|
|
||||||
@@ -3000,6 +3002,7 @@ items:
|
|||||||
},
|
},
|
||||||
"timezone": "utc",
|
"timezone": "utc",
|
||||||
"title": "K8s / Compute Resources / Cluster",
|
"title": "K8s / Compute Resources / Cluster",
|
||||||
|
"uid": "efa86fd1d0c121a26444b636a3f509a8",
|
||||||
"version": 0
|
"version": 0
|
||||||
}
|
}
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
@@ -3269,7 +3272,7 @@ items:
|
|||||||
"decimals": 2,
|
"decimals": 2,
|
||||||
"link": true,
|
"link": true,
|
||||||
"linkTooltip": "Drill down",
|
"linkTooltip": "Drill down",
|
||||||
"linkUrl": "/dashboard/file/k8s-resources-pod.json?var-datasource=$datasource&var-namespace=$namespace&var-pod=$__cell",
|
"linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-namespace=$namespace&var-pod=$__cell",
|
||||||
"pattern": "pod",
|
"pattern": "pod",
|
||||||
"thresholds": [
|
"thresholds": [
|
||||||
|
|
||||||
@@ -3629,7 +3632,7 @@ items:
|
|||||||
"decimals": 2,
|
"decimals": 2,
|
||||||
"link": true,
|
"link": true,
|
||||||
"linkTooltip": "Drill down",
|
"linkTooltip": "Drill down",
|
||||||
"linkUrl": "/dashboard/file/k8s-resources-pod.json?var-datasource=$datasource&var-namespace=$namespace&var-pod=$__cell",
|
"linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-namespace=$namespace&var-pod=$__cell",
|
||||||
"pattern": "pod",
|
"pattern": "pod",
|
||||||
"thresholds": [
|
"thresholds": [
|
||||||
|
|
||||||
@@ -3828,6 +3831,7 @@ items:
|
|||||||
},
|
},
|
||||||
"timezone": "utc",
|
"timezone": "utc",
|
||||||
"title": "K8s / Compute Resources / Namespace",
|
"title": "K8s / Compute Resources / Namespace",
|
||||||
|
"uid": "85a562078cdf77779eaa1add43ccec1e",
|
||||||
"version": 0
|
"version": 0
|
||||||
}
|
}
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
@@ -4683,6 +4687,7 @@ items:
|
|||||||
},
|
},
|
||||||
"timezone": "utc",
|
"timezone": "utc",
|
||||||
"title": "K8s / Compute Resources / Pod",
|
"title": "K8s / Compute Resources / Pod",
|
||||||
|
"uid": "6581e46e4e5c7ba40a07646395ef7b23",
|
||||||
"version": 0
|
"version": 0
|
||||||
}
|
}
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
@@ -5609,6 +5614,7 @@ items:
|
|||||||
},
|
},
|
||||||
"timezone": "browser",
|
"timezone": "browser",
|
||||||
"title": "Nodes",
|
"title": "Nodes",
|
||||||
|
"uid": "fa49a4706d07a042595b664c87fb33ea",
|
||||||
"version": 0
|
"version": 0
|
||||||
}
|
}
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
@@ -6098,6 +6104,7 @@ items:
|
|||||||
},
|
},
|
||||||
"timezone": "browser",
|
"timezone": "browser",
|
||||||
"title": "Pods",
|
"title": "Pods",
|
||||||
|
"uid": "ab4f13a9892a76a4d21ce8c2445bf4ea",
|
||||||
"version": 0
|
"version": 0
|
||||||
}
|
}
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
@@ -6950,6 +6957,7 @@ items:
|
|||||||
},
|
},
|
||||||
"timezone": "browser",
|
"timezone": "browser",
|
||||||
"title": "StatefulSets",
|
"title": "StatefulSets",
|
||||||
|
"uid": "a31c1f46e6f727cb37c0d731a7245005",
|
||||||
"version": 0
|
"version": 0
|
||||||
}
|
}
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
|
@@ -19,7 +19,7 @@ spec:
|
|||||||
- args:
|
- args:
|
||||||
- --secure-listen-address=:8443
|
- --secure-listen-address=:8443
|
||||||
- --upstream=http://127.0.0.1:8081/
|
- --upstream=http://127.0.0.1:8081/
|
||||||
image: quay.io/coreos/kube-rbac-proxy:v0.3.0
|
image: quay.io/coreos/kube-rbac-proxy:v0.3.1
|
||||||
name: kube-rbac-proxy-main
|
name: kube-rbac-proxy-main
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8443
|
- containerPort: 8443
|
||||||
@@ -34,7 +34,7 @@ spec:
|
|||||||
- args:
|
- args:
|
||||||
- --secure-listen-address=:9443
|
- --secure-listen-address=:9443
|
||||||
- --upstream=http://127.0.0.1:8082/
|
- --upstream=http://127.0.0.1:8082/
|
||||||
image: quay.io/coreos/kube-rbac-proxy:v0.3.0
|
image: quay.io/coreos/kube-rbac-proxy:v0.3.1
|
||||||
name: kube-rbac-proxy-self
|
name: kube-rbac-proxy-self
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 9443
|
- containerPort: 9443
|
||||||
@@ -51,7 +51,7 @@ spec:
|
|||||||
- --port=8081
|
- --port=8081
|
||||||
- --telemetry-host=127.0.0.1
|
- --telemetry-host=127.0.0.1
|
||||||
- --telemetry-port=8082
|
- --telemetry-port=8082
|
||||||
image: quay.io/coreos/kube-state-metrics:v1.3.0
|
image: quay.io/coreos/kube-state-metrics:v1.3.1
|
||||||
name: kube-state-metrics
|
name: kube-state-metrics
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
|
@@ -38,7 +38,7 @@ spec:
|
|||||||
- args:
|
- args:
|
||||||
- --secure-listen-address=:9100
|
- --secure-listen-address=:9100
|
||||||
- --upstream=http://127.0.0.1:9101/
|
- --upstream=http://127.0.0.1:9101/
|
||||||
image: quay.io/coreos/kube-rbac-proxy:v0.3.0
|
image: quay.io/coreos/kube-rbac-proxy:v0.3.1
|
||||||
name: kube-rbac-proxy
|
name: kube-rbac-proxy
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 9100
|
- containerPort: 9100
|
||||||
|
@@ -27,4 +27,4 @@ spec:
|
|||||||
matchExpressions:
|
matchExpressions:
|
||||||
- key: k8s-app
|
- key: k8s-app
|
||||||
operator: Exists
|
operator: Exists
|
||||||
version: v2.2.1
|
version: v2.3.1
|
||||||
|
@@ -202,21 +202,21 @@ spec:
|
|||||||
)
|
)
|
||||||
record: node:node_memory_swap_io_bytes:sum_rate
|
record: node:node_memory_swap_io_bytes:sum_rate
|
||||||
- expr: |
|
- expr: |
|
||||||
avg(irate(node_disk_io_time_ms{job="node-exporter",device=~"(sd|xvd).+"}[1m]) / 1e3)
|
avg(irate(node_disk_io_time_ms{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]) / 1e3)
|
||||||
record: :node_disk_utilisation:avg_irate
|
record: :node_disk_utilisation:avg_irate
|
||||||
- expr: |
|
- expr: |
|
||||||
avg by (node) (
|
avg by (node) (
|
||||||
irate(node_disk_io_time_ms{job="node-exporter",device=~"(sd|xvd).+"}[1m]) / 1e3
|
irate(node_disk_io_time_ms{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]) / 1e3
|
||||||
* on (namespace, pod) group_left(node)
|
* on (namespace, pod) group_left(node)
|
||||||
node_namespace_pod:kube_pod_info:
|
node_namespace_pod:kube_pod_info:
|
||||||
)
|
)
|
||||||
record: node:node_disk_utilisation:avg_irate
|
record: node:node_disk_utilisation:avg_irate
|
||||||
- expr: |
|
- expr: |
|
||||||
avg(irate(node_disk_io_time_weighted{job="node-exporter",device=~"(sd|xvd).+"}[1m]) / 1e3)
|
avg(irate(node_disk_io_time_weighted{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]) / 1e3)
|
||||||
record: :node_disk_saturation:avg_irate
|
record: :node_disk_saturation:avg_irate
|
||||||
- expr: |
|
- expr: |
|
||||||
avg by (node) (
|
avg by (node) (
|
||||||
irate(node_disk_io_time_weighted{job="node-exporter",device=~"(sd|xvd).+"}[1m]) / 1e3
|
irate(node_disk_io_time_weighted{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]) / 1e3
|
||||||
* on (namespace, pod) group_left(node)
|
* on (namespace, pod) group_left(node)
|
||||||
node_namespace_pod:kube_pod_info:
|
node_namespace_pod:kube_pod_info:
|
||||||
)
|
)
|
||||||
@@ -268,6 +268,7 @@ spec:
|
|||||||
- alert: AlertmanagerDown
|
- alert: AlertmanagerDown
|
||||||
annotations:
|
annotations:
|
||||||
message: Alertmanager has disappeared from Prometheus target discovery.
|
message: Alertmanager has disappeared from Prometheus target discovery.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-alertmanagerdown
|
||||||
expr: |
|
expr: |
|
||||||
absent(up{job="alertmanager-main"} == 1)
|
absent(up{job="alertmanager-main"} == 1)
|
||||||
for: 15m
|
for: 15m
|
||||||
@@ -276,6 +277,7 @@ spec:
|
|||||||
- alert: KubeAPIDown
|
- alert: KubeAPIDown
|
||||||
annotations:
|
annotations:
|
||||||
message: KubeAPI has disappeared from Prometheus target discovery.
|
message: KubeAPI has disappeared from Prometheus target discovery.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown
|
||||||
expr: |
|
expr: |
|
||||||
absent(up{job="apiserver"} == 1)
|
absent(up{job="apiserver"} == 1)
|
||||||
for: 15m
|
for: 15m
|
||||||
@@ -284,6 +286,7 @@ spec:
|
|||||||
- alert: KubeControllerManagerDown
|
- alert: KubeControllerManagerDown
|
||||||
annotations:
|
annotations:
|
||||||
message: KubeControllerManager has disappeared from Prometheus target discovery.
|
message: KubeControllerManager has disappeared from Prometheus target discovery.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown
|
||||||
expr: |
|
expr: |
|
||||||
absent(up{job="kube-controller-manager"} == 1)
|
absent(up{job="kube-controller-manager"} == 1)
|
||||||
for: 15m
|
for: 15m
|
||||||
@@ -292,6 +295,7 @@ spec:
|
|||||||
- alert: KubeSchedulerDown
|
- alert: KubeSchedulerDown
|
||||||
annotations:
|
annotations:
|
||||||
message: KubeScheduler has disappeared from Prometheus target discovery.
|
message: KubeScheduler has disappeared from Prometheus target discovery.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown
|
||||||
expr: |
|
expr: |
|
||||||
absent(up{job="kube-scheduler"} == 1)
|
absent(up{job="kube-scheduler"} == 1)
|
||||||
for: 15m
|
for: 15m
|
||||||
@@ -300,6 +304,7 @@ spec:
|
|||||||
- alert: KubeStateMetricsDown
|
- alert: KubeStateMetricsDown
|
||||||
annotations:
|
annotations:
|
||||||
message: KubeStateMetrics has disappeared from Prometheus target discovery.
|
message: KubeStateMetrics has disappeared from Prometheus target discovery.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricsdown
|
||||||
expr: |
|
expr: |
|
||||||
absent(up{job="kube-state-metrics"} == 1)
|
absent(up{job="kube-state-metrics"} == 1)
|
||||||
for: 15m
|
for: 15m
|
||||||
@@ -308,6 +313,7 @@ spec:
|
|||||||
- alert: KubeletDown
|
- alert: KubeletDown
|
||||||
annotations:
|
annotations:
|
||||||
message: Kubelet has disappeared from Prometheus target discovery.
|
message: Kubelet has disappeared from Prometheus target discovery.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletdown
|
||||||
expr: |
|
expr: |
|
||||||
absent(up{job="kubelet"} == 1)
|
absent(up{job="kubelet"} == 1)
|
||||||
for: 15m
|
for: 15m
|
||||||
@@ -316,6 +322,7 @@ spec:
|
|||||||
- alert: NodeExporterDown
|
- alert: NodeExporterDown
|
||||||
annotations:
|
annotations:
|
||||||
message: NodeExporter has disappeared from Prometheus target discovery.
|
message: NodeExporter has disappeared from Prometheus target discovery.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeexporterdown
|
||||||
expr: |
|
expr: |
|
||||||
absent(up{job="node-exporter"} == 1)
|
absent(up{job="node-exporter"} == 1)
|
||||||
for: 15m
|
for: 15m
|
||||||
@@ -324,6 +331,7 @@ spec:
|
|||||||
- alert: PrometheusDown
|
- alert: PrometheusDown
|
||||||
annotations:
|
annotations:
|
||||||
message: Prometheus has disappeared from Prometheus target discovery.
|
message: Prometheus has disappeared from Prometheus target discovery.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusdown
|
||||||
expr: |
|
expr: |
|
||||||
absent(up{job="prometheus-k8s"} == 1)
|
absent(up{job="prometheus-k8s"} == 1)
|
||||||
for: 15m
|
for: 15m
|
||||||
@@ -332,6 +340,7 @@ spec:
|
|||||||
- alert: PrometheusOperatorDown
|
- alert: PrometheusOperatorDown
|
||||||
annotations:
|
annotations:
|
||||||
message: PrometheusOperator has disappeared from Prometheus target discovery.
|
message: PrometheusOperator has disappeared from Prometheus target discovery.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatordown
|
||||||
expr: |
|
expr: |
|
||||||
absent(up{job="prometheus-operator"} == 1)
|
absent(up{job="prometheus-operator"} == 1)
|
||||||
for: 15m
|
for: 15m
|
||||||
@@ -343,6 +352,7 @@ spec:
|
|||||||
annotations:
|
annotations:
|
||||||
message: '{{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
|
message: '{{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
|
||||||
}}) is restarting {{ printf "%.2f" $value }} / second'
|
}}) is restarting {{ printf "%.2f" $value }} / second'
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping
|
||||||
expr: |
|
expr: |
|
||||||
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[15m]) > 0
|
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[15m]) > 0
|
||||||
for: 1h
|
for: 1h
|
||||||
@@ -351,6 +361,7 @@ spec:
|
|||||||
- alert: KubePodNotReady
|
- alert: KubePodNotReady
|
||||||
annotations:
|
annotations:
|
||||||
message: '{{ $labels.namespace }}/{{ $labels.pod }} is not ready.'
|
message: '{{ $labels.namespace }}/{{ $labels.pod }} is not ready.'
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
|
||||||
expr: |
|
expr: |
|
||||||
sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase!~"Running|Succeeded"}) > 0
|
sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase!~"Running|Succeeded"}) > 0
|
||||||
for: 1h
|
for: 1h
|
||||||
@@ -360,6 +371,7 @@ spec:
|
|||||||
annotations:
|
annotations:
|
||||||
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} generation
|
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} generation
|
||||||
mismatch
|
mismatch
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch
|
||||||
expr: |
|
expr: |
|
||||||
kube_deployment_status_observed_generation{job="kube-state-metrics"}
|
kube_deployment_status_observed_generation{job="kube-state-metrics"}
|
||||||
!=
|
!=
|
||||||
@@ -371,6 +383,7 @@ spec:
|
|||||||
annotations:
|
annotations:
|
||||||
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} replica
|
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} replica
|
||||||
mismatch
|
mismatch
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch
|
||||||
expr: |
|
expr: |
|
||||||
kube_deployment_spec_replicas{job="kube-state-metrics"}
|
kube_deployment_spec_replicas{job="kube-state-metrics"}
|
||||||
!=
|
!=
|
||||||
@@ -382,6 +395,7 @@ spec:
|
|||||||
annotations:
|
annotations:
|
||||||
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} replica
|
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} replica
|
||||||
mismatch
|
mismatch
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch
|
||||||
expr: |
|
expr: |
|
||||||
kube_statefulset_status_replicas_ready{job="kube-state-metrics"}
|
kube_statefulset_status_replicas_ready{job="kube-state-metrics"}
|
||||||
!=
|
!=
|
||||||
@@ -393,6 +407,7 @@ spec:
|
|||||||
annotations:
|
annotations:
|
||||||
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} generation
|
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} generation
|
||||||
mismatch
|
mismatch
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch
|
||||||
expr: |
|
expr: |
|
||||||
kube_statefulset_status_observed_generation{job="kube-state-metrics"}
|
kube_statefulset_status_observed_generation{job="kube-state-metrics"}
|
||||||
!=
|
!=
|
||||||
@@ -404,6 +419,7 @@ spec:
|
|||||||
annotations:
|
annotations:
|
||||||
message: Only {{$value}}% of desired pods scheduled and ready for daemon set
|
message: Only {{$value}}% of desired pods scheduled and ready for daemon set
|
||||||
{{$labels.namespace}}/{{$labels.daemonset}}
|
{{$labels.namespace}}/{{$labels.daemonset}}
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck
|
||||||
expr: |
|
expr: |
|
||||||
kube_daemonset_status_number_ready{job="kube-state-metrics"}
|
kube_daemonset_status_number_ready{job="kube-state-metrics"}
|
||||||
/
|
/
|
||||||
@@ -415,6 +431,7 @@ spec:
|
|||||||
annotations:
|
annotations:
|
||||||
message: A number of pods of daemonset {{$labels.namespace}}/{{$labels.daemonset}}
|
message: A number of pods of daemonset {{$labels.namespace}}/{{$labels.daemonset}}
|
||||||
are not scheduled.
|
are not scheduled.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled
|
||||||
expr: |
|
expr: |
|
||||||
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"}
|
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"}
|
||||||
-
|
-
|
||||||
@@ -426,17 +443,48 @@ spec:
|
|||||||
annotations:
|
annotations:
|
||||||
message: A number of pods of daemonset {{$labels.namespace}}/{{$labels.daemonset}}
|
message: A number of pods of daemonset {{$labels.namespace}}/{{$labels.daemonset}}
|
||||||
are running where they are not supposed to run.
|
are running where they are not supposed to run.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled
|
||||||
expr: |
|
expr: |
|
||||||
kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
|
kube_daemonset_status_number_misscheduled{job="kube-state-metrics"} > 0
|
||||||
for: 10m
|
for: 10m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
|
- alert: KubeCronJobRunning
|
||||||
|
annotations:
|
||||||
|
message: CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is taking
|
||||||
|
more than 1h to complete.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecronjobrunning
|
||||||
|
expr: |
|
||||||
|
time() - kube_cronjob_next_schedule_time{job="kube-state-metrics"} > 3600
|
||||||
|
for: 1h
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: KubeJobCompletion
|
||||||
|
annotations:
|
||||||
|
message: Job {{ $labels.namespace }}/{{ $labels.job }} is taking more than
|
||||||
|
1h to complete.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion
|
||||||
|
expr: |
|
||||||
|
kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0
|
||||||
|
for: 1h
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- alert: KubeJobFailed
|
||||||
|
annotations:
|
||||||
|
message: Job {{ $labels.namespace }}/{{ $labels.job }} failed to complete.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed
|
||||||
|
expr: |
|
||||||
|
kube_job_status_failed{job="kube-state-metrics"} > 0
|
||||||
|
for: 1h
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
- name: kubernetes-resources
|
- name: kubernetes-resources
|
||||||
rules:
|
rules:
|
||||||
- alert: KubeCPUOvercommit
|
- alert: KubeCPUOvercommit
|
||||||
annotations:
|
annotations:
|
||||||
message: Overcommitted CPU resource requests on Pods, cannot tolerate node
|
message: Overcommitted CPU resource requests on Pods, cannot tolerate node
|
||||||
failure.
|
failure.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
|
||||||
expr: |
|
expr: |
|
||||||
sum(namespace_name:kube_pod_container_resource_requests_cpu_cores:sum)
|
sum(namespace_name:kube_pod_container_resource_requests_cpu_cores:sum)
|
||||||
/
|
/
|
||||||
@@ -450,6 +498,7 @@ spec:
|
|||||||
annotations:
|
annotations:
|
||||||
message: Overcommitted Memory resource requests on Pods, cannot tolerate node
|
message: Overcommitted Memory resource requests on Pods, cannot tolerate node
|
||||||
failure.
|
failure.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit
|
||||||
expr: |
|
expr: |
|
||||||
sum(namespace_name:kube_pod_container_resource_requests_memory_bytes:sum)
|
sum(namespace_name:kube_pod_container_resource_requests_memory_bytes:sum)
|
||||||
/
|
/
|
||||||
@@ -464,6 +513,7 @@ spec:
|
|||||||
- alert: KubeCPUOvercommit
|
- alert: KubeCPUOvercommit
|
||||||
annotations:
|
annotations:
|
||||||
message: Overcommitted CPU resource request quota on Namespaces.
|
message: Overcommitted CPU resource request quota on Namespaces.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
|
||||||
expr: |
|
expr: |
|
||||||
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="requests.cpu"})
|
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="requests.cpu"})
|
||||||
/
|
/
|
||||||
@@ -475,6 +525,7 @@ spec:
|
|||||||
- alert: KubeMemOvercommit
|
- alert: KubeMemOvercommit
|
||||||
annotations:
|
annotations:
|
||||||
message: Overcommitted Memory resource request quota on Namespaces.
|
message: Overcommitted Memory resource request quota on Namespaces.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit
|
||||||
expr: |
|
expr: |
|
||||||
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="requests.memory"})
|
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="requests.memory"})
|
||||||
/
|
/
|
||||||
@@ -487,6 +538,7 @@ spec:
|
|||||||
annotations:
|
annotations:
|
||||||
message: '{{ printf "%0.0f" $value }}% usage of {{ $labels.resource }} in
|
message: '{{ printf "%0.0f" $value }}% usage of {{ $labels.resource }} in
|
||||||
namespace {{ $labels.namespace }}.'
|
namespace {{ $labels.namespace }}.'
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded
|
||||||
expr: |
|
expr: |
|
||||||
100 * kube_resourcequota{job="kube-state-metrics", type="used"}
|
100 * kube_resourcequota{job="kube-state-metrics", type="used"}
|
||||||
/ ignoring(instance, job, type)
|
/ ignoring(instance, job, type)
|
||||||
@@ -502,6 +554,7 @@ spec:
|
|||||||
message: The persistent volume claimed by {{ $labels.persistentvolumeclaim
|
message: The persistent volume claimed by {{ $labels.persistentvolumeclaim
|
||||||
}} in namespace {{ $labels.namespace }} has {{ printf "%0.0f" $value }}%
|
}} in namespace {{ $labels.namespace }} has {{ printf "%0.0f" $value }}%
|
||||||
free.
|
free.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeusagecritical
|
||||||
expr: |
|
expr: |
|
||||||
100 * kubelet_volume_stats_available_bytes{job="kubelet"}
|
100 * kubelet_volume_stats_available_bytes{job="kubelet"}
|
||||||
/
|
/
|
||||||
@@ -515,6 +568,7 @@ spec:
|
|||||||
message: Based on recent sampling, the persistent volume claimed by {{ $labels.persistentvolumeclaim
|
message: Based on recent sampling, the persistent volume claimed by {{ $labels.persistentvolumeclaim
|
||||||
}} in namespace {{ $labels.namespace }} is expected to fill up within four
|
}} in namespace {{ $labels.namespace }} is expected to fill up within four
|
||||||
days.
|
days.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefullinfourdays
|
||||||
expr: |
|
expr: |
|
||||||
predict_linear(kubelet_volume_stats_available_bytes{job="kubelet"}[1h], 4 * 24 * 3600) < 0
|
predict_linear(kubelet_volume_stats_available_bytes{job="kubelet"}[1h], 4 * 24 * 3600) < 0
|
||||||
for: 5m
|
for: 5m
|
||||||
@@ -525,6 +579,7 @@ spec:
|
|||||||
- alert: KubeNodeNotReady
|
- alert: KubeNodeNotReady
|
||||||
annotations:
|
annotations:
|
||||||
message: '{{ $labels.node }} has been unready for more than an hour'
|
message: '{{ $labels.node }} has been unready for more than an hour'
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready
|
||||||
expr: |
|
expr: |
|
||||||
kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
|
kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
|
||||||
for: 1h
|
for: 1h
|
||||||
@@ -534,6 +589,7 @@ spec:
|
|||||||
annotations:
|
annotations:
|
||||||
message: There are {{ $value }} different versions of Kubernetes components
|
message: There are {{ $value }} different versions of Kubernetes components
|
||||||
running.
|
running.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch
|
||||||
expr: |
|
expr: |
|
||||||
count(count(kubernetes_build_info{job!="kube-dns"}) by (gitVersion)) > 1
|
count(count(kubernetes_build_info{job!="kube-dns"}) by (gitVersion)) > 1
|
||||||
for: 1h
|
for: 1h
|
||||||
@@ -543,6 +599,7 @@ spec:
|
|||||||
annotations:
|
annotations:
|
||||||
message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance
|
message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance
|
||||||
}}' is experiencing {{ printf "%0.0f" $value }}% errors.'
|
}}' is experiencing {{ printf "%0.0f" $value }}% errors.'
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
|
||||||
expr: |
|
expr: |
|
||||||
sum(rate(rest_client_requests_total{code!~"2.."}[5m])) by (instance, job) * 100
|
sum(rate(rest_client_requests_total{code!~"2.."}[5m])) by (instance, job) * 100
|
||||||
/
|
/
|
||||||
@@ -555,6 +612,7 @@ spec:
|
|||||||
annotations:
|
annotations:
|
||||||
message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance
|
message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance
|
||||||
}}' is experiencing {{ printf "%0.0f" $value }} errors / sec.'
|
}}' is experiencing {{ printf "%0.0f" $value }} errors / sec.'
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
|
||||||
expr: |
|
expr: |
|
||||||
sum(rate(ksm_scrape_error_total{job="kube-state-metrics"}[5m])) by (instance, job) > 0.1
|
sum(rate(ksm_scrape_error_total{job="kube-state-metrics"}[5m])) by (instance, job) > 0.1
|
||||||
for: 15m
|
for: 15m
|
||||||
@@ -564,6 +622,7 @@ spec:
|
|||||||
annotations:
|
annotations:
|
||||||
message: Kubelet {{$labels.instance}} is running {{$value}} pods, close to
|
message: Kubelet {{$labels.instance}} is running {{$value}} pods, close to
|
||||||
the limit of 110.
|
the limit of 110.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
|
||||||
expr: |
|
expr: |
|
||||||
kubelet_running_pod_count{job="kubelet"} > 100
|
kubelet_running_pod_count{job="kubelet"} > 100
|
||||||
for: 15m
|
for: 15m
|
||||||
@@ -573,6 +632,7 @@ spec:
|
|||||||
annotations:
|
annotations:
|
||||||
message: The API server has a 99th percentile latency of {{ $value }} seconds
|
message: The API server has a 99th percentile latency of {{ $value }} seconds
|
||||||
for {{$labels.verb}} {{$labels.resource}}.
|
for {{$labels.verb}} {{$labels.resource}}.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
|
||||||
expr: |
|
expr: |
|
||||||
cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"} > 1
|
cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"} > 1
|
||||||
for: 10m
|
for: 10m
|
||||||
@@ -582,6 +642,7 @@ spec:
|
|||||||
annotations:
|
annotations:
|
||||||
message: The API server has a 99th percentile latency of {{ $value }} seconds
|
message: The API server has a 99th percentile latency of {{ $value }} seconds
|
||||||
for {{$labels.verb}} {{$labels.resource}}.
|
for {{$labels.verb}} {{$labels.resource}}.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
|
||||||
expr: |
|
expr: |
|
||||||
cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4
|
cluster_quantile:apiserver_request_latencies:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4
|
||||||
for: 10m
|
for: 10m
|
||||||
@@ -590,6 +651,7 @@ spec:
|
|||||||
- alert: KubeAPIErrorsHigh
|
- alert: KubeAPIErrorsHigh
|
||||||
annotations:
|
annotations:
|
||||||
message: API server is erroring for {{ $value }}% of requests.
|
message: API server is erroring for {{ $value }}% of requests.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
|
||||||
expr: |
|
expr: |
|
||||||
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) without(instance, pod)
|
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) without(instance, pod)
|
||||||
/
|
/
|
||||||
@@ -600,6 +662,7 @@ spec:
|
|||||||
- alert: KubeAPIErrorsHigh
|
- alert: KubeAPIErrorsHigh
|
||||||
annotations:
|
annotations:
|
||||||
message: API server is erroring for {{ $value }}% of requests.
|
message: API server is erroring for {{ $value }}% of requests.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
|
||||||
expr: |
|
expr: |
|
||||||
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) without(instance, pod)
|
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) without(instance, pod)
|
||||||
/
|
/
|
||||||
@@ -610,6 +673,7 @@ spec:
|
|||||||
- alert: KubeClientCertificateExpiration
|
- alert: KubeClientCertificateExpiration
|
||||||
annotations:
|
annotations:
|
||||||
message: Kubernetes API certificate is expiring in less than 7 days.
|
message: Kubernetes API certificate is expiring in less than 7 days.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
|
||||||
expr: |
|
expr: |
|
||||||
histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
|
histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
|
||||||
labels:
|
labels:
|
||||||
@@ -617,6 +681,7 @@ spec:
|
|||||||
- alert: KubeClientCertificateExpiration
|
- alert: KubeClientCertificateExpiration
|
||||||
annotations:
|
annotations:
|
||||||
message: Kubernetes API certificate is expiring in less than 1 day.
|
message: Kubernetes API certificate is expiring in less than 1 day.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
|
||||||
expr: |
|
expr: |
|
||||||
histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
|
histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
|
||||||
labels:
|
labels:
|
||||||
|
Reference in New Issue
Block a user