use ServiceMonitors to generate Prometheus Kubernetes config
This commit is contained in:
@@ -1,92 +0,0 @@
|
||||
---
# ConfigMap carrying a hand-written Prometheus configuration under the
# `prometheus.yaml` key. Everything inside the literal block below is the
# configuration text itself, so it is kept verbatim (comments included).
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-k8s
  creationTimestamp: null
data:
  prometheus.yaml: |
    alerting:
      alertmanagers:
      - kubernetes_sd_configs:
        - role: endpoints
        relabel_configs:
        - action: keep
          regex: alertmanager-main
          source_labels:
          - __meta_kubernetes_service_name
        - action: keep
          regex: monitoring
          source_labels:
          - __meta_kubernetes_namespace
        - action: keep
          regex: web
          source_labels:
          - __meta_kubernetes_endpoint_port_name
        scheme: http

    global:
      scrape_interval: 15s
      evaluation_interval: 15s

    rule_files:
    - /etc/prometheus/rules/*.rules

    scrape_configs:
    - job_name: kubelets
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        # Skip verification until we have resolved why the certificate validation
        # for the kubelet on API server nodes fail.
        insecure_skip_verify: true
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

      kubernetes_sd_configs:
      - role: node

    # Scrapes the endpoint lists for the Kubernetes API server, kube-state-metrics,
    # and node-exporter, which we all consider part of a default setup.
    - job_name: standard-endpoints
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        # As for kubelets, certificate validation fails for the API server (node)
        # and we circumvent it for now.
        insecure_skip_verify: true
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

      kubernetes_sd_configs:
      - role: endpoints

      relabel_configs:
      - action: keep
        source_labels: [__meta_kubernetes_service_name]
        regex: prometheus|node-exporter|kube-state-metrics
      - action: replace
        source_labels: [__meta_kubernetes_service_name]
        target_label: job

    # Scrapes the endpoint lists for the kube-dns server. Which we consider
    # part of a default setup.
    - job_name: kube-components
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

      kubernetes_sd_configs:
      - role: endpoints

      relabel_configs:
      - action: replace
        source_labels: [__meta_kubernetes_service_label_k8s_app]
        target_label: job
      - action: keep
        source_labels: [__meta_kubernetes_service_name]
        regex: ".*-prometheus-discovery"
      - action: keep
        source_labels: [__meta_kubernetes_endpoint_port_name]
        regex: "http-metrics.*|https-metrics.*"
      - action: replace
        source_labels: [__meta_kubernetes_endpoint_port_name]
        regex: "https-metrics.*"
        target_label: __scheme__
        replacement: https
|
||||
@@ -226,7 +226,7 @@ data:
|
||||
histogram_quantile(0.5,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6
|
||||
|
||||
ALERT K8SNodeDown
|
||||
IF up{job="kubelets"} == 0
|
||||
IF up{job="kubelet"} == 0
|
||||
FOR 1h
|
||||
LABELS {
|
||||
service = "k8s",
|
||||
@@ -281,7 +281,7 @@ data:
|
||||
}
|
||||
|
||||
ALERT K8SKubeletDown
|
||||
IF absent(up{job="kubelets"}) or count by (cluster) (up{job="kubelets"} == 0) / count by (cluster) (up{job="kubelets"}) > 0.1
|
||||
IF absent(up{job="kubelet"}) or count by (cluster) (up{job="kubelet"} == 0) / count by (cluster) (up{job="kubelet"}) > 0.1
|
||||
FOR 1h
|
||||
LABELS {
|
||||
service = "k8s",
|
||||
@@ -378,7 +378,7 @@ data:
|
||||
}
|
||||
|
||||
ALERT K8STooManyOpenFiles
|
||||
IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 50
|
||||
IF 100*process_open_fds{job=~"kubelet|kubernetes"} / process_max_fds > 50
|
||||
FOR 10m
|
||||
LABELS {
|
||||
service = "k8s",
|
||||
@@ -390,7 +390,7 @@ data:
|
||||
}
|
||||
|
||||
ALERT K8STooManyOpenFiles
|
||||
IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 80
|
||||
IF 100*process_open_fds{job=~"kubelet|kubernetes"} / process_max_fds > 80
|
||||
FOR 10m
|
||||
LABELS {
|
||||
service = "k8s",
|
||||
|
||||
69
manifests/prometheus/prometheus-k8s-servicemonitor.yaml
Normal file
69
manifests/prometheus/prometheus-k8s-servicemonitor.yaml
Normal file
@@ -0,0 +1,69 @@
|
||||
---
# ServiceMonitor "kube-apiserver".
# Matches objects labelled component=apiserver and provider=kubernetes in the
# `default` namespace and scrapes their `https` port every 15s.
apiVersion: monitoring.coreos.com/v1alpha1
kind: ServiceMonitor
metadata:
  labels:
    k8s-apps: https
  name: kube-apiserver
spec:
  # The job name is derived from the `provider` label.
  jobLabel: provider
  namespaceSelector:
    matchNames:
      - default
  selector:
    matchLabels:
      provider: kubernetes
      component: apiserver
  endpoints:
    - port: https
      scheme: https
      interval: 15s
      bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
      tlsConfig:
        caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        # Certificate verification is switched off even though a CA file is set.
        insecureSkipVerify: true
|
||||
---
|
||||
# ServiceMonitor "k8s-apps-https".
# Matches any object in `kube-system` that carries a `k8s-app` label (any
# value) and scrapes its `https-metrics` port over HTTPS every 15s.
apiVersion: monitoring.coreos.com/v1alpha1
kind: ServiceMonitor
metadata:
  labels:
    k8s-apps: https
  name: k8s-apps-https
spec:
  # The job name is derived from the `k8s-app` label.
  jobLabel: k8s-app
  namespaceSelector:
    matchNames:
      - kube-system
  selector:
    matchExpressions:
      - key: k8s-app
        operator: Exists
  endpoints:
    - port: https-metrics
      scheme: https
      interval: 15s
      bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
      tlsConfig:
        caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        # Certificate verification is switched off even though a CA file is set.
        insecureSkipVerify: true
|
||||
---
|
||||
# ServiceMonitor "k8s-apps-http".
# Matches any object in `kube-system` or `monitoring` that carries a `k8s-app`
# label (any value) and scrapes three named plain-HTTP ports every 15s.
apiVersion: monitoring.coreos.com/v1alpha1
kind: ServiceMonitor
metadata:
  labels:
    k8s-apps: http
  name: k8s-apps-http
spec:
  # The job name is derived from the `k8s-app` label.
  jobLabel: k8s-app
  namespaceSelector:
    matchNames:
      - kube-system
      - monitoring
  selector:
    matchExpressions:
      - key: k8s-app
        operator: Exists
  # No scheme or TLS settings are given for these endpoints.
  endpoints:
    - interval: 15s
      port: http-metrics
    - interval: 15s
      port: http-metrics-dnsmasq
    - interval: 15s
      port: http-metrics-skydns
|
||||
@@ -11,4 +11,4 @@ spec:
|
||||
protocol: TCP
|
||||
targetPort: web
|
||||
selector:
|
||||
prometheus: prometheus-k8s
|
||||
prometheus: k8s
|
||||
|
||||
@@ -1,11 +1,14 @@
|
||||
apiVersion: monitoring.coreos.com/v1alpha1
|
||||
kind: Prometheus
|
||||
metadata:
|
||||
name: prometheus-k8s
|
||||
name: k8s
|
||||
labels:
|
||||
prometheus: k8s
|
||||
spec:
|
||||
version: v1.4.1
|
||||
version: v1.5.2
|
||||
# Pick ServiceMonitor objects by label: anything carrying a `k8s-apps` label
# key, whatever its value.
# The label-selector field is `matchExpressions` (plural). The singular
# spelling is not a selector field, so the filter below would not be expressed.
serviceMonitorSelector:
  matchExpressions:
  - {key: k8s-apps, operator: Exists}
|
||||
resources:
|
||||
requests:
|
||||
# 2Gi is default, but won't schedule if you don't have a node with >2Gi
|
||||
@@ -13,3 +16,8 @@ spec:
|
||||
# production use. This value is mainly meant for demonstration/testing
|
||||
# purposes.
|
||||
memory: 400Mi
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- namespace: monitoring
|
||||
name: alertmanager-main
|
||||
port: web
|
||||
|
||||
Reference in New Issue
Block a user