use ServiceMonitors to generate Prometheus Kubernetes config

This commit is contained in:
Frederic Branczyk
2016-12-14 17:48:54 -08:00
parent d93a9adadc
commit d4b581fa49
18 changed files with 103 additions and 242 deletions

View File

@@ -1,85 +0,0 @@
alerting:
alertmanagers:
- kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- action: keep
regex: alertmanager-main
source_labels:
- __meta_kubernetes_service_name
- action: keep
regex: monitoring
source_labels:
- __meta_kubernetes_namespace
- action: keep
regex: web
source_labels:
- __meta_kubernetes_endpoint_port_name
scheme: http
global:
scrape_interval: 15s
evaluation_interval: 15s
rule_files:
- /etc/prometheus/rules/*.rules
scrape_configs:
- job_name: kubelets
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# Skip verification until we have resolved why the certificate validation
# for the kubelet on API server nodes fails.
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
# Scrapes the endpoint lists for the Kubernetes API server, kube-state-metrics,
# and node-exporter, all of which we consider part of a default setup.
- job_name: standard-endpoints
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# As with the kubelets, certificate validation fails for the API server (node),
# so we circumvent it for now.
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- action: keep
source_labels: [__meta_kubernetes_service_name]
regex: prometheus|node-exporter|kube-state-metrics
- action: replace
source_labels: [__meta_kubernetes_service_name]
target_label: job
# Scrapes the endpoint lists for the kube-dns server, which we consider
# part of a default setup.
- job_name: kube-components
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- action: replace
source_labels: [__meta_kubernetes_service_label_k8s_app]
target_label: job
- action: keep
source_labels: [__meta_kubernetes_service_name]
regex: ".*-prometheus-discovery"
- action: keep
source_labels: [__meta_kubernetes_endpoint_port_name]
regex: "http-metrics.*|https-metrics.*"
- action: replace
source_labels: [__meta_kubernetes_endpoint_port_name]
regex: "https-metrics.*"
target_label: __scheme__
replacement: https

View File

@@ -171,7 +171,7 @@ cluster:scheduler_binding_latency:quantile_seconds{quantile="0.5"} =
histogram_quantile(0.5,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6
ALERT K8SNodeDown
IF up{job="kubelets"} == 0
IF up{job="kubelet"} == 0
FOR 1h
LABELS {
service = "k8s",
@@ -226,7 +226,7 @@ ALERT K8SKubeletNodeExporterDown
}
ALERT K8SKubeletDown
IF absent(up{job="kubelets"}) or count by (cluster) (up{job="kubelets"} == 0) / count by (cluster) (up{job="kubelets"}) > 0.1
IF absent(up{job="kubelet"}) or count by (cluster) (up{job="kubelet"} == 0) / count by (cluster) (up{job="kubelet"}) > 0.1
FOR 1h
LABELS {
service = "k8s",
@@ -323,7 +323,7 @@ ALERT K8SConntrackTuningMissing
}
ALERT K8STooManyOpenFiles
IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 50
IF 100*process_open_fds{job=~"kubelet|kubernetes"} / process_max_fds > 50
FOR 10m
LABELS {
service = "k8s",
@@ -335,7 +335,7 @@ ALERT K8STooManyOpenFiles
}
ALERT K8STooManyOpenFiles
IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 80
IF 100*process_open_fds{job=~"kubelet|kubernetes"} / process_max_fds > 80
FOR 10m
LABELS {
service = "k8s",