use ServiceMonitors to generate Prometheus Kubernetes config
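This change drops the hand-maintained Prometheus scrape configuration (the `prometheus-k8s` ConfigMap and the `assets/prometheus/prometheus.yaml` it was generated from, both deleted below) and instead describes scrape targets as ServiceMonitor objects that the prometheus-operator turns into Prometheus configuration. Alongside that, the scrape job `kubelets` becomes `kubelet` in the alert rules, the operator image moves to v0.6.0, Prometheus to v1.5.2, and kube-state-metrics to v0.4.1. For orientation, the general shape of a ServiceMonitor (an illustrative sketch only; the actual manifests added by this commit appear further down):

    apiVersion: monitoring.coreos.com/v1alpha1
    kind: ServiceMonitor
    metadata:
      name: example                # hypothetical name, not part of this commit
    spec:
      selector:
        matchLabels:
          k8s-app: example         # scrape Services carrying this label
      endpoints:
      - port: http-metrics         # the named Service port to scrape
        interval: 15s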
@@ -1,85 +0,0 @@
-alerting:
-  alertmanagers:
-  - kubernetes_sd_configs:
-    - role: endpoints
-    relabel_configs:
-    - action: keep
-      regex: alertmanager-main
-      source_labels:
-      - __meta_kubernetes_service_name
-    - action: keep
-      regex: monitoring
-      source_labels:
-      - __meta_kubernetes_namespace
-    - action: keep
-      regex: web
-      source_labels:
-      - __meta_kubernetes_endpoint_port_name
-    scheme: http
-
-global:
-  scrape_interval: 15s
-  evaluation_interval: 15s
-
-rule_files:
-- /etc/prometheus/rules/*.rules
-
-scrape_configs:
-- job_name: kubelets
-  scheme: https
-  tls_config:
-    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-    # Skip verification until we have resolved why the certificate validation
-    # for the kubelet on API server nodes fail.
-    insecure_skip_verify: true
-  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
-
-  kubernetes_sd_configs:
-  - role: node
-
-# Scrapes the endpoint lists for the Kubernetes API server, kube-state-metrics,
-# and node-exporter, which we all consider part of a default setup.
-- job_name: standard-endpoints
-  tls_config:
-    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-    # As for kubelets, certificate validation fails for the API server (node)
-    # and we circumvent it for now.
-    insecure_skip_verify: true
-  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
-
-  kubernetes_sd_configs:
-  - role: endpoints
-
-  relabel_configs:
-  - action: keep
-    source_labels: [__meta_kubernetes_service_name]
-    regex: prometheus|node-exporter|kube-state-metrics
-  - action: replace
-    source_labels: [__meta_kubernetes_service_name]
-    target_label: job
-
-# Scrapes the endpoint lists for the kube-dns server. Which we consider
-# part of a default setup.
-- job_name: kube-components
-  tls_config:
-    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
-
-  kubernetes_sd_configs:
-  - role: endpoints
-
-  relabel_configs:
-  - action: replace
-    source_labels: [__meta_kubernetes_service_label_k8s_app]
-    target_label: job
-  - action: keep
-    source_labels: [__meta_kubernetes_service_name]
-    regex: ".*-prometheus-discovery"
-  - action: keep
-    source_labels: [__meta_kubernetes_endpoint_port_name]
-    regex: "http-metrics.*|https-metrics.*"
-  - action: replace
-    source_labels: [__meta_kubernetes_endpoint_port_name]
-    regex: "https-metrics.*"
-    target_label: __scheme__
-    replacement: https
@@ -171,7 +171,7 @@ cluster:scheduler_binding_latency:quantile_seconds{quantile="0.5"} =
   histogram_quantile(0.5,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6

 ALERT K8SNodeDown
-  IF up{job="kubelets"} == 0
+  IF up{job="kubelet"} == 0
   FOR 1h
   LABELS {
     service = "k8s",
@@ -226,7 +226,7 @@ ALERT K8SKubeletNodeExporterDown
   }

 ALERT K8SKubeletDown
-  IF absent(up{job="kubelets"}) or count by (cluster) (up{job="kubelets"} == 0) / count by (cluster) (up{job="kubelets"}) > 0.1
+  IF absent(up{job="kubelet"}) or count by (cluster) (up{job="kubelet"} == 0) / count by (cluster) (up{job="kubelet"}) > 0.1
   FOR 1h
   LABELS {
     service = "k8s",
@@ -323,7 +323,7 @@ ALERT K8SConntrackTuningMissing
   }

 ALERT K8STooManyOpenFiles
-  IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 50
+  IF 100*process_open_fds{job=~"kubelet|kubernetes"} / process_max_fds > 50
   FOR 10m
   LABELS {
     service = "k8s",
@@ -335,7 +335,7 @@ ALERT K8STooManyOpenFiles
   }

 ALERT K8STooManyOpenFiles
-  IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 80
+  IF 100*process_open_fds{job=~"kubelet|kubernetes"} / process_max_fds > 80
   FOR 10m
   LABELS {
     service = "k8s",
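The rule changes above are a pure rename of the scrape job label from `kubelets` to `kubelet`, matching the job name the operator-generated configuration uses. A quick way to confirm the rename took effect after redeploying, as a hedged sketch (run in the Prometheus expression browser):

    # Expect series only with job="kubelet"; "kubelets" should return nothing
    count by (job) (up{job=~"kubelet|kubelets"})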
@@ -26,7 +26,6 @@ echo "done!"
 kctl apply -f manifests/exporters
 kctl apply -f manifests/grafana

-kctl apply -f manifests/prometheus/prometheus-k8s-cm.yaml
 kctl apply -f manifests/prometheus/prometheus-k8s-rules.yaml
 kctl apply -f manifests/prometheus/prometheus-k8s-svc.yaml

@@ -36,6 +35,7 @@ kctl apply -f manifests/alertmanager/alertmanager-service.yaml
 # `kubectl apply` is currently not working for third party resources so we are
 # using `kubectl create` here for the time being.
 # (https://github.com/kubernetes/kubernetes/issues/29542)
+kctl create -f manifests/prometheus/prometheus-k8s-servicemonitor.yaml
 kctl create -f manifests/prometheus/prometheus-k8s.yaml
 kctl create -f manifests/alertmanager/alertmanager.yaml
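Because `kubectl apply` cannot yet be used for third party resources (the issue linked above), re-running the script against an existing cluster will fail on the `create` calls for objects that already exist; a delete/create cycle works around it. A minimal sketch, assuming the manifests from this repository:

    kubectl delete -f manifests/prometheus/prometheus-k8s.yaml --ignore-not-found
    kubectl create -f manifests/prometheus/prometheus-k8s.yaml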
@@ -1,8 +1,5 @@
 #!/bin/bash

-# Generate Prometheus configuration ConfigMap
-kubectl create configmap --dry-run=true prometheus-k8s --from-file=assets/prometheus/prometheus.yaml -oyaml > manifests/prometheus/prometheus-k8s-cm.yaml
-
 # Generate Alert Rules ConfigMap
 kubectl create configmap --dry-run=true prometheus-k8s-rules --from-file=assets/prometheus/rules/ -oyaml > manifests/prometheus/prometheus-k8s-rules.yaml
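For reference, `kubectl create configmap --dry-run=true ... -oyaml` prints the manifest instead of creating it, so the remaining command regenerates a plain ConfigMap of roughly this shape (illustrative; file contents elided, one data key per file in assets/prometheus/rules/):

    apiVersion: v1
    kind: ConfigMap
    metadata:
      creationTimestamp: null
      name: prometheus-k8s-rules
    data:
      <rules-file>: |
        ...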
@@ -11,4 +11,4 @@ spec:
     protocol: TCP
     targetPort: web
   selector:
-    alertmanager: alertmanager-main
+    alertmanager: main
@@ -1,7 +1,7 @@
 apiVersion: "monitoring.coreos.com/v1alpha1"
 kind: "Alertmanager"
 metadata:
-  name: "alertmanager-main"
+  name: "main"
   labels:
     alertmanager: "main"
 spec:
@@ -6,7 +6,7 @@ metadata:
   labels:
     prometheus: frontend
 spec:
-  version: v1.4.1
+  version: v1.5.2
   serviceMonitorSelector:
     matchLabels:
       tier: frontend
@@ -11,7 +11,7 @@ spec:
     spec:
       containers:
      - name: kube-state-metrics
-        image: gcr.io/google_containers/kube-state-metrics:v0.3.0
+        image: gcr.io/google_containers/kube-state-metrics:v0.4.1
         ports:
         - name: metrics
           containerPort: 8080
@@ -3,10 +3,13 @@ kind: Service
 metadata:
   labels:
-    app: kube-state-metrics
+    k8s-app: kube-state-metrics
+  annotations:
+    alpha.monitoring.coreos.com/non-namespaced: "true"
   name: kube-state-metrics
 spec:
   ports:
-  - name: metrics
+  - name: http-metrics
     port: 8080
     targetPort: metrics
     protocol: TCP
@@ -3,12 +3,13 @@ kind: Service
 metadata:
   labels:
-    app: node-exporter
+    k8s-app: node-exporter
   name: node-exporter
 spec:
   type: ClusterIP
   clusterIP: None
   ports:
-  - name: metrics
+  - name: http-metrics
     port: 9100
     protocol: TCP
   selector:
@@ -1,27 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: kube-apiserver-prometheus-discovery
-  labels:
-    k8s-app: kubernetes
-spec:
-  type: ClusterIP
-  clusterIP: None
-  ports:
-  - name: https-metrics
-    port: 8443
-    protocol: TCP
----
-apiVersion: v1
-kind: Endpoints
-metadata:
-  name: kube-apiserver-prometheus-discovery
-  labels:
-    k8s-app: kubernetes
-subsets:
-- addresses:
-  - ip: 192.168.99.100
-  ports:
-  - name: https-metrics
-    port: 8443
-    protocol: TCP
@@ -1,16 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: kube-apiserver-prometheus-discovery
-  labels:
-    k8s-app: kubernetes
-spec:
-  selector:
-    k8s-app: kube-apiserver
-  type: ClusterIP
-  clusterIP: None
-  ports:
-  - name: https-metrics
-    port: 443
-    targetPort: 443
-    protocol: TCP
@@ -13,7 +13,10 @@ spec:
     spec:
       containers:
       - name: prometheus-operator
-        image: quay.io/coreos/prometheus-operator:v0.2.1
+        image: quay.io/coreos/prometheus-operator:v0.6.0
+        args:
+        - "--kubelet-object=kube-system/kubelet"
+        - "--config-reloader-image=quay.io/coreos/configmap-reload:latest"
         resources:
           requests:
             cpu: 100m
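The new `--kubelet-object` flag asks the operator to maintain a Service/Endpoints object for the kubelets at `kube-system/kubelet` (the target the renamed `kubelet` job scrapes), and `--config-reloader-image` selects the sidecar image used to reload generated configuration. A quick check once the operator is running, as a sketch:

    kubectl -n kube-system get endpoints kubelet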
@@ -1,92 +0,0 @@
-apiVersion: v1
-data:
-  prometheus.yaml: |
-    alerting:
-      alertmanagers:
-      - kubernetes_sd_configs:
-        - role: endpoints
-        relabel_configs:
-        - action: keep
-          regex: alertmanager-main
-          source_labels:
-          - __meta_kubernetes_service_name
-        - action: keep
-          regex: monitoring
-          source_labels:
-          - __meta_kubernetes_namespace
-        - action: keep
-          regex: web
-          source_labels:
-          - __meta_kubernetes_endpoint_port_name
-        scheme: http
-
-    global:
-      scrape_interval: 15s
-      evaluation_interval: 15s
-
-    rule_files:
-    - /etc/prometheus/rules/*.rules
-
-    scrape_configs:
-    - job_name: kubelets
-      scheme: https
-      tls_config:
-        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-        # Skip verification until we have resolved why the certificate validation
-        # for the kubelet on API server nodes fail.
-        insecure_skip_verify: true
-      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
-
-      kubernetes_sd_configs:
-      - role: node
-
-    # Scrapes the endpoint lists for the Kubernetes API server, kube-state-metrics,
-    # and node-exporter, which we all consider part of a default setup.
-    - job_name: standard-endpoints
-      tls_config:
-        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-        # As for kubelets, certificate validation fails for the API server (node)
-        # and we circumvent it for now.
-        insecure_skip_verify: true
-      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
-
-      kubernetes_sd_configs:
-      - role: endpoints
-
-      relabel_configs:
-      - action: keep
-        source_labels: [__meta_kubernetes_service_name]
-        regex: prometheus|node-exporter|kube-state-metrics
-      - action: replace
-        source_labels: [__meta_kubernetes_service_name]
-        target_label: job
-
-    # Scrapes the endpoint lists for the kube-dns server. Which we consider
-    # part of a default setup.
-    - job_name: kube-components
-      tls_config:
-        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
-
-      kubernetes_sd_configs:
-      - role: endpoints
-
-      relabel_configs:
-      - action: replace
-        source_labels: [__meta_kubernetes_service_label_k8s_app]
-        target_label: job
-      - action: keep
-        source_labels: [__meta_kubernetes_service_name]
-        regex: ".*-prometheus-discovery"
-      - action: keep
-        source_labels: [__meta_kubernetes_endpoint_port_name]
-        regex: "http-metrics.*|https-metrics.*"
-      - action: replace
-        source_labels: [__meta_kubernetes_endpoint_port_name]
-        regex: "https-metrics.*"
-        target_label: __scheme__
-        replacement: https
-kind: ConfigMap
-metadata:
-  creationTimestamp: null
-  name: prometheus-k8s
@@ -226,7 +226,7 @@ data:
       histogram_quantile(0.5,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6

     ALERT K8SNodeDown
-      IF up{job="kubelets"} == 0
+      IF up{job="kubelet"} == 0
       FOR 1h
       LABELS {
         service = "k8s",
@@ -281,7 +281,7 @@ data:
       }

     ALERT K8SKubeletDown
-      IF absent(up{job="kubelets"}) or count by (cluster) (up{job="kubelets"} == 0) / count by (cluster) (up{job="kubelets"}) > 0.1
+      IF absent(up{job="kubelet"}) or count by (cluster) (up{job="kubelet"} == 0) / count by (cluster) (up{job="kubelet"}) > 0.1
       FOR 1h
       LABELS {
         service = "k8s",
@@ -378,7 +378,7 @@ data:
      }

     ALERT K8STooManyOpenFiles
-      IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 50
+      IF 100*process_open_fds{job=~"kubelet|kubernetes"} / process_max_fds > 50
       FOR 10m
       LABELS {
         service = "k8s",
@@ -390,7 +390,7 @@ data:
       }

     ALERT K8STooManyOpenFiles
-      IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 80
+      IF 100*process_open_fds{job=~"kubelet|kubernetes"} / process_max_fds > 80
       FOR 10m
       LABELS {
         service = "k8s",
manifests/prometheus/prometheus-k8s-servicemonitor.yaml (new file, 69 lines)
@@ -0,0 +1,69 @@
+apiVersion: monitoring.coreos.com/v1alpha1
+kind: ServiceMonitor
+metadata:
+  name: kube-apiserver
+  labels:
+    k8s-apps: https
+spec:
+  jobLabel: provider
+  selector:
+    matchLabels:
+      component: apiserver
+      provider: kubernetes
+  namespaceSelector:
+    matchNames:
+    - default
+  endpoints:
+  - port: https
+    interval: 15s
+    scheme: https
+    tlsConfig:
+      caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+      insecureSkipVerify: true
+    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+---
+apiVersion: monitoring.coreos.com/v1alpha1
+kind: ServiceMonitor
+metadata:
+  name: k8s-apps-https
+  labels:
+    k8s-apps: https
+spec:
+  jobLabel: k8s-app
+  selector:
+    matchExpressions:
+    - {key: k8s-app, operator: Exists}
+  namespaceSelector:
+    matchNames:
+    - kube-system
+  endpoints:
+  - port: https-metrics
+    interval: 15s
+    scheme: https
+    tlsConfig:
+      caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+      insecureSkipVerify: true
+    bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
+---
+apiVersion: monitoring.coreos.com/v1alpha1
+kind: ServiceMonitor
+metadata:
+  name: k8s-apps-http
+  labels:
+    k8s-apps: http
+spec:
+  jobLabel: k8s-app
+  selector:
+    matchExpressions:
+    - {key: k8s-app, operator: Exists}
+  namespaceSelector:
+    matchNames:
+    - kube-system
+    - monitoring
+  endpoints:
+  - port: http-metrics
+    interval: 15s
+  - port: http-metrics-dnsmasq
+    interval: 15s
+  - port: http-metrics-skydns
+    interval: 15s
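Note the role of `jobLabel` in these monitors: the operator derives the Prometheus job name from that label on the matched Service, so a kube-system Service labeled `k8s-app: kube-dns` should surface under a query like (illustrative):

    up{job="kube-dns"}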
@@ -11,4 +11,4 @@ spec:
     protocol: TCP
     targetPort: web
   selector:
-    prometheus: prometheus-k8s
+    prometheus: k8s
@@ -1,11 +1,14 @@
 apiVersion: monitoring.coreos.com/v1alpha1
 kind: Prometheus
 metadata:
-  name: prometheus-k8s
+  name: k8s
   labels:
     prometheus: k8s
 spec:
-  version: v1.4.1
+  version: v1.5.2
   serviceMonitorSelector:
+    matchExpression:
+    - {key: k8s-apps, operator: Exists}
   resources:
     requests:
       # 2Gi is default, but won't schedule if you don't have a node with >2Gi
@@ -13,3 +16,8 @@ spec:
       # production use. This value is mainly meant for demonstration/testing
       # purposes.
       memory: 400Mi
+  alerting:
+    alertmanagers:
+    - namespace: monitoring
+      name: alertmanager-main
+      port: web
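This `alerting` section is the declarative replacement for the `alerting:` block of the deleted static configuration above: the operator generates the endpoint discovery that the old relabel rules spelled out by hand, i.e. keep targets whose service name is `alertmanager-main`, namespace `monitoring`, and port `web`. Excerpt of the deleted config, for comparison:

    alerting:
      alertmanagers:
      - kubernetes_sd_configs:
        - role: endpoints
        relabel_configs:
        - action: keep
          regex: alertmanager-main
          source_labels: [__meta_kubernetes_service_name]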