Merge branch 'master' of github.com:coreos/kube-prometheus
This commit is contained in:
68
assets/prometheus/prometheus.yaml
Normal file
68
assets/prometheus/prometheus.yaml
Normal file
@@ -0,0 +1,68 @@
|
||||
global:
|
||||
scrape_interval: 15s
|
||||
evaluation_interval: 15s
|
||||
|
||||
rule_files:
|
||||
- /etc/prometheus/rules/*.rules
|
||||
|
||||
scrape_configs:
|
||||
- job_name: kubelets
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
# Skip verification until we have resolved why the certificate validation
|
||||
# for the kubelet on API server nodes fails.
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
|
||||
# Scrapes the endpoint lists for the Kubernetes API server, kube-state-metrics,
|
||||
# and node-exporter, all of which we consider part of a default setup.
|
||||
- job_name: standard-endpoints
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
# As for kubelets, certificate validation fails for the API server (node)
|
||||
# and we circumvent it for now.
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
|
||||
relabel_configs:
|
||||
- action: keep
|
||||
source_labels: [__meta_kubernetes_service_name]
|
||||
regex: prometheus|kubernetes|node-exporter|kube-state-metrics|etcd-k8s
|
||||
- action: replace
|
||||
source_labels: [__meta_kubernetes_service_name]
|
||||
target_label: job
|
||||
- action: replace
|
||||
source_labels: [__meta_kubernetes_service_name]
|
||||
regex: kubernetes
|
||||
target_label: __scheme__
|
||||
replacement: https
|
||||
|
||||
# Scrapes the endpoint lists for the kube-dns server, which we consider
|
||||
# part of a default setup.
|
||||
- job_name: kube-components
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
|
||||
relabel_configs:
|
||||
- action: replace
|
||||
source_labels: [__meta_kubernetes_service_name]
|
||||
target_label: job
|
||||
regex: "kube-(.*)-prometheus-discovery"
|
||||
replacement: "kube-${1}"
|
||||
- action: keep
|
||||
source_labels: [__meta_kubernetes_service_name]
|
||||
regex: "kube-(.*)-prometheus-discovery"
|
||||
- action: keep
|
||||
source_labels: [__meta_kubernetes_endpoint_port_name]
|
||||
regex: "prometheus"
|
@@ -1,3 +1,5 @@
|
||||
# NOTE: These rules were kindly contributed by the SoundCloud engineering team.
|
||||
|
||||
### Container resources ###
|
||||
|
||||
cluster_namespace_controller_pod_container:spec_memory_limit_bytes =
|
||||
@@ -249,7 +251,7 @@ ALERT K8SApiserverDown
|
||||
|
||||
# Disable for non HA kubernetes setups.
|
||||
ALERT K8SApiserverDown
|
||||
IF absent({job="kubernetes"}) or count by(cluster) (up{job="kubernetes"} == 1) < 2
|
||||
IF absent({job="kubernetes"}) or (count by(cluster) (up{job="kubernetes"} == 1) < count by(cluster) (up{job="kubernetes"}))
|
||||
FOR 5m
|
||||
LABELS {
|
||||
service = "k8s",
|
||||
@@ -361,7 +363,7 @@ ALERT K8STooManyOpenFiles
|
||||
ALERT K8SApiServerLatency
|
||||
IF histogram_quantile(
|
||||
0.99,
|
||||
sum without (instance,node,resource) (apiserver_request_latencies_bucket{verb!~"CONNECT|WATCHLIST"})
|
||||
sum without (instance,node,resource) (apiserver_request_latencies_bucket{verb!~"CONNECT|WATCHLIST|WATCH"})
|
||||
) / 1e6 > 1.0
|
||||
FOR 10m
|
||||
LABELS {
|
@@ -1,7 +1,10 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Generate Prometheus configuration ConfigMap
|
||||
kubectl create configmap --dry-run=true prometheus-k8s --from-file=assets/prometheus/prometheus.yaml -oyaml > manifests/prometheus/prometheus-k8s-cm.yaml
|
||||
|
||||
# Generate Alert Rules ConfigMap
|
||||
kubectl create configmap --dry-run=true prometheus-k8s-rules --from-file=assets/alerts/ -oyaml > manifests/prometheus/prometheus-k8s-rules.yaml
|
||||
kubectl create configmap --dry-run=true prometheus-k8s-rules --from-file=assets/prometheus/rules/ -oyaml > manifests/prometheus/prometheus-k8s-rules.yaml
|
||||
|
||||
# Generate Dashboard ConfigMap
|
||||
kubectl create configmap --dry-run=true grafana-dashboards --from-file=assets/grafana/ -oyaml > manifests/grafana/grafana-cm.yaml
|
||||
|
@@ -1,18 +1,15 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: prometheus-k8s
|
||||
data:
|
||||
prometheus.yaml: |
|
||||
global:
|
||||
evaluation_interval: 30s
|
||||
scrape_interval: 15s
|
||||
evaluation_interval: 15s
|
||||
|
||||
rule_files:
|
||||
- /etc/prometheus/rules/*.rules
|
||||
- /etc/prometheus/rules/*.rules
|
||||
|
||||
scrape_configs:
|
||||
- job_name: kubelets
|
||||
scrape_interval: 20s
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
@@ -27,7 +24,6 @@ data:
|
||||
# Scrapes the endpoint lists for the Kubernetes API server, kube-state-metrics,
|
||||
# and node-exporter, all of which we consider part of a default setup.
|
||||
- job_name: standard-endpoints
|
||||
scrape_interval: 20s
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
# As for kubelets, certificate validation fails for the API server (node)
|
||||
@@ -41,7 +37,7 @@ data:
|
||||
relabel_configs:
|
||||
- action: keep
|
||||
source_labels: [__meta_kubernetes_service_name]
|
||||
regex: kubernetes|node-exporter|kube-state-metrics|etcd-k8s
|
||||
regex: prometheus|kubernetes|node-exporter|kube-state-metrics|etcd-k8s
|
||||
- action: replace
|
||||
source_labels: [__meta_kubernetes_service_name]
|
||||
target_label: job
|
||||
@@ -54,7 +50,6 @@ data:
|
||||
# Scrapes the endpoint lists for the kube-dns server, which we consider
|
||||
# part of a default setup.
|
||||
- job_name: kube-components
|
||||
scrape_interval: 20s
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
@@ -74,3 +69,7 @@ data:
|
||||
- action: keep
|
||||
source_labels: [__meta_kubernetes_endpoint_port_name]
|
||||
regex: "prometheus"
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
creationTimestamp: null
|
||||
name: prometheus-k8s
|
||||
|
@@ -53,6 +53,8 @@ data:
|
||||
\ summary = \"high fsync durations\",\n description = \"ectd instance {{
|
||||
$labels.instance }} fync durations are high\",\n }\n"
|
||||
kubernetes.rules: |+
|
||||
# NOTE: These rules were kindly contributed by the SoundCloud engineering team.
|
||||
|
||||
### Container resources ###
|
||||
|
||||
cluster_namespace_controller_pod_container:spec_memory_limit_bytes =
|
||||
@@ -304,7 +306,7 @@ data:
|
||||
|
||||
# Disable for non HA kubernetes setups.
|
||||
ALERT K8SApiserverDown
|
||||
IF absent({job="kubernetes"}) or count by(cluster) (up{job="kubernetes"} == 1) < 2
|
||||
IF absent({job="kubernetes"}) or (count by(cluster) (up{job="kubernetes"} == 1) < count by(cluster) (up{job="kubernetes"}))
|
||||
FOR 5m
|
||||
LABELS {
|
||||
service = "k8s",
|
||||
@@ -416,7 +418,7 @@ data:
|
||||
ALERT K8SApiServerLatency
|
||||
IF histogram_quantile(
|
||||
0.99,
|
||||
sum without (instance,node,resource) (apiserver_request_latencies_bucket{verb!~"CONNECT|WATCHLIST"})
|
||||
sum without (instance,node,resource) (apiserver_request_latencies_bucket{verb!~"CONNECT|WATCHLIST|WATCH"})
|
||||
) / 1e6 > 1.0
|
||||
FOR 10m
|
||||
LABELS {
|
||||
|
Reference in New Issue
Block a user