Merge branch 'master' of github.com:coreos/kube-prometheus

This commit is contained in:
Fabian Reinartz
2016-11-04 13:04:51 -07:00
6 changed files with 88 additions and 14 deletions

View File

@@ -0,0 +1,68 @@
# Prometheus configuration for monitoring a Kubernetes cluster.
#
# Every job discovers its targets through the Kubernetes API and authenticates
# with the service-account CA bundle and token mounted under
# /var/run/secrets/kubernetes.io/serviceaccount.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

rule_files:
  - /etc/prometheus/rules/*.rules

scrape_configs:
  # Scrapes the kubelet of every node known to the Kubernetes API.
  - job_name: kubelets
    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      # Skip verification until we have resolved why certificate validation
      # fails for the kubelet on API server nodes.
      insecure_skip_verify: true
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    kubernetes_sd_configs:
      - role: node

  # Scrapes the endpoint lists of the services we consider part of a default
  # setup: Prometheus itself, the Kubernetes API server, node-exporter,
  # kube-state-metrics, and etcd.
  - job_name: standard-endpoints
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      # As for kubelets, certificate validation fails for the API server (node)
      # and we circumvent it for now.
      insecure_skip_verify: true
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    kubernetes_sd_configs:
      - role: endpoints
    relabel_configs:
      # Drop every endpoint whose service is not in the allow-list below.
      - action: keep
        source_labels: [__meta_kubernetes_service_name]
        regex: prometheus|kubernetes|node-exporter|kube-state-metrics|etcd-k8s
      # Use the service name as the job label instead of "standard-endpoints".
      - action: replace
        source_labels: [__meta_kubernetes_service_name]
        target_label: job
      # Switch the scrape scheme to https for the "kubernetes" service only;
      # all other targets keep the job's default scheme.
      - action: replace
        source_labels: [__meta_kubernetes_service_name]
        regex: kubernetes
        target_label: __scheme__
        replacement: https

  # Scrapes the endpoint lists of the services named
  # kube-<component>-prometheus-discovery (for example kube-dns), which we
  # consider part of a default setup.
  - job_name: kube-components
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    kubernetes_sd_configs:
      - role: endpoints
    relabel_configs:
      # Derive the job label "kube-<component>" from the discovery service name.
      - action: replace
        source_labels: [__meta_kubernetes_service_name]
        target_label: job
        regex: "kube-(.*)-prometheus-discovery"
        replacement: "kube-${1}"
      # Keep only endpoints that belong to a discovery service ...
      - action: keep
        source_labels: [__meta_kubernetes_service_name]
        regex: "kube-(.*)-prometheus-discovery"
      # ... and of those, only the endpoint port named "prometheus".
      - action: keep
        source_labels: [__meta_kubernetes_endpoint_port_name]
        regex: "prometheus"

View File

@@ -1,3 +1,5 @@
# NOTE: These rules were kindly contributed by the SoundCloud engineering team.
### Container resources ###
cluster_namespace_controller_pod_container:spec_memory_limit_bytes =
@@ -249,7 +251,7 @@ ALERT K8SApiserverDown
# Disable for non HA kubernetes setups.
ALERT K8SApiserverDown
IF absent({job="kubernetes"}) or count by(cluster) (up{job="kubernetes"} == 1) < 2
IF absent({job="kubernetes"}) or (count by(cluster) (up{job="kubernetes"} == 1) < count by(cluster) (up{job="kubernetes"}))
FOR 5m
LABELS {
service = "k8s",
@@ -361,7 +363,7 @@ ALERT K8STooManyOpenFiles
ALERT K8SApiServerLatency
IF histogram_quantile(
0.99,
sum without (instance,node,resource) (apiserver_request_latencies_bucket{verb!~"CONNECT|WATCHLIST"})
sum without (instance,node,resource) (apiserver_request_latencies_bucket{verb!~"CONNECT|WATCHLIST|WATCH"})
) / 1e6 > 1.0
FOR 10m
LABELS {

View File

@@ -1,7 +1,10 @@
#!/bin/bash
# Regenerates the ConfigMap manifests under manifests/ from the files in assets/.
# Each command renders the ConfigMap locally (--dry-run=true, nothing is sent to
# the cluster) and redirects the resulting YAML into the manifest file.

# Generate Prometheus configuration ConfigMap
kubectl create configmap --dry-run=true prometheus-k8s --from-file=assets/prometheus/prometheus.yaml -oyaml > manifests/prometheus/prometheus-k8s-cm.yaml

# Generate Alert Rules ConfigMap
# The rules are read from assets/prometheus/rules/ only. The earlier command
# that read assets/alerts/ wrote to this same manifest and was immediately
# overwritten, so it has been dropped.
kubectl create configmap --dry-run=true prometheus-k8s-rules --from-file=assets/prometheus/rules/ -oyaml > manifests/prometheus/prometheus-k8s-rules.yaml

# Generate Dashboard ConfigMap
kubectl create configmap --dry-run=true grafana-dashboards --from-file=assets/grafana/ -oyaml > manifests/grafana/grafana-cm.yaml

View File

@@ -1,18 +1,15 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus-k8s
data:
prometheus.yaml: |
global:
evaluation_interval: 30s
scrape_interval: 15s
evaluation_interval: 15s
rule_files:
- /etc/prometheus/rules/*.rules
- /etc/prometheus/rules/*.rules
scrape_configs:
- job_name: kubelets
scrape_interval: 20s
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
@@ -27,7 +24,6 @@ data:
# Scrapes the endpoint lists for the Kubernetes API server, kube-state-metrics,
# and node-exporter, which we all consider part of a default setup.
- job_name: standard-endpoints
scrape_interval: 20s
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# As for kubelets, certificate validation fails for the API server (node)
@@ -41,7 +37,7 @@ data:
relabel_configs:
- action: keep
source_labels: [__meta_kubernetes_service_name]
regex: kubernetes|node-exporter|kube-state-metrics|etcd-k8s
regex: prometheus|kubernetes|node-exporter|kube-state-metrics|etcd-k8s
- action: replace
source_labels: [__meta_kubernetes_service_name]
target_label: job
@@ -54,7 +50,6 @@ data:
# Scrapes the endpoint lists for the kube-dns server. Which we consider
# part of a default setup.
- job_name: kube-components
scrape_interval: 20s
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
@@ -74,3 +69,7 @@ data:
- action: keep
source_labels: [__meta_kubernetes_endpoint_port_name]
regex: "prometheus"
kind: ConfigMap
metadata:
creationTimestamp: null
name: prometheus-k8s

View File

@@ -53,6 +53,8 @@ data:
\ summary = \"high fsync durations\",\n description = \"ectd instance {{
$labels.instance }} fync durations are high\",\n }\n"
kubernetes.rules: |+
# NOTE: These rules were kindly contributed by the SoundCloud engineering team.
### Container resources ###
cluster_namespace_controller_pod_container:spec_memory_limit_bytes =
@@ -304,7 +306,7 @@ data:
# Disable for non HA kubernetes setups.
ALERT K8SApiserverDown
IF absent({job="kubernetes"}) or count by(cluster) (up{job="kubernetes"} == 1) < 2
IF absent({job="kubernetes"}) or (count by(cluster) (up{job="kubernetes"} == 1) < count by(cluster) (up{job="kubernetes"}))
FOR 5m
LABELS {
service = "k8s",
@@ -416,7 +418,7 @@ data:
ALERT K8SApiServerLatency
IF histogram_quantile(
0.99,
sum without (instance,node,resource) (apiserver_request_latencies_bucket{verb!~"CONNECT|WATCHLIST"})
sum without (instance,node,resource) (apiserver_request_latencies_bucket{verb!~"CONNECT|WATCHLIST|WATCH"})
) / 1e6 > 1.0
FOR 10m
LABELS {