Merge pull request #33 from brancz/generate-k8s-config

Generate k8s config & operator v0.6.0
Frederic Branczyk
2017-02-28 15:37:35 +01:00
committed by GitHub
26 changed files with 146 additions and 249 deletions

View File

@@ -1,85 +0,0 @@
alerting:
alertmanagers:
- kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- action: keep
regex: alertmanager-main
source_labels:
- __meta_kubernetes_service_name
- action: keep
regex: monitoring
source_labels:
- __meta_kubernetes_namespace
- action: keep
regex: web
source_labels:
- __meta_kubernetes_endpoint_port_name
scheme: http
global:
scrape_interval: 15s
evaluation_interval: 15s
rule_files:
- /etc/prometheus/rules/*.rules
scrape_configs:
- job_name: kubelets
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# Skip verification until we have resolved why the certificate validation
# for the kubelet on API server nodes fails.
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
# Scrapes the endpoint lists for the Kubernetes API server, kube-state-metrics,
# and node-exporter, all of which we consider part of a default setup.
- job_name: standard-endpoints
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# As with the kubelets, certificate validation fails for the API server (node),
# so we circumvent it for now.
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- action: keep
source_labels: [__meta_kubernetes_service_name]
regex: prometheus|node-exporter|kube-state-metrics
- action: replace
source_labels: [__meta_kubernetes_service_name]
target_label: job
# Scrapes the endpoint lists for the kube-dns server, which we consider
# part of a default setup.
- job_name: kube-components
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- action: replace
source_labels: [__meta_kubernetes_service_label_k8s_app]
target_label: job
- action: keep
source_labels: [__meta_kubernetes_service_name]
regex: ".*-prometheus-discovery"
- action: keep
source_labels: [__meta_kubernetes_endpoint_port_name]
regex: "http-metrics.*|https-metrics.*"
- action: replace
source_labels: [__meta_kubernetes_endpoint_port_name]
regex: "https-metrics.*"
target_label: __scheme__
replacement: https
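The static scrape configuration above is deleted because the operator now generates an equivalent configuration from the ServiceMonitor objects added further down in this commit. A minimal inspection sketch, assuming the v0.6.0 operator still materializes the generated config as a ConfigMap named prometheus-<name> in the monitoring namespace (later operator versions moved it into a Secret):

# Inspect the operator-generated Prometheus configuration for the
# Prometheus object named "k8s" (the ConfigMap name is an assumption):
kubectl --namespace=monitoring get configmap prometheus-k8s -o yaml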

View File

@@ -171,7 +171,7 @@ cluster:scheduler_binding_latency:quantile_seconds{quantile="0.5"} =
histogram_quantile(0.5,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6
ALERT K8SNodeDown
IF up{job="kubelets"} == 0
IF up{job="kubelet"} == 0
FOR 1h
LABELS {
service = "k8s",
@@ -226,7 +226,7 @@ ALERT K8SKubeletNodeExporterDown
}
ALERT K8SKubeletDown
IF absent(up{job="kubelets"}) or count by (cluster) (up{job="kubelets"} == 0) / count by (cluster) (up{job="kubelets"}) > 0.1
IF absent(up{job="kubelet"}) or count by (cluster) (up{job="kubelet"} == 0) / count by (cluster) (up{job="kubelet"}) > 0.1
FOR 1h
LABELS {
service = "k8s",
@@ -323,7 +323,7 @@ ALERT K8SConntrackTuningMissing
}
ALERT K8STooManyOpenFiles
-IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 50
+IF 100*process_open_fds{job=~"kubelet|kubernetes"} / process_max_fds > 50
FOR 10m
LABELS {
service = "k8s",
@@ -335,7 +335,7 @@ ALERT K8STooManyOpenFiles
}
ALERT K8STooManyOpenFiles
-IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 80
+IF 100*process_open_fds{job=~"kubelet|kubernetes"} / process_max_fds > 80
FOR 10m
LABELS {
service = "k8s",

View File

@@ -26,7 +26,6 @@ echo "done!"
kctl apply -f manifests/exporters
kctl apply -f manifests/grafana
-kctl apply -f manifests/prometheus/prometheus-k8s-cm.yaml
kctl apply -f manifests/prometheus/prometheus-k8s-rules.yaml
kctl apply -f manifests/prometheus/prometheus-k8s-svc.yaml
@@ -36,6 +35,7 @@ kctl apply -f manifests/alertmanager/alertmanager-service.yaml
# `kubectl apply` is currently not working for third party resources so we are
# using `kubectl create` here for the time being.
# (https://github.com/kubernetes/kubernetes/issues/29542)
+kctl create -f manifests/prometheus/prometheus-k8s-servicemonitor.yaml
kctl create -f manifests/prometheus/prometheus-k8s.yaml
kctl create -f manifests/alertmanager/alertmanager.yaml
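Because kubectl apply does not work for third party resources (the issue linked above), re-running this script fails once the created objects exist. A hedged workaround sketch for local iteration, reusing the script's kctl helper:

# Delete-and-recreate is the pragmatic stand-in for apply on TPRs:
kctl delete -f manifests/prometheus/prometheus-k8s.yaml --ignore-not-found
kctl create -f manifests/prometheus/prometheus-k8s.yaml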

View File

@@ -0,0 +1,6 @@
#!/usr/bin/env bash
hack/cluster-monitoring/deploy
awk 'FNR==1{print "---"}1' manifests/k8s/minikube/*.yaml | sed s/MINIKUBE_IP/`minikube ip`/g | kubectl --namespace=kube-system apply -f -
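A gloss on the pipeline: awk prints a --- document separator before the first line of every file (FNR==1) and echoes every other line unchanged (the trailing 1), sed substitutes the MINIKUBE_IP placeholder with the live VM address, and kubectl reads the combined stream from stdin. The same command, line-broken for readability:

# Equivalent to the backtick substitution used above:
awk 'FNR==1{print "---"}1' manifests/k8s/minikube/*.yaml |
  sed "s/MINIKUBE_IP/$(minikube ip)/g" |
  kubectl --namespace=kube-system apply -f -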

View File

@@ -0,0 +1,6 @@
#!/usr/bin/env bash
hack/cluster-monitoring/teardown
kubectl --namespace=kube-system delete -f manifests/k8s/minikube

View File

@@ -0,0 +1,6 @@
#!/usr/bin/env bash
hack/cluster-monitoring/deploy
kubectl --namespace=kube-system apply -f manifests/k8s/self-hosted

View File

@@ -0,0 +1,6 @@
#!/usr/bin/env bash
hack/cluster-monitoring/teardown
kubectl --namespace=kube-system delete -f manifests/k8s/self-hosted

View File

@@ -4,8 +4,16 @@ if [ -z "${KUBECONFIG}" ]; then
KUBECONFIG=~/.kube/config
fi
kubectl --kubeconfig="$KUBECONFIG" apply -f manifests/examples/example-app/prometheus-frontend-svc.yaml
kubectl --kubeconfig="$KUBECONFIG" apply -f manifests/examples/example-app/example-app.yaml
kubectl --kubeconfig="$KUBECONFIG" create -f manifests/examples/example-app/prometheus-frontend.yaml
kubectl --kubeconfig="$KUBECONFIG" create -f manifests/examples/example-app/servicemonitor-frontend.yaml
if [ -z "${NAMESPACE}" ]; then
NAMESPACE=default
fi
kubectl --namespace "$NAMESPACE" --kubeconfig="$KUBECONFIG" apply -f manifests/examples/example-app/prometheus-frontend-svc.yaml
kubectl --namespace "$NAMESPACE" --kubeconfig="$KUBECONFIG" apply -f manifests/examples/example-app/example-app.yaml
# `kubectl apply` is currently not working for third party resources so we are
# using `kubectl create` here for the time being.
# (https://github.com/kubernetes/kubernetes/issues/29542)
kubectl --namespace "$NAMESPACE" --kubeconfig="$KUBECONFIG" create -f manifests/examples/example-app/prometheus-frontend.yaml
kubectl --namespace "$NAMESPACE" --kubeconfig="$KUBECONFIG" create -f manifests/examples/example-app/servicemonitor-frontend.yaml

View File

@@ -4,5 +4,9 @@ if [ -z "${KUBECONFIG}" ]; then
KUBECONFIG=~/.kube/config
fi
kubectl --kubeconfig="$KUBECONFIG" delete -f manifests/examples/example-app
if [ -z "${NAMESPACE}" ]; then
NAMESPACE=default
fi
kubectl --namespace "$NAMESPACE" --kubeconfig="$KUBECONFIG" delete -f manifests/examples/example-app

View File

@@ -1,8 +1,5 @@
#!/bin/bash
-# Generate Prometheus configuration ConfigMap
-kubectl create configmap --dry-run=true prometheus-k8s --from-file=assets/prometheus/prometheus.yaml -oyaml > manifests/prometheus/prometheus-k8s-cm.yaml
# Generate Alert Rules ConfigMap
kubectl create configmap --dry-run=true prometheus-k8s-rules --from-file=assets/prometheus/rules/ -oyaml > manifests/prometheus/prometheus-k8s-rules.yaml
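A hedged companion check: regenerate the rules ConfigMap in dry-run mode and diff it against the checked-in manifest; any output means the generated file is stale and the script needs a re-run:

kubectl create configmap --dry-run=true prometheus-k8s-rules --from-file=assets/prometheus/rules/ -oyaml \
  | diff - manifests/prometheus/prometheus-k8s-rules.yaml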

View File

@@ -11,4 +11,4 @@ spec:
protocol: TCP
targetPort: web
selector:
-alertmanager: alertmanager-main
+alertmanager: main

View File

@@ -1,7 +1,7 @@
apiVersion: "monitoring.coreos.com/v1alpha1"
kind: "Alertmanager"
metadata:
name: "alertmanager-main"
name: "main"
labels:
alertmanager: "main"
spec:
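The object is renamed to main because the operator prefixes the resources it creates with alertmanager-, so the StatefulSet and pods still come out as alertmanager-main; that is what the service selector change above relies on. A hedged verification sketch, assuming the operator labels pods with alertmanager=<object name>:

kubectl --namespace=monitoring get pods -l alertmanager=main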

View File

@@ -6,7 +6,7 @@ metadata:
labels:
prometheus: frontend
spec:
-version: v1.4.1
+version: v1.5.2
serviceMonitorSelector:
matchLabels:
tier: frontend

View File

@@ -11,7 +11,7 @@ spec:
spec:
containers:
- name: kube-state-metrics
-image: gcr.io/google_containers/kube-state-metrics:v0.3.0
+image: gcr.io/google_containers/kube-state-metrics:v0.4.1
ports:
- name: metrics
containerPort: 8080

View File

@@ -3,10 +3,13 @@ kind: Service
metadata:
labels:
app: kube-state-metrics
+k8s-app: kube-state-metrics
+annotations:
+alpha.monitoring.coreos.com/non-namespaced: "true"
name: kube-state-metrics
spec:
ports:
-- name: metrics
+- name: http-metrics
port: 8080
targetPort: metrics
protocol: TCP
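The rename from metrics to http-metrics is what lets the generic ServiceMonitors added later in this commit pick the port up, since they match endpoint ports named http-metrics/https-metrics. A quick check that the named port is actually exposed (assuming kube-state-metrics runs in the monitoring namespace):

kubectl --namespace=monitoring get endpoints kube-state-metrics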

View File

@@ -3,12 +3,13 @@ kind: Service
metadata:
labels:
app: node-exporter
+k8s-app: node-exporter
name: node-exporter
spec:
type: ClusterIP
clusterIP: None
ports:
-- name: metrics
+- name: http-metrics
port: 9100
protocol: TCP
selector:

View File

@@ -1,27 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: kube-apiserver-prometheus-discovery
labels:
k8s-app: kubernetes
spec:
type: ClusterIP
clusterIP: None
ports:
- name: https-metrics
port: 8443
protocol: TCP
---
apiVersion: v1
kind: Endpoints
metadata:
name: kube-apiserver-prometheus-discovery
labels:
k8s-app: kubernetes
subsets:
- addresses:
- ip: 192.168.99.100
ports:
- name: https-metrics
port: 8443
protocol: TCP

View File

@@ -21,7 +21,7 @@ metadata:
k8s-app: kube-controller-manager
subsets:
- addresses:
-- ip: 192.168.99.100
+- ip: MINIKUBE_IP
ports:
- name: http-metrics
port: 10252

View File

@@ -21,7 +21,7 @@ metadata:
k8s-app: kube-scheduler
subsets:
- addresses:
-- ip: 192.168.99.100
+- ip: MINIKUBE_IP
ports:
- name: http-metrics
port: 10251

View File

@@ -1,16 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: kube-apiserver-prometheus-discovery
labels:
k8s-app: kubernetes
spec:
selector:
k8s-app: kube-apiserver
type: ClusterIP
clusterIP: None
ports:
- name: https-metrics
port: 443
targetPort: 443
protocol: TCP

View File

@@ -13,7 +13,10 @@ spec:
spec:
containers:
- name: prometheus-operator
-image: quay.io/coreos/prometheus-operator:v0.2.1
+image: quay.io/coreos/prometheus-operator:v0.6.0
+args:
+- "--kubelet-object=kube-system/kubelet"
+- "--config-reloader-image=quay.io/coreos/configmap-reload:latest"
resources:
requests:
cpu: 100m
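The new --kubelet-object flag makes the operator maintain a kubelet Endpoints object in kube-system so kubelets are scraped through ordinary endpoint discovery; this is also why the rule files in this commit rename job="kubelets" to job="kubelet". A hedged sketch for verifying the synced object once the operator is running:

kubectl --namespace=kube-system get endpoints kubelet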

View File

@@ -1,92 +0,0 @@
apiVersion: v1
data:
prometheus.yaml: |
alerting:
alertmanagers:
- kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- action: keep
regex: alertmanager-main
source_labels:
- __meta_kubernetes_service_name
- action: keep
regex: monitoring
source_labels:
- __meta_kubernetes_namespace
- action: keep
regex: web
source_labels:
- __meta_kubernetes_endpoint_port_name
scheme: http
global:
scrape_interval: 15s
evaluation_interval: 15s
rule_files:
- /etc/prometheus/rules/*.rules
scrape_configs:
- job_name: kubelets
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# Skip verification until we have resolved why the certificate validation
# for the kubelet on API server nodes fails.
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
# Scrapes the endpoint lists for the Kubernetes API server, kube-state-metrics,
# and node-exporter, all of which we consider part of a default setup.
- job_name: standard-endpoints
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# As with the kubelets, certificate validation fails for the API server (node),
# so we circumvent it for now.
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- action: keep
source_labels: [__meta_kubernetes_service_name]
regex: prometheus|node-exporter|kube-state-metrics
- action: replace
source_labels: [__meta_kubernetes_service_name]
target_label: job
# Scrapes the endpoint lists for the kube-dns server, which we consider
# part of a default setup.
- job_name: kube-components
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- action: replace
source_labels: [__meta_kubernetes_service_label_k8s_app]
target_label: job
- action: keep
source_labels: [__meta_kubernetes_service_name]
regex: ".*-prometheus-discovery"
- action: keep
source_labels: [__meta_kubernetes_endpoint_port_name]
regex: "http-metrics.*|https-metrics.*"
- action: replace
source_labels: [__meta_kubernetes_endpoint_port_name]
regex: "https-metrics.*"
target_label: __scheme__
replacement: https
kind: ConfigMap
metadata:
creationTimestamp: null
name: prometheus-k8s

View File

@@ -226,7 +226,7 @@ data:
histogram_quantile(0.5,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6
ALERT K8SNodeDown
IF up{job="kubelets"} == 0
IF up{job="kubelet"} == 0
FOR 1h
LABELS {
service = "k8s",
@@ -281,7 +281,7 @@ data:
}
ALERT K8SKubeletDown
IF absent(up{job="kubelets"}) or count by (cluster) (up{job="kubelets"} == 0) / count by (cluster) (up{job="kubelets"}) > 0.1
IF absent(up{job="kubelet"}) or count by (cluster) (up{job="kubelet"} == 0) / count by (cluster) (up{job="kubelet"}) > 0.1
FOR 1h
LABELS {
service = "k8s",
@@ -378,7 +378,7 @@ data:
}
ALERT K8STooManyOpenFiles
-IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 50
+IF 100*process_open_fds{job=~"kubelet|kubernetes"} / process_max_fds > 50
FOR 10m
LABELS {
service = "k8s",
@@ -390,7 +390,7 @@ data:
}
ALERT K8STooManyOpenFiles
-IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 80
+IF 100*process_open_fds{job=~"kubelet|kubernetes"} / process_max_fds > 80
FOR 10m
LABELS {
service = "k8s",

View File

@@ -0,0 +1,69 @@
apiVersion: monitoring.coreos.com/v1alpha1
kind: ServiceMonitor
metadata:
name: kube-apiserver
labels:
k8s-apps: https
spec:
jobLabel: provider
selector:
matchLabels:
component: apiserver
provider: kubernetes
namespaceSelector:
matchNames:
- default
endpoints:
- port: https
interval: 15s
scheme: https
tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecureSkipVerify: true
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
---
apiVersion: monitoring.coreos.com/v1alpha1
kind: ServiceMonitor
metadata:
name: k8s-apps-https
labels:
k8s-apps: https
spec:
jobLabel: k8s-app
selector:
matchExpressions:
- {key: k8s-app, operator: Exists}
namespaceSelector:
matchNames:
- kube-system
endpoints:
- port: https-metrics
interval: 15s
scheme: https
tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecureSkipVerify: true
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
---
apiVersion: monitoring.coreos.com/v1alpha1
kind: ServiceMonitor
metadata:
name: k8s-apps-http
labels:
k8s-apps: http
spec:
jobLabel: k8s-app
selector:
matchExpressions:
- {key: k8s-app, operator: Exists}
namespaceSelector:
matchNames:
- kube-system
- monitoring
endpoints:
- port: http-metrics
interval: 15s
- port: http-metrics-dnsmasq
interval: 15s
- port: http-metrics-skydns
interval: 15s
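These ServiceMonitors replace the hand-written scrape_configs deleted above; the Prometheus object selects them via the k8s-apps label. A hedged sketch for listing what that selector picks up (third party resources list like any other object):

kubectl --namespace=monitoring get servicemonitors -l k8s-apps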

View File

@@ -11,4 +11,4 @@ spec:
protocol: TCP
targetPort: web
selector:
-prometheus: prometheus-k8s
+prometheus: k8s
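The selector now matches the label the operator attaches to the pods it creates for the Prometheus object named k8s. A hedged verification sketch:

kubectl --namespace=monitoring get pods -l prometheus=k8s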

View File

@@ -1,11 +1,14 @@
apiVersion: monitoring.coreos.com/v1alpha1
kind: Prometheus
metadata:
-name: prometheus-k8s
+name: k8s
labels:
prometheus: k8s
spec:
-version: v1.4.1
+version: v1.5.2
+serviceMonitorSelector:
+matchExpression:
+- {key: k8s-apps, operator: Exists}
resources:
requests:
# 2Gi is default, but won't schedule if you don't have a node with >2Gi
@@ -13,3 +16,8 @@ spec:
# production use. This value is mainly meant for demonstration/testing
# purposes.
memory: 400Mi
+alerting:
+alertmanagers:
+- namespace: monitoring
+name: alertmanager-main
+port: web
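The new alerting section points the generated Prometheus configuration at every ready endpoint of the alertmanager-main service on its web port. A hedged sketch to confirm those endpoints exist before alerts need to fan out:

kubectl --namespace=monitoring get endpoints alertmanager-main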