This commit is contained in:
Eduardo Gonzalez
2017-09-09 12:03:36 +02:00
40 changed files with 11968 additions and 6707 deletions

View File

@@ -1,9 +1,9 @@
apiVersion: "monitoring.coreos.com/v1alpha1"
kind: "Alertmanager"
apiVersion: monitoring.coreos.com/v1
kind: Alertmanager
metadata:
name: "main"
name: main
labels:
alertmanager: "main"
alertmanager: main
spec:
replicas: 3
version: v0.7.1

View File

@@ -1,4 +1,4 @@
apiVersion: monitoring.coreos.com/v1alpha1
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:

View File

@@ -1,4 +1,4 @@
apiVersion: monitoring.coreos.com/v1alpha1
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
name: frontend

View File

@@ -1,4 +1,4 @@
apiVersion: monitoring.coreos.com/v1alpha1
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: frontend

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,4 @@
apiVersion: extensions/v1beta1
apiVersion: apps/v1beta1
kind: Deployment
metadata:
name: grafana
@@ -41,7 +41,7 @@ spec:
memory: 200Mi
cpu: 200m
- name: grafana-watcher
image: quay.io/coreos/grafana-watcher:v0.0.7
image: quay.io/coreos/grafana-watcher:v0.0.8
args:
- '--watch-dir=/var/grafana-dashboards'
- '--grafana-url=http://localhost:3000'

View File

@@ -7,9 +7,9 @@ metadata:
spec:
type: NodePort
ports:
- name: web
port: 3000
- port: 3000
protocol: TCP
nodePort: 30902
targetPort: web
selector:
app: grafana

View File

@@ -11,6 +11,7 @@ rules:
- resourcequotas
- replicationcontrollers
- limitranges
- persistentvolumeclaims
verbs: ["list", "watch"]
- apiGroups: ["extensions"]
resources:
@@ -18,3 +19,12 @@ rules:
- deployments
- replicasets
verbs: ["list", "watch"]
- apiGroups: ["apps"]
resources:
- statefulsets
verbs: ["list", "watch"]
- apiGroups: ["batch"]
resources:
- cronjobs
- jobs
verbs: ["list", "watch"]

View File

@@ -12,10 +12,16 @@ spec:
serviceAccountName: kube-state-metrics
containers:
- name: kube-state-metrics
image: quay.io/coreos/kube-state-metrics:v0.5.0
image: quay.io/coreos/kube-state-metrics:v1.0.1
ports:
- name: metrics
containerPort: 8080
readinessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 5
timeoutSeconds: 5
resources:
requests:
memory: 100Mi
@@ -23,4 +29,30 @@ spec:
limits:
memory: 200Mi
cpu: 200m
- name: addon-resizer
image: gcr.io/google_containers/addon-resizer:1.0
resources:
limits:
cpu: 100m
memory: 30Mi
requests:
cpu: 100m
memory: 30Mi
env:
- name: MY_POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: MY_POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
command:
- /pod_nanny
- --container=kube-state-metrics
- --cpu=100m
- --extra-cpu=1m
- --memory=100Mi
- --extra-memory=2Mi
- --threshold=5
- --deployment=kube-state-metrics

View File

@@ -0,0 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
name: kube-state-metrics
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: kube-state-metrics-resizer
subjects:
- kind: ServiceAccount
name: kube-state-metrics

View File

@@ -0,0 +1,15 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: Role
metadata:
name: kube-state-metrics-resizer
rules:
- apiGroups: [""]
resources:
- pods
verbs: ["get"]
- apiGroups: ["extensions"]
resources:
- deployments
resourceNames: ["kube-state-metrics"]
verbs: ["get", "update"]

View File

@@ -35,6 +35,9 @@ spec:
- name: sys
readOnly: true
mountPath: /host/sys
tolerations:
- effect: NoSchedule
operator: Exists
volumes:
- name: proc
hostPath:

View File

@@ -8,7 +8,13 @@ rules:
resources:
- thirdpartyresources
verbs:
- create
- "*"
- apiGroups:
- apiextensions.k8s.io
resources:
- customresourcedefinitions
verbs:
- "*"
- apiGroups:
- monitoring.coreos.com
resources:
@@ -40,3 +46,7 @@ rules:
resources:
- nodes
verbs: ["list", "watch"]
- apiGroups: [""]
resources:
- namespaces
verbs: ["list"]

View File

@@ -15,7 +15,7 @@ spec:
- args:
- --kubelet-service=kube-system/kubelet
- --config-reloader-image=quay.io/coreos/configmap-reload:v0.0.1
image: quay.io/coreos/prometheus-operator:v0.11.1
image: quay.io/coreos/prometheus-operator:v0.12.0
name: prometheus-operator
ports:
- containerPort: 8080

View File

@@ -328,7 +328,7 @@ data:
}
kubelet.rules: |+
ALERT K8SNodeNotReady
IF kube_node_status_ready{condition="true"} == 0
IF kube_node_status_condition{condition="Ready", status="true"} == 0
FOR 1h
LABELS {
severity = "warning",
@@ -340,12 +340,12 @@ data:
ALERT K8SManyNodesNotReady
IF
count(kube_node_status_ready{condition="true"} == 0) > 1
count(kube_node_status_condition{condition="Ready", status="true"} == 0) > 1
AND
(
count(kube_node_status_ready{condition="true"} == 0)
count(kube_node_status_condition{condition="Ready", status="true"} == 0)
/
count(kube_node_status_ready{condition="true"})
count(kube_node_status_condition{condition="Ready", status="true"})
) > 0.2
FOR 1m
LABELS {
@@ -582,8 +582,9 @@ data:
summary = "node-exporter cannot be scraped",
description = "Prometheus could not scrape a node-exporter for more than 10m, or node-exporters have disappeared from discovery.",
}
ALERT K8SNodeOutOfDisk
IF kube_node_status_out_of_disk{condition="true"} == 1
IF kube_node_status_condition{condition="OutOfDisk", status="true"} == 1
LABELS {
service = "k8s",
severity = "critical"
@@ -592,9 +593,9 @@ data:
summary = "Node ran out of disk space.",
description = "{{ $labels.node }} has run out of disk space.",
}
ALERT K8SNodeMemoryPressure
IF kube_node_status_memory_pressure{condition="true"} == 1
IF kube_node_status_condition{condition="MemoryPressure", status="true"} == 1
LABELS {
service = "k8s",
severity = "warning"
@@ -603,9 +604,9 @@ data:
summary = "Node is under memory pressure.",
description = "{{ $labels.node }} is under memory pressure.",
}
ALERT K8SNodeDiskPressure
IF kube_node_status_disk_pressure{condition="true"} == 1
IF kube_node_status_condition{condition="DiskPressure", status="true"} == 1
LABELS {
service = "k8s",
severity = "warning"

View File

@@ -1,4 +1,4 @@
apiVersion: monitoring.coreos.com/v1alpha1
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: alertmanager

View File

@@ -1,4 +1,4 @@
apiVersion: monitoring.coreos.com/v1alpha1
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: kube-apiserver

View File

@@ -1,4 +1,4 @@
apiVersion: monitoring.coreos.com/v1alpha1
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: kube-controller-manager

View File

@@ -1,4 +1,4 @@
apiVersion: monitoring.coreos.com/v1alpha1
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: kube-scheduler

View File

@@ -1,4 +1,4 @@
apiVersion: monitoring.coreos.com/v1alpha1
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: kube-state-metrics

View File

@@ -1,4 +1,4 @@
apiVersion: monitoring.coreos.com/v1alpha1
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: kubelet

View File

@@ -1,4 +1,4 @@
apiVersion: monitoring.coreos.com/v1alpha1
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: node-exporter

View File

@@ -1,4 +1,4 @@
apiVersion: monitoring.coreos.com/v1alpha1
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: prometheus-operator

View File

@@ -1,4 +1,4 @@
apiVersion: monitoring.coreos.com/v1alpha1
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: prometheus

View File

@@ -1,4 +1,4 @@
apiVersion: monitoring.coreos.com/v1alpha1
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
name: k8s