Remove old manifests and replace with jsonnet build

This commit is contained in:
Frederic Branczyk
2018-04-10 10:51:00 +02:00
parent d8692794a9
commit 507617e150
127 changed files with 6332 additions and 8494 deletions
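For context, a hypothetical sketch of the kind of build step that replaces the hand-maintained manifests below; the entry-point name and the jsonnet/gojsontoyaml tooling are assumptions, not part of this diff:

# Compile the jsonnet sources into one JSON document per manifest.
jsonnet -J vendor -m manifests example.jsonnet
# Convert each generated JSON file to YAML (conversion tool assumed);
# a round-trip like this can yield the quoted-string rule files seen
# further down in this commit.
for f in manifests/*; do
gojsontoyaml < "$f" > "${f}.yaml" && rm "$f"
done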

View File

@@ -1,6 +1,8 @@
apiVersion: v1
data:
alertmanager.yaml: Z2xvYmFsOgogIHJlc29sdmVfdGltZW91dDogNW0Kcm91dGU6CiAgZ3JvdXBfYnk6IFsnam9iJ10KICBncm91cF93YWl0OiAzMHMKICBncm91cF9pbnRlcnZhbDogNW0KICByZXBlYXRfaW50ZXJ2YWw6IDEyaAogIHJlY2VpdmVyOiAnbnVsbCcKICByb3V0ZXM6CiAgLSBtYXRjaDoKICAgICAgYWxlcnRuYW1lOiBEZWFkTWFuc1N3aXRjaAogICAgcmVjZWl2ZXI6ICdudWxsJwpyZWNlaXZlcnM6Ci0gbmFtZTogJ251bGwnCg==
kind: Secret
metadata:
name: alertmanager-main
data:
alertmanager.yaml: Z2xvYmFsOgogIHJlc29sdmVfdGltZW91dDogNW0Kcm91dGU6CiAgZ3JvdXBfYnk6IFsnam9iJ10KICBncm91cF93YWl0OiAzMHMKICBncm91cF9pbnRlcnZhbDogNW0KICByZXBlYXRfaW50ZXJ2YWw6IDEyaAogIHJlY2VpdmVyOiAnbnVsbCcKICByb3V0ZXM6CiAgLSBtYXRjaDoKICAgICAgYWxlcnRuYW1lOiBEZWFkTWFuc1N3aXRjaAogICAgcmVjZWl2ZXI6ICdudWxsJwpyZWNlaXZlcnM6Ci0gbmFtZTogJ251bGwnCg==
namespace: monitoring
type: Opaque

View File

@@ -0,0 +1,5 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: alertmanager-main
namespace: monitoring

View File

@@ -4,13 +4,12 @@ metadata:
labels:
alertmanager: main
name: alertmanager-main
namespace: monitoring
spec:
type: NodePort
ports:
- name: web
nodePort: 30903
port: 9093
protocol: TCP
targetPort: web
selector:
alertmanager: main
app: alertmanager

View File

@@ -1,9 +1,11 @@
apiVersion: monitoring.coreos.com/v1
kind: Alertmanager
metadata:
name: main
labels:
alertmanager: main
name: main
namespace: monitoring
spec:
replicas: 3
serviceAccountName: alertmanager-main
version: v0.14.0

View File

@@ -1,7 +0,0 @@
apiserver-key.pem
apiserver.csr
apiserver.pem
metrics-ca-config.json
metrics-ca.crt
metrics-ca.key
cm-adapter-serving-certs.yaml

View File

@@ -1,11 +0,0 @@
# Custom Metrics API
The custom metrics API allows the HPA v2 to scale on arbitrary metrics.
This directory contains an example deployment of the custom metrics API adapter using Prometheus as the backing monitoring system.
In order to deploy the custom metrics adapter for Prometheus, you need to generate the TLS certificates used to serve the API. An example of how these could be generated can be found in `./gencerts.sh`; note that this is _not_ recommended for production use. You need to employ a secure PKI strategy; this is merely an example to get started and try it out quickly.
Once the generated `Secret` with the certificates is in place, you can deploy everything in the `monitoring` namespace using `./deploy.sh`.
When you're done, you can tear everything down using the `./teardown.sh` script.
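As a minimal sketch, the end-to-end flow described above, using only the scripts in this directory and run from this directory:

#!/usr/bin/env bash
# Generate a throwaway CA plus serving certs and write cm-adapter-serving-certs.yaml (demo PKI only).
./gencerts.sh
# Create the certificate Secret and all adapter resources in the monitoring namespace.
./deploy.sh
# Remove everything again once you are done experimenting.
./teardown.sh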

View File

@@ -1,12 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: custom-metrics:system:auth-delegator
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:auth-delegator
subjects:
- kind: ServiceAccount
name: custom-metrics-apiserver
namespace: monitoring

View File

@@ -1,41 +0,0 @@
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
labels:
app: custom-metrics-apiserver
name: custom-metrics-apiserver
spec:
replicas: 1
selector:
matchLabels:
app: custom-metrics-apiserver
template:
metadata:
labels:
app: custom-metrics-apiserver
name: custom-metrics-apiserver
spec:
serviceAccountName: custom-metrics-apiserver
containers:
- name: custom-metrics-apiserver
image: quay.io/coreos/k8s-prometheus-adapter-amd64:v0.2.0
args:
- /adapter
- --secure-port=6443
- --tls-cert-file=/var/run/serving-cert/serving.crt
- --tls-private-key-file=/var/run/serving-cert/serving.key
- --logtostderr=true
- --prometheus-url=http://prometheus-k8s.monitoring.svc:9090/
- --metrics-relist-interval=30s
- --rate-interval=5m
- --v=10
ports:
- containerPort: 6443
volumeMounts:
- mountPath: /var/run/serving-cert
name: volume-serving-cert
readOnly: true
volumes:
- name: volume-serving-cert
secret:
secretName: cm-adapter-serving-certs

View File

@@ -1,12 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: custom-metrics-resource-reader
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: custom-metrics-resource-reader
subjects:
- kind: ServiceAccount
name: custom-metrics-apiserver
namespace: monitoring

View File

@@ -1,10 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: custom-metrics-apiserver
spec:
ports:
- port: 443
targetPort: 6443
selector:
app: custom-metrics-apiserver

View File

@@ -1,13 +0,0 @@
apiVersion: apiregistration.k8s.io/v1beta1
kind: APIService
metadata:
name: v1beta1.custom.metrics.k8s.io
spec:
service:
name: custom-metrics-apiserver
namespace: monitoring
group: custom.metrics.k8s.io
version: v1beta1
insecureSkipTLSVerify: true
groupPriorityMinimum: 100
versionPriority: 100

View File

@@ -1,9 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
name: custom-metrics-server-resources
rules:
- apiGroups:
- custom.metrics.k8s.io
resources: ["*"]
verbs: ["*"]

View File

@@ -1,14 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
name: custom-metrics-resource-reader
rules:
- apiGroups:
- ""
resources:
- namespaces
- pods
- services
verbs:
- get
- list

View File

@@ -1,13 +0,0 @@
#!/usr/bin/env bash
kubectl create -f custom-metrics-apiserver-auth-delegator-cluster-role-binding.yaml
kubectl create -f custom-metrics-apiserver-auth-reader-role-binding.yaml
kubectl -n monitoring create -f cm-adapter-serving-certs.yaml
kubectl -n monitoring create -f custom-metrics-apiserver-deployment.yaml
kubectl create -f custom-metrics-apiserver-resource-reader-cluster-role-binding.yaml
kubectl -n monitoring create -f custom-metrics-apiserver-service-account.yaml
kubectl -n monitoring create -f custom-metrics-apiserver-service.yaml
kubectl create -f custom-metrics-apiservice.yaml
kubectl create -f custom-metrics-cluster-role.yaml
kubectl create -f custom-metrics-resource-reader-cluster-role.yaml
kubectl create -f hpa-custom-metrics-cluster-role-binding.yaml

View File

@@ -1,21 +0,0 @@
#!/usr/bin/env bash
# Install the cfssl tooling used to issue the serving certificate.
go get -v -u github.com/cloudflare/cfssl/cmd/...
export PURPOSE=metrics
# Create a self-signed CA, valid for one year, for the metrics API serving certs.
openssl req -x509 -sha256 -new -nodes -days 365 -newkey rsa:2048 -keyout ${PURPOSE}-ca.key -out ${PURPOSE}-ca.crt -subj "/CN=ca"
# Write a minimal cfssl signing profile for that CA.
echo '{"signing":{"default":{"expiry":"43800h","usages":["signing","key encipherment","'${PURPOSE}'"]}}}' > "${PURPOSE}-ca-config.json"
export SERVICE_NAME=custom-metrics-apiserver
export ALT_NAMES='"custom-metrics-apiserver.monitoring","custom-metrics-apiserver.monitoring.svc"'
# Issue the serving certificate for the adapter's in-cluster DNS names.
echo '{"CN":"'${SERVICE_NAME}'","hosts":['${ALT_NAMES}'],"key":{"algo":"rsa","size":2048}}' | cfssl gencert -ca=metrics-ca.crt -ca-key=metrics-ca.key -config=metrics-ca-config.json - | cfssljson -bare apiserver
# Render the generated cert and key into the Secret consumed by deploy.sh.
cat <<-EOF > cm-adapter-serving-certs.yaml
apiVersion: v1
kind: Secret
metadata:
name: cm-adapter-serving-certs
data:
serving.crt: $(cat apiserver.pem | base64 --wrap=0)
serving.key: $(cat apiserver-key.pem | base64 --wrap=0)
EOF

View File

@@ -1,12 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: hpa-controller-custom-metrics
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: custom-metrics-server-resources
subjects:
- kind: ServiceAccount
name: horizontal-pod-autoscaler
namespace: kube-system

View File

@@ -1,13 +0,0 @@
#!/usr/bin/env bash
kubectl delete -f custom-metrics-apiserver-auth-delegator-cluster-role-binding.yaml
kubectl delete -f custom-metrics-apiserver-auth-reader-role-binding.yaml
kubectl -n monitoring delete -f cm-adapter-serving-certs.yaml
kubectl -n monitoring delete -f custom-metrics-apiserver-deployment.yaml
kubectl delete -f custom-metrics-apiserver-resource-reader-cluster-role-binding.yaml
kubectl -n monitoring delete -f custom-metrics-apiserver-service-account.yaml
kubectl -n monitoring delete -f custom-metrics-apiserver-service.yaml
kubectl delete -f custom-metrics-apiservice.yaml
kubectl delete -f custom-metrics-cluster-role.yaml
kubectl delete -f custom-metrics-resource-reader-cluster-role.yaml
kubectl delete -f hpa-custom-metrics-cluster-role-binding.yaml

View File

@@ -1,28 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: etcd-k8s
labels:
k8s-app: etcd
spec:
type: ClusterIP
clusterIP: None
ports:
- name: api
port: 2379
protocol: TCP
---
apiVersion: v1
kind: Endpoints
metadata:
name: etcd-k8s
labels:
k8s-app: etcd
subsets:
- addresses:
- ip: 10.142.0.2
nodeName: 10.142.0.2
ports:
- name: api
port: 2379
protocol: TCP

View File

@@ -1,28 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: etcd-k8s
labels:
k8s-app: etcd
spec:
type: ClusterIP
clusterIP: None
ports:
- name: api
port: 2379
protocol: TCP
---
apiVersion: v1
kind: Endpoints
metadata:
name: etcd-k8s
labels:
k8s-app: etcd
subsets:
- addresses:
- ip: 172.17.4.51
nodeName: 172.17.4.51
ports:
- name: api
port: 2379
protocol: TCP

View File

@@ -1,8 +0,0 @@
apiVersion: v1
kind: Secret
metadata:
name: basic-auth
data:
password: dG9vcg== # toor
user: YWRtaW4= # admin
type: Opaque

View File

@@ -1,22 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
k8s-apps: basic-auth-example
name: basic-auth-example
spec:
endpoints:
- basicAuth:
password:
name: basic-auth
key: password
username:
name: basic-auth
key: user
port: metrics
namespaceSelector:
matchNames:
- logging
selector:
matchLabels:
app: myapp

View File

@@ -1,36 +0,0 @@
kind: Service
apiVersion: v1
metadata:
name: example-app
labels:
tier: frontend
namespace: default
spec:
selector:
app: example-app
ports:
- name: web
protocol: TCP
port: 8080
targetPort: web
---
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
name: example-app
namespace: default
spec:
replicas: 4
template:
metadata:
labels:
app: example-app
version: 1.1.3
spec:
containers:
- name: example-app
image: quay.io/fabxc/prometheus_demo_service
ports:
- name: web
containerPort: 8080
protocol: TCP

View File

@@ -1,12 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: Role
metadata:
name: alertmanager-discovery
namespace: monitoring
rules:
- apiGroups: [""]
resources:
- services
- endpoints
- pods
verbs: ["list", "watch"]

View File

@@ -1,13 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
name: prometheus-frontend
namespace: default
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-frontend
subjects:
- kind: ServiceAccount
name: prometheus-frontend
namespace: default

View File

@@ -1,17 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: Role
metadata:
name: prometheus-frontend
namespace: default
rules:
- apiGroups: [""]
resources:
- nodes
- services
- endpoints
- pods
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources:
- configmaps
verbs: ["get"]

View File

@@ -1,5 +0,0 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: prometheus-frontend
namespace: default

View File

@@ -1,15 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: prometheus-frontend
namespace: default
spec:
type: NodePort
ports:
- name: web
nodePort: 30100
port: 9090
protocol: TCP
targetPort: web
selector:
prometheus: frontend

View File

@@ -1,25 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
name: frontend
namespace: default
labels:
prometheus: frontend
spec:
serviceAccountName: prometheus-frontend
version: v1.7.1
serviceMonitorSelector:
matchLabels:
tier: frontend
resources:
requests:
# 2Gi is default, but won't schedule if you don't have a node with >2Gi
# memory. Modify based on your target and time-series count for
# production use. This value is mainly meant for demonstration/testing
# purposes.
memory: 400Mi
alerting:
alertmanagers:
- namespace: monitoring
name: alertmanager-main
port: web

View File

@@ -1,19 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: frontend
namespace: default
labels:
tier: frontend
spec:
selector:
matchLabels:
tier: frontend
targetLabels:
- tier
endpoints:
- port: web
interval: 10s
namespaceSelector:
matchNames:
- default

View File

@@ -1,7 +0,0 @@
apiVersion: v1
kind: Secret
metadata:
name: grafana-credentials
data:
user: YWRtaW4=
password: YWRtaW4=

File diff suppressed because it is too large

View File

@@ -0,0 +1,18 @@
apiVersion: v1
data:
dashboards.yaml: |-
[
{
"folder": "",
"name": "0",
"options": {
"path": "/grafana-dashboard-definitions/0"
},
"org_id": 1,
"type": "file"
}
]
kind: ConfigMap
metadata:
name: grafana-dashboards
namespace: monitoring

View File

@@ -1,12 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: grafana-dashboards
data:
dashboards.yaml: |+
- name: '0'
org_id: 1
folder: ''
type: file
options:
folder: /grafana-dashboard-definitions/0

View File

@@ -1,15 +1,20 @@
apiVersion: v1
data:
prometheus.yaml: |-
{
"datasources": [
{
"access": "proxy",
"editable": false,
"name": "prometheus",
"org_id": 1,
"type": "prometheus",
"url": "http://prometheus-k8s.monitoring.svc:9090",
"version": 1
}
]
}
kind: ConfigMap
metadata:
name: grafana-datasources
data:
prometheus.yaml: |+
datasources:
- name: prometheus
type: prometheus
access: proxy
org_id: 1
url: http://prometheus-k8s.monitoring.svc:9090
version: 1
editable: false
namespace: monitoring

View File

@@ -1,48 +1,59 @@
apiVersion: apps/v1beta1
apiVersion: apps/v1beta2
kind: Deployment
metadata:
labels:
app: grafana
name: grafana
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
app: grafana
template:
metadata:
labels:
app: grafana
spec:
containers:
- image: quay.io/coreos/monitoring-grafana:5.0.3
name: grafana
ports:
- containerPort: 3000
name: http
resources:
limits:
cpu: 200m
memory: 200Mi
requests:
cpu: 100m
memory: 100Mi
volumeMounts:
- mountPath: /data
name: grafana-storage
readOnly: false
- mountPath: /grafana/conf/provisioning/datasources
name: grafana-datasources
readOnly: false
- mountPath: /grafana/conf/provisioning/dashboards
name: grafana-dashboards
readOnly: false
- mountPath: /grafana-dashboard-definitions/0
name: grafana-dashboard-definitions
readOnly: false
securityContext:
runAsNonRoot: true
runAsUser: 65534
containers:
- name: grafana
image: quay.io/coreos/monitoring-grafana:5.0.3
volumeMounts:
- name: grafana-storage
mountPath: /data
- name: grafana-datasources
mountPath: /grafana/conf/provisioning/datasources
- name: grafana-dashboards
mountPath: /grafana/conf/provisioning/dashboards
- name: grafana-dashboard-definitions-0
mountPath: /grafana-dashboard-definitions/0
ports:
- name: web
containerPort: 3000
resources:
requests:
memory: 100Mi
cpu: 100m
limits:
memory: 200Mi
cpu: 200m
serviceAccountName: grafana
volumes:
- name: grafana-storage
emptyDir: {}
- name: grafana-datasources
configMap:
- emptyDir: {}
name: grafana-storage
- configMap:
name: grafana-datasources
- name: grafana-dashboards
configMap:
name: grafana-datasources
- configMap:
name: grafana-dashboards
- name: grafana-dashboard-definitions-0
configMap:
name: grafana-dashboard-definitions-0
name: grafana-dashboards
- configMap:
name: grafana-dashboard-definitions
name: grafana-dashboard-definitions

View File

@@ -1,4 +1,5 @@
kind: ServiceAccount
apiVersion: v1
kind: ServiceAccount
metadata:
name: custom-metrics-apiserver
name: grafana
namespace: monitoring

View File

@@ -2,14 +2,11 @@ apiVersion: v1
kind: Service
metadata:
name: grafana
labels:
app: grafana
namespace: monitoring
spec:
type: NodePort
ports:
- port: 3000
protocol: TCP
nodePort: 30902
targetPort: web
- name: http
port: 3000
targetPort: http
selector:
app: grafana

View File

@@ -1,17 +0,0 @@
apiVersion: v1
kind: Service
metadata:
namespace: kube-system
name: kube-controller-manager-prometheus-discovery
labels:
k8s-app: kube-controller-manager
spec:
selector:
component: kube-controller-manager
type: ClusterIP
clusterIP: None
ports:
- name: http-metrics
port: 10252
targetPort: 10252
protocol: TCP

View File

@@ -1,17 +0,0 @@
apiVersion: v1
kind: Service
metadata:
namespace: kube-system
name: kube-scheduler-prometheus-discovery
labels:
k8s-app: kube-scheduler
spec:
selector:
component: kube-scheduler
type: ClusterIP
clusterIP: None
ports:
- name: http-metrics
port: 10251
targetPort: 10251
protocol: TCP

View File

@@ -1,17 +0,0 @@
apiVersion: v1
kind: Service
metadata:
namespace: kube-system
name: kube-controller-manager-prometheus-discovery
labels:
k8s-app: kube-controller-manager
spec:
selector:
k8s-app: kube-controller-manager
type: ClusterIP
clusterIP: None
ports:
- name: http-metrics
port: 10252
targetPort: 10252
protocol: TCP

View File

@@ -1,21 +0,0 @@
apiVersion: v1
kind: Service
metadata:
namespace: kube-system
name: kube-dns-prometheus-discovery
labels:
k8s-app: kube-dns
spec:
selector:
k8s-app: kube-dns
type: ClusterIP
clusterIP: None
ports:
- name: http-metrics-skydns
port: 10055
targetPort: 10055
protocol: TCP
- name: http-metrics-dnsmasq
port: 10054
targetPort: 10054
protocol: TCP

View File

@@ -1,17 +0,0 @@
apiVersion: v1
kind: Service
metadata:
namespace: kube-system
name: kube-scheduler-prometheus-discovery
labels:
k8s-app: kube-scheduler
spec:
selector:
k8s-app: kube-scheduler
type: ClusterIP
clusterIP: None
ports:
- name: http-metrics
port: 10251
targetPort: 10251
protocol: TCP

View File

@@ -1,4 +1,4 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: kube-state-metrics

View File

@@ -1,10 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: kube-state-metrics
rules:
- apiGroups: [""]
- apiGroups:
- ""
resources:
- configmaps
- secrets
- nodes
- pods
- services
@@ -15,31 +18,49 @@ rules:
- persistentvolumes
- namespaces
- endpoints
verbs: ["list", "watch"]
- apiGroups: ["extensions"]
verbs:
- list
- watch
- apiGroups:
- extensions
resources:
- daemonsets
- deployments
- replicasets
verbs: ["list", "watch"]
- apiGroups: ["apps"]
verbs:
- list
- watch
- apiGroups:
- apps
resources:
- statefulsets
verbs: ["list", "watch"]
- apiGroups: ["batch"]
verbs:
- list
- watch
- apiGroups:
- batch
resources:
- cronjobs
- jobs
verbs: ["list", "watch"]
- apiGroups: ["autoscaling"]
verbs:
- list
- watch
- apiGroups:
- autoscaling
resources:
- horizontalpodautoscalers
verbs: ["list", "watch"]
- apiGroups: ["authentication.k8s.io"]
verbs:
- list
- watch
- apiGroups:
- authentication.k8s.io
resources:
- tokenreviews
verbs: ["create"]
- apiGroups: ["authorization.k8s.io"]
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
- subjectaccessreviews
verbs: ["create"]
verbs:
- create

View File

@@ -1,80 +1,95 @@
apiVersion: extensions/v1beta1
apiVersion: apps/v1beta2
kind: Deployment
metadata:
labels:
app: kube-state-metrics
name: kube-state-metrics
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
app: kube-state-metrics
template:
metadata:
labels:
app: kube-state-metrics
spec:
serviceAccountName: kube-state-metrics
containers:
- args:
- --secure-listen-address=:8443
- --upstream=http://127.0.0.1:8081/
image: quay.io/coreos/kube-rbac-proxy:v0.3.0
name: kube-rbac-proxy-main
ports:
- containerPort: 8443
name: https-main
resources:
limits:
cpu: 20m
memory: 40Mi
requests:
cpu: 10m
memory: 20Mi
- args:
- --secure-listen-address=:9443
- --upstream=http://127.0.0.1:8082/
image: quay.io/coreos/kube-rbac-proxy:v0.3.0
name: kube-rbac-proxy-self
ports:
- containerPort: 9443
name: https-self
resources:
limits:
cpu: 20m
memory: 40Mi
requests:
cpu: 10m
memory: 20Mi
- args:
- --host=127.0.0.1
- --port=8081
- --telemetry-host=127.0.0.1
- --telemetry-port=8082
image: quay.io/coreos/kube-state-metrics:v1.3.0
name: kube-state-metrics
resources:
limits:
cpu: 102m
memory: 180Mi
requests:
cpu: 102m
memory: 180Mi
- command:
- /pod_nanny
- --container=kube-state-metrics
- --cpu=100m
- --extra-cpu=2m
- --memory=150Mi
- --extra-memory=30Mi
- --threshold=5
- --deployment=kube-state-metrics
env:
- name: MY_POD_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.name
- name: MY_POD_NAMESPACE
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
image: quay.io/coreos/addon-resizer:1.0
name: addon-resizer
resources:
limits:
cpu: 10m
memory: 30Mi
requests:
cpu: 10m
memory: 30Mi
securityContext:
runAsNonRoot: true
runAsUser: 65534
containers:
- name: kube-rbac-proxy-main
image: quay.io/brancz/kube-rbac-proxy:v0.2.0
args:
- "--secure-listen-address=:8443"
- "--upstream=http://127.0.0.1:8081/"
ports:
- name: https-main
containerPort: 8443
resources:
requests:
memory: 20Mi
cpu: 10m
limits:
memory: 40Mi
cpu: 20m
- name: kube-rbac-proxy-self
image: quay.io/brancz/kube-rbac-proxy:v0.2.0
args:
- "--secure-listen-address=:9443"
- "--upstream=http://127.0.0.1:8082/"
ports:
- name: https-self
containerPort: 9443
resources:
requests:
memory: 20Mi
cpu: 10m
limits:
memory: 40Mi
cpu: 20m
- name: kube-state-metrics
image: quay.io/coreos/kube-state-metrics:v1.2.0
args:
- "--host=127.0.0.1"
- "--port=8081"
- "--telemetry-host=127.0.0.1"
- "--telemetry-port=8082"
- name: addon-resizer
image: gcr.io/google_containers/addon-resizer:1.0
resources:
limits:
cpu: 100m
memory: 30Mi
requests:
cpu: 100m
memory: 30Mi
env:
- name: MY_POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: MY_POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
command:
- /pod_nanny
- --container=kube-state-metrics
- --cpu=100m
- --extra-cpu=2m
- --memory=150Mi
- --extra-memory=30Mi
- --threshold=5
- --deployment=kube-state-metrics
serviceAccountName: kube-state-metrics

View File

@@ -1,12 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: kube-state-metrics
namespace: monitoring
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: kube-state-metrics-resizer
name: kube-state-metrics-addon-resizer
subjects:
- kind: ServiceAccount
name: kube-state-metrics

View File

@@ -1,15 +1,21 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: kube-state-metrics-resizer
name: kube-state-metrics
namespace: monitoring
rules:
- apiGroups: [""]
- apiGroups:
- ""
resources:
- pods
verbs: ["get"]
- apiGroups: ["extensions"]
verbs:
- get
- apiGroups:
- extensions
resourceNames:
- kube-state-metrics
resources:
- deployments
resourceNames: ["kube-state-metrics"]
verbs: ["get", "update"]
verbs:
- get
- update

View File

@@ -2,3 +2,4 @@ apiVersion: v1
kind: ServiceAccount
metadata:
name: kube-state-metrics
namespace: monitoring

View File

@@ -2,20 +2,16 @@ apiVersion: v1
kind: Service
metadata:
labels:
app: kube-state-metrics
k8s-app: kube-state-metrics
name: kube-state-metrics
namespace: monitoring
spec:
clusterIP: None
ports:
- name: https-main
port: 8443
targetPort: https-main
protocol: TCP
- name: https-self
port: 9443
targetPort: https-self
protocol: TCP
selector:
app: kube-state-metrics

View File

@@ -1,12 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: metrics-server:system:auth-delegator
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:auth-delegator
subjects:
- kind: ServiceAccount
name: metrics-server
namespace: kube-system

View File

@@ -1,13 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
name: metrics-server-auth-reader
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
name: metrics-server
namespace: kube-system

View File

@@ -1,13 +0,0 @@
apiVersion: apiregistration.k8s.io/v1beta1
kind: APIService
metadata:
name: v1beta1.metrics.k8s.io
spec:
service:
name: metrics-server
namespace: kube-system
group: metrics.k8s.io
version: v1beta1
insecureSkipTLSVerify: true
groupPriorityMinimum: 100
versionPriority: 100

View File

@@ -1,23 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: system:metrics-server
rules:
- apiGroups:
- ""
resources:
- pods
- nodes
- namespaces
verbs:
- get
- list
- watch
- apiGroups:
- "extensions"
resources:
- deployments
verbs:
- get
- list
- watch

View File

@@ -1,25 +0,0 @@
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
name: metrics-server
namespace: kube-system
labels:
k8s-app: metrics-server
spec:
selector:
matchLabels:
k8s-app: metrics-server
template:
metadata:
name: metrics-server
labels:
k8s-app: metrics-server
spec:
serviceAccountName: metrics-server
containers:
- name: metrics-server
image: gcr.io/google_containers/metrics-server-amd64:v0.2.0
imagePullPolicy: Always
command:
- /metrics-server
- --source=kubernetes.summary_api:''

View File

@@ -1,5 +0,0 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: metrics-server
namespace: kube-system

View File

@@ -1,14 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: metrics-server
namespace: kube-system
labels:
kubernetes.io/name: "Metrics-server"
spec:
selector:
k8s-app: metrics-server
ports:
- port: 443
protocol: TCP
targetPort: 443

View File

@@ -3,11 +3,15 @@ kind: ClusterRole
metadata:
name: node-exporter
rules:
- apiGroups: ["authentication.k8s.io"]
- apiGroups:
- authentication.k8s.io
resources:
- tokenreviews
verbs: ["create"]
- apiGroups: ["authorization.k8s.io"]
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
- subjectaccessreviews
verbs: ["create"]
verbs:
- create

View File

@@ -1,69 +1,63 @@
apiVersion: extensions/v1beta1
apiVersion: apps/v1beta2
kind: DaemonSet
metadata:
labels:
app: node-exporter
name: node-exporter
namespace: monitoring
spec:
updateStrategy:
rollingUpdate:
maxUnavailable: 1
type: RollingUpdate
selector:
matchLabels:
app: node-exporter
template:
metadata:
labels:
app: node-exporter
name: node-exporter
spec:
serviceAccountName: node-exporter
containers:
- args:
- --web.listen-address=127.0.0.1:9101
- --path.procfs=/host/proc
- --path.sysfs=/host/sys
image: quay.io/prometheus/node-exporter:v0.15.2
name: node-exporter
resources:
limits:
cpu: 102m
memory: 180Mi
requests:
cpu: 102m
memory: 180Mi
volumeMounts:
- mountPath: /host/proc
name: proc
readOnly: false
- mountPath: /host/sys
name: sys
readOnly: false
- args:
- --secure-listen-address=:9100
- --upstream=http://127.0.0.1:9101/
image: quay.io/coreos/kube-rbac-proxy:v0.3.0
name: kube-rbac-proxy
ports:
- containerPort: 9100
name: https
resources:
limits:
cpu: 20m
memory: 40Mi
requests:
cpu: 10m
memory: 20Mi
securityContext:
runAsNonRoot: true
runAsUser: 65534
hostNetwork: true
hostPID: true
containers:
- image: quay.io/prometheus/node-exporter:v0.15.2
args:
- "--web.listen-address=127.0.0.1:9101"
- "--path.procfs=/host/proc"
- "--path.sysfs=/host/sys"
name: node-exporter
resources:
requests:
memory: 30Mi
cpu: 100m
limits:
memory: 50Mi
cpu: 200m
volumeMounts:
- name: proc
readOnly: true
mountPath: /host/proc
- name: sys
readOnly: true
mountPath: /host/sys
- name: kube-rbac-proxy
image: quay.io/brancz/kube-rbac-proxy:v0.2.0
args:
- "--secure-listen-address=:9100"
- "--upstream=http://127.0.0.1:9101/"
ports:
- containerPort: 9100
hostPort: 9100
name: https
resources:
requests:
memory: 20Mi
cpu: 10m
limits:
memory: 40Mi
cpu: 20m
tolerations:
- effect: NoSchedule
operator: Exists
serviceAccountName: node-exporter
volumes:
- name: proc
hostPath:
- hostPath:
path: /proc
- name: sys
hostPath:
name: proc
- hostPath:
path: /sys
name: sys

View File

@@ -2,3 +2,4 @@ apiVersion: v1
kind: ServiceAccount
metadata:
name: node-exporter
namespace: monitoring

View File

@@ -2,16 +2,13 @@ apiVersion: v1
kind: Service
metadata:
labels:
app: node-exporter
k8s-app: node-exporter
name: node-exporter
namespace: monitoring
spec:
type: ClusterIP
clusterIP: None
ports:
- name: https
port: 9100
protocol: TCP
targetPort: https
selector:
app: node-exporter

View File

@@ -1,12 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: system:metrics-server
name: prometheus-k8s
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:metrics-server
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: metrics-server
namespace: kube-system
name: prometheus-k8s
namespace: monitoring

View File

@@ -0,0 +1,15 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: prometheus-k8s
rules:
- apiGroups:
- ""
resources:
- nodes/metrics
verbs:
- get
- nonResourceURLs:
- /metrics
verbs:
- get

View File

@@ -0,0 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: prometheus-k8s-config
namespace: monitoring
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s-config
subjects:
- kind: ServiceAccount
name: prometheus-k8s-config
namespace: monitoring

View File

@@ -1,13 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: prometheus-frontend
namespace: monitoring
name: prometheus-k8s
namespace: default
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: alertmanager-discovery
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: prometheus-frontend
namespace: default
name: prometheus-k8s
namespace: monitoring

View File

@@ -1,13 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: custom-metrics-auth-reader
name: prometheus-k8s
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: extension-apiserver-authentication-reader
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: custom-metrics-apiserver
name: prometheus-k8s
namespace: monitoring

View File

@@ -0,0 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: prometheus-k8s
namespace: monitoring
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: monitoring

View File

@@ -0,0 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: prometheus-k8s-config
namespace: monitoring
rules:
- apiGroups:
- ""
resources:
- configmaps
verbs:
- get

View File

@@ -0,0 +1,17 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: prometheus-k8s
namespace: default
rules:
- apiGroups:
- ""
resources:
- nodes
- services
- endpoints
- pods
verbs:
- get
- list
- watch

View File

@@ -0,0 +1,17 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: prometheus-k8s
namespace: kube-system
rules:
- apiGroups:
- ""
resources:
- nodes
- services
- endpoints
- pods
verbs:
- get
- list
- watch

View File

@@ -0,0 +1,17 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: prometheus-k8s
namespace: monitoring
rules:
- apiGroups:
- ""
resources:
- nodes
- services
- endpoints
- pods
verbs:
- get
- list
- watch

View File

@@ -1,12 +1,6 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus-k8s-rules
labels:
role: alert-rules
prometheus: k8s
data:
alertmanager.rules.yaml: |+
alertmanager.rules.yaml: |
groups:
- name: alertmanager.rules
rules:
@@ -40,7 +34,7 @@ data:
description: Reloading Alertmanager's configuration has failed for {{ $labels.namespace
}}/{{ $labels.pod}}.
summary: Alertmanager's configuration reload failed
etcd3.rules.yaml: |+
etcd3.rules.yaml: |
groups:
- name: ./etcd3.rules
rules:
@@ -164,7 +158,7 @@ data:
annotations:
description: etcd instance {{ $labels.instance }} commit durations are high
summary: high commit durations
general.rules.yaml: |+
general.rules.yaml: |
groups:
- name: general.rules
rules:
@@ -204,7 +198,7 @@ data:
description: '{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} instance
will exhaust in file/socket descriptors within the next hour'
summary: file descriptors soon exhausted
kube-controller-manager.rules.yaml: |+
kube-controller-manager.rules.yaml: |
groups:
- name: kube-controller-manager.rules
rules:
@@ -218,7 +212,7 @@ data:
controllers are not making progress.
runbook: https://coreos.com/tectonic/docs/latest/troubleshooting/controller-recovery.html#recovering-a-controller-manager
summary: Controller manager is down
kube-scheduler.rules.yaml: |+
kube-scheduler.rules.yaml: |
groups:
- name: kube-scheduler.rules
rules:
@@ -277,7 +271,7 @@ data:
to nodes.
runbook: https://coreos.com/tectonic/docs/latest/troubleshooting/controller-recovery.html#recovering-a-scheduler
summary: Scheduler is down
kube-state-metrics.rules.yaml: |+
kube-state-metrics.rules.yaml: |
groups:
- name: kube-state-metrics.rules
rules:
@@ -337,7 +331,7 @@ data:
description: Pod {{$labels.namespace}}/{{$labels.pod}} was restarted {{$value}}
times within the last hour
summary: Pod is restarting frequently
kubelet.rules.yaml: |+
kubelet.rules.yaml: |
groups:
- name: kubelet.rules
rules:
@@ -386,7 +380,7 @@ data:
description: Kubelet {{$labels.instance}} is running {{$value}} pods, close
to the limit of 110
summary: Kubelet is close to pod limit
kubernetes.rules.yaml: |+
kubernetes.rules.yaml: |
groups:
- name: kubernetes.rules
rules:
@@ -477,7 +471,7 @@ data:
description: No API servers are reachable or all have disappeared from service
discovery
summary: No API servers are reachable
- alert: K8sCertificateExpirationNotice
labels:
severity: warning
@@ -485,7 +479,7 @@ data:
description: Kubernetes API Certificate is expiring soon (less than 7 days)
summary: Kubernetes API Certificate is expiring soon
expr: sum(apiserver_client_certificate_expiration_seconds_bucket{le="604800"}) > 0
- alert: K8sCertificateExpirationNotice
labels:
severity: critical
@@ -493,7 +487,7 @@ data:
description: Kubernetes API Certificate is expiring in less than 1 day
summary: Kubernetes API Certificate is expiring
expr: sum(apiserver_client_certificate_expiration_seconds_bucket{le="86400"}) > 0
node.rules.yaml: |+
node.rules.yaml: |
groups:
- name: node.rules
rules:
@@ -541,105 +535,53 @@ data:
description: device {{$labels.device}} on node {{$labels.instance}} is running
full within the next 2 hours (mounted at {{$labels.mountpoint}})
summary: Node disk is running full within 2 hours
prometheus.rules.yaml: |+
groups:
- name: prometheus.rules
rules:
- alert: PrometheusConfigReloadFailed
expr: prometheus_config_last_reload_successful == 0
for: 10m
labels:
severity: warning
annotations:
description: Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}}
summary: Reloading Prometheus' configuration failed
- alert: PrometheusNotificationQueueRunningFull
expr: predict_linear(prometheus_notifications_queue_length[5m], 60 * 30) > prometheus_notifications_queue_capacity
for: 10m
labels:
severity: warning
annotations:
description: Prometheus' alert notification queue is running full for {{$labels.namespace}}/{{
$labels.pod}}
summary: Prometheus' alert notification queue is running full
- alert: PrometheusErrorSendingAlerts
expr: rate(prometheus_notifications_errors_total[5m]) / rate(prometheus_notifications_sent_total[5m])
> 0.01
for: 10m
labels:
severity: warning
annotations:
description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{
$labels.pod}} to Alertmanager {{$labels.Alertmanager}}
summary: Errors while sending alert from Prometheus
- alert: PrometheusErrorSendingAlerts
expr: rate(prometheus_notifications_errors_total[5m]) / rate(prometheus_notifications_sent_total[5m])
> 0.03
for: 10m
labels:
severity: critical
annotations:
description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{
$labels.pod}} to Alertmanager {{$labels.Alertmanager}}
summary: Errors while sending alerts from Prometheus
- alert: PrometheusNotConnectedToAlertmanagers
expr: prometheus_notifications_alertmanagers_discovered < 1
for: 10m
labels:
severity: warning
annotations:
description: Prometheus {{ $labels.namespace }}/{{ $labels.pod}} is not connected
to any Alertmanagers
summary: Prometheus is not connected to any Alertmanagers
- alert: PrometheusTSDBReloadsFailing
expr: increase(prometheus_tsdb_reloads_failures_total[2h]) > 0
for: 12h
labels:
severity: warning
annotations:
description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}}
reload failures over the last four hours.'
summary: Prometheus has issues reloading data blocks from disk
- alert: PrometheusTSDBCompactionsFailing
expr: increase(prometheus_tsdb_compactions_failed_total[2h]) > 0
for: 12h
labels:
severity: warning
annotations:
description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}}
compaction failures over the last four hours.'
summary: Prometheus has issues compacting sample blocks
- alert: PrometheusTSDBWALCorruptions
expr: tsdb_wal_corruptions_total > 0
for: 4h
labels:
severity: warning
annotations:
description: '{{$labels.job}} at {{$labels.instance}} has a corrupted write-ahead
log (WAL).'
summary: Prometheus write-ahead log is corrupted
- alert: PrometheusNotIngestingSamples
expr: rate(prometheus_tsdb_head_samples_appended_total[5m]) <= 0
for: 10m
labels:
severity: warning
annotations:
description: "Prometheus {{ $labels.namespace }}/{{ $labels.pod}} isn't ingesting samples."
summary: "Prometheus isn't ingesting samples"
- alert: PrometheusTargetScapesDuplicate
expr: increase(prometheus_target_scrapes_sample_duplicate_timestamp_total[5m]) > 0
for: 10m
labels:
severity: warning
annotations:
description: "{{$labels.namespace}}/{{$labels.pod}} has many samples rejected due to duplicate timestamps but different values"
summary: Prometheus has many samples rejected
prometheus.rules.yaml: "groups:\n- name: prometheus.rules\n rules:\n - alert:
PrometheusConfigReloadFailed\n expr: prometheus_config_last_reload_successful
== 0\n for: 10m\n labels:\n severity: warning\n annotations:\n description:
Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}}\n
\ summary: Reloading Prometheus' configuration failed\n\n - alert: PrometheusNotificationQueueRunningFull\n
\ expr: predict_linear(prometheus_notifications_queue_length[5m], 60 * 30) >
prometheus_notifications_queue_capacity\n for: 10m\n labels:\n severity:
warning\n annotations:\n description: Prometheus' alert notification queue
is running full for {{$labels.namespace}}/{{\n $labels.pod}}\n summary:
Prometheus' alert notification queue is running full \n\n - alert: PrometheusErrorSendingAlerts\n
\ expr: rate(prometheus_notifications_errors_total[5m]) / rate(prometheus_notifications_sent_total[5m])\n
\ > 0.01\n for: 10m\n labels:\n severity: warning\n annotations:\n
\ description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{\n
\ $labels.pod}} to Alertmanager {{$labels.Alertmanager}}\n summary:
Errors while sending alert from Prometheus\n\n - alert: PrometheusErrorSendingAlerts\n
\ expr: rate(prometheus_notifications_errors_total[5m]) / rate(prometheus_notifications_sent_total[5m])\n
\ > 0.03\n for: 10m\n labels:\n severity: critical\n annotations:\n
\ description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{\n
\ $labels.pod}} to Alertmanager {{$labels.Alertmanager}}\n summary:
Errors while sending alerts from Prometheus\n\n - alert: PrometheusNotConnectedToAlertmanagers\n
\ expr: prometheus_notifications_alertmanagers_discovered < 1\n for: 10m\n
\ labels:\n severity: warning\n annotations:\n description: Prometheus
{{ $labels.namespace }}/{{ $labels.pod}} is not connected\n to any Alertmanagers\n
\ summary: Prometheus is not connected to any Alertmanagers\n\n - alert:
PrometheusTSDBReloadsFailing\n expr: increase(prometheus_tsdb_reloads_failures_total[2h])
> 0\n for: 12h\n labels:\n severity: warning\n annotations:\n description:
'{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}}\n reload
failures over the last four hours.'\n summary: Prometheus has issues reloading
data blocks from disk\n\n - alert: PrometheusTSDBCompactionsFailing\n expr:
increase(prometheus_tsdb_compactions_failed_total[2h]) > 0\n for: 12h\n labels:\n
\ severity: warning\n annotations:\n description: '{{$labels.job}}
at {{$labels.instance}} had {{$value | humanize}}\n compaction failures
over the last four hours.'\n summary: Prometheus has issues compacting sample
blocks\n\n - alert: PrometheusTSDBWALCorruptions\n expr: tsdb_wal_corruptions_total
> 0\n for: 4h\n labels:\n severity: warning\n annotations:\n description:
'{{$labels.job}} at {{$labels.instance}} has a corrupted write-ahead\n log
(WAL).'\n summary: Prometheus write-ahead log is corrupted\n\n - alert:
PrometheusNotIngestingSamples\n expr: rate(prometheus_tsdb_head_samples_appended_total[5m])
<= 0\n for: 10m\n labels:\n severity: warning\n annotations:\n description:
\"Prometheus {{ $labels.namespace }}/{{ $labels.pod}} isn't ingesting samples.\"\n
\ summary: \"Prometheus isn't ingesting samples\"\n\n - alert: PrometheusTargetScapesDuplicate\n
\ expr: increase(prometheus_target_scrapes_sample_duplicate_timestamp_total[5m])
> 0\n for: 10m\n labels:\n severity: warning\n annotations:\n description:
\"{{$labels.namespace}}/{{$labels.pod}} has many samples rejected due to duplicate
timestamps but different values\"\n summary: Prometheus has many samples
rejected\n"
kind: ConfigMap
metadata:
name: prometheus-k8s-rules
namespace: monitoring

View File

@@ -2,3 +2,4 @@ apiVersion: v1
kind: ServiceAccount
metadata:
name: prometheus-k8s
namespace: monitoring

View File

@@ -1,16 +1,17 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: alertmanager
labels:
k8s-app: alertmanager
name: alertmanager
namespace: monitoring
spec:
selector:
matchLabels:
alertmanager: main
endpoints:
- interval: 30s
port: web
namespaceSelector:
matchNames:
- monitoring
endpoints:
- port: web
interval: 30s
selector:
matchLabels:
alertmanager: main

View File

@@ -1,23 +1,24 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: kube-apiserver
labels:
k8s-app: apiserver
name: kube-apiserver
namespace: monitoring
spec:
jobLabel: component
selector:
matchLabels:
component: apiserver
provider: kubernetes
namespaceSelector:
matchNames:
- default
endpoints:
- port: https
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
interval: 30s
port: https
scheme: https
tlsConfig:
caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
serverName: kubernetes
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
jobLabel: component
namespaceSelector:
matchNames:
- default
selector:
matchLabels:
component: apiserver
provider: kubernetes

View File

@@ -4,16 +4,17 @@ metadata:
labels:
k8s-app: coredns
name: coredns
namespace: monitoring
spec:
endpoints:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
interval: 15s
port: http-metrics
jobLabel: k8s-app
selector:
matchLabels:
k8s-app: coredns
component: metrics
namespaceSelector:
matchNames:
- kube-system
endpoints:
- port: http-metrics
interval: 15s
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
- kube-system
selector:
matchLabels:
component: metrics
k8s-app: coredns

View File

@@ -1,17 +1,18 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: kube-controller-manager
labels:
k8s-app: kube-controller-manager
name: kube-controller-manager
namespace: monitoring
spec:
jobLabel: k8s-app
endpoints:
- port: http-metrics
interval: 30s
selector:
matchLabels:
k8s-app: kube-controller-manager
- interval: 30s
port: http-metrics
jobLabel: k8s-app
namespaceSelector:
matchNames:
- kube-system
selector:
matchLabels:
k8s-app: kube-controller-manager

View File

@@ -1,17 +1,18 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: kube-scheduler
labels:
k8s-app: kube-scheduler
name: kube-scheduler
namespace: monitoring
spec:
jobLabel: k8s-app
endpoints:
- port: http-metrics
interval: 30s
selector:
matchLabels:
k8s-app: kube-scheduler
- interval: 30s
port: http-metrics
jobLabel: k8s-app
namespaceSelector:
matchNames:
- kube-system
selector:
matchLabels:
k8s-app: kube-scheduler

View File

@@ -1,28 +1,29 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: kube-state-metrics
labels:
k8s-app: kube-state-metrics
name: kube-state-metrics
namespace: monitoring
spec:
endpoints:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
honorLabels: true
interval: 30s
port: https-main
scheme: https
tlsConfig:
insecureSkipVerify: true
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
interval: 30s
port: https-self
scheme: https
tlsConfig:
insecureSkipVerify: true
jobLabel: k8s-app
selector:
matchLabels:
k8s-app: kube-state-metrics
namespaceSelector:
matchNames:
- monitoring
endpoints:
- port: https-main
scheme: https
interval: 30s
honorLabels: true
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
tlsConfig:
insecureSkipVerify: true
- port: https-self
scheme: https
interval: 30s
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
tlsConfig:
insecureSkipVerify: true
selector:
matchLabels:
k8s-app: kube-state-metrics

View File

@@ -1,29 +1,30 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: kubelet
labels:
k8s-app: kubelet
name: kubelet
namespace: monitoring
spec:
jobLabel: k8s-app
endpoints:
- port: https-metrics
scheme: https
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
interval: 30s
port: https-metrics
scheme: https
tlsConfig:
insecureSkipVerify: true
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
- port: https-metrics
scheme: https
path: /metrics/cadvisor
interval: 30s
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
honorLabels: true
interval: 30s
path: /metrics/cadvisor
port: https-metrics
scheme: https
tlsConfig:
insecureSkipVerify: true
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
selector:
matchLabels:
k8s-app: kubelet
jobLabel: k8s-app
namespaceSelector:
matchNames:
- kube-system
selector:
matchLabels:
k8s-app: kubelet

View File

@@ -1,21 +1,22 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: node-exporter
labels:
k8s-app: node-exporter
name: node-exporter
namespace: monitoring
spec:
endpoints:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
interval: 30s
port: https
scheme: https
tlsConfig:
insecureSkipVerify: true
jobLabel: k8s-app
selector:
matchLabels:
k8s-app: node-exporter
namespaceSelector:
matchNames:
- monitoring
endpoints:
- port: https
scheme: https
interval: 30s
bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
tlsConfig:
insecureSkipVerify: true
selector:
matchLabels:
k8s-app: node-exporter

View File

@@ -1,9 +1,10 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: prometheus-operator
labels:
k8s-app: prometheus-operator
name: prometheus-operator
namespace: monitoring
spec:
endpoints:
- port: http

View File

@@ -1,16 +1,17 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: prometheus
labels:
k8s-app: prometheus
name: prometheus
namespace: monitoring
spec:
selector:
matchLabels:
prometheus: k8s
endpoints:
- interval: 30s
port: web
namespaceSelector:
matchNames:
- monitoring
endpoints:
- port: web
interval: 30s
selector:
matchLabels:
prometheus: k8s

View File

@@ -4,13 +4,12 @@ metadata:
labels:
prometheus: k8s
name: prometheus-k8s
namespace: monitoring
spec:
type: NodePort
ports:
- name: web
nodePort: 30900
port: 9090
protocol: TCP
targetPort: web
selector:
app: prometheus
prometheus: k8s

View File

@@ -1,29 +1,27 @@
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
name: k8s
labels:
prometheus: k8s
name: k8s
namespace: monitoring
spec:
alerting:
alertmanagers:
- name: alertmanager-main
namespace: monitoring
port: web
replicas: 2
version: v2.2.1
resources:
requests:
memory: 400Mi
ruleSelector:
matchLabels:
prometheus: k8s
role: alert-rules
serviceAccountName: prometheus-k8s
serviceMonitorSelector:
matchExpressions:
- {key: k8s-app, operator: Exists}
ruleSelector:
matchLabels:
role: alert-rules
prometheus: k8s
resources:
requests:
# 2Gi is default, but won't schedule if you don't have a node with >2Gi
# memory. Modify based on your target and time-series count for
# production use. This value is mainly meant for demonstration/testing
# purposes.
memory: 400Mi
alerting:
alertmanagers:
- namespace: monitoring
name: alertmanager-main
port: web
- key: k8s-app
operator: Exists
version: v2.2.1

View File

@@ -1,4 +1,4 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: prometheus-operator

View File

@@ -1,4 +1,4 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: prometheus-operator
@@ -8,13 +8,13 @@ rules:
resources:
- thirdpartyresources
verbs:
- "*"
- '*'
- apiGroups:
- apiextensions.k8s.io
resources:
- customresourcedefinitions
verbs:
- "*"
- '*'
- apiGroups:
- monitoring.coreos.com
resources:
@@ -24,31 +24,45 @@ rules:
- alertmanagers/finalizers
- servicemonitors
verbs:
- "*"
- '*'
- apiGroups:
- apps
resources:
- statefulsets
verbs: ["*"]
- apiGroups: [""]
verbs:
- '*'
- apiGroups:
- ""
resources:
- configmaps
- secrets
verbs: ["*"]
- apiGroups: [""]
verbs:
- '*'
- apiGroups:
- ""
resources:
- pods
verbs: ["list", "delete"]
- apiGroups: [""]
verbs:
- list
- delete
- apiGroups:
- ""
resources:
- services
- endpoints
verbs: ["get", "create", "update"]
- apiGroups: [""]
verbs:
- get
- create
- update
- apiGroups:
- ""
resources:
- nodes
verbs: ["list", "watch"]
- apiGroups: [""]
verbs:
- list
- watch
- apiGroups:
- ""
resources:
- namespaces
verbs: ["list"]
verbs:
- list

View File

@@ -4,6 +4,7 @@ metadata:
labels:
k8s-app: prometheus-operator
name: prometheus-operator
namespace: monitoring
spec:
replicas: 1
selector:

View File

@@ -2,3 +2,4 @@ apiVersion: v1
kind: ServiceAccount
metadata:
name: prometheus-operator
namespace: monitoring

View File

@@ -2,14 +2,11 @@ apiVersion: v1
kind: Service
metadata:
name: prometheus-operator
labels:
k8s-app: prometheus-operator
namespace: monitoring
spec:
type: ClusterIP
ports:
- name: http
port: 8080
targetPort: http
protocol: TCP
selector:
k8s-app: prometheus-operator

View File

@@ -1,54 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
name: prometheus-k8s
namespace: monitoring
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
name: prometheus-k8s
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
name: prometheus-k8s
namespace: default
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: prometheus-k8s
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: monitoring

View File

@@ -1,55 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: Role
metadata:
name: prometheus-k8s
namespace: monitoring
rules:
- apiGroups: [""]
resources:
- nodes
- services
- endpoints
- pods
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources:
- configmaps
verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: Role
metadata:
name: prometheus-k8s
namespace: kube-system
rules:
- apiGroups: [""]
resources:
- services
- endpoints
- pods
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: Role
metadata:
name: prometheus-k8s
namespace: default
rules:
- apiGroups: [""]
resources:
- services
- endpoints
- pods
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
name: prometheus-k8s
rules:
- apiGroups: [""]
resources:
- nodes/metrics
verbs: ["get"]
- nonResourceURLs: ["/metrics"]
verbs: ["get"]

View File

@@ -1,18 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: coredns-prometheus-discovery
namespace: kube-system
labels:
k8s-app: coredns
component: metrics
spec:
ports:
- name: http-metrics
port: 9153
protocol: TCP
targetPort: 9153
selector:
k8s-app: coredns
type: ClusterIP
clusterIP: None