Merge pull request #2485 from goll/addon-resizer

Use recommended addon-resizer 1.8.4
Authored by Frederic Branczyk on 2019-03-18 13:38:34 +01:00; committed by GitHub.
15 changed files with 512 additions and 216 deletions

View File

@@ -260,12 +260,12 @@ These are the available fields with their respective default values:
     namespace: "default",
     versions+:: {
-      alertmanager: "v0.16.0",
+      alertmanager: "v0.16.1",
       nodeExporter: "v0.17.0",
       kubeStateMetrics: "v1.5.0",
       kubeRbacProxy: "v0.4.1",
-      addonResizer: "2.1",
-      prometheusOperator: "v0.28.0",
+      addonResizer: "1.8.4",
+      prometheusOperator: "v0.29.0",
       prometheus: "v2.5.0",
     },
@@ -274,7 +274,7 @@ These are the available fields with their respective default values:
       alertmanager: "quay.io/prometheus/alertmanager",
       kubeStateMetrics: "quay.io/coreos/kube-state-metrics",
       kubeRbacProxy: "quay.io/coreos/kube-rbac-proxy",
-      addonResizer: "gcr.io/google-containers/addon-resizer-amd64",
+      addonResizer: "k8s.gcr.io/addon-resizer",
       nodeExporter: "quay.io/prometheus/node-exporter",
       prometheusOperator: "quay.io/coreos/prometheus-operator",
     },
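
The two hunks above are the library defaults that downstream users typically pin or override from their own entry point. A minimal sketch of that override pattern, assuming the standard `kube-prometheus.libsonnet` entry point used elsewhere in this diff (the values merely restate the new defaults introduced here):

```jsonnet
// Minimal sketch: pin the updated defaults from a consumer's entry point.
// Assumes the vendored kube-prometheus library (run with `jsonnet -J vendor`).
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: {
    versions+:: {
      alertmanager: 'v0.16.1',
      addonResizer: '1.8.4',
      prometheusOperator: 'v0.29.0',
    },
    imageRepos+:: {
      addonResizer: 'k8s.gcr.io/addon-resizer',
    },
  },
};

// Evaluating the hidden config shows the merged version map.
kp._config.versions
```

The same `_config+::` mechanism applies to every field listed in these hunks.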
@@ -298,7 +298,7 @@ These are the available fields with their respective default values:
   receiver: 'null'
   routes:
   - match:
-      alertname: DeadMansSwitch
+      alertname: Watchdog
     receiver: 'null'
 receivers:
 - name: 'null'
@@ -402,12 +402,12 @@ To produce the `docker pull/tag/push` commands that will synchronize upstream im
 ```shell
 $ jsonnet -J vendor -S --tla-str repository=internal-registry.com/organization sync-to-internal-registry.jsonnet
-docker pull gcr.io/google-containers/addon-resizer-amd64:2.1
-docker tag gcr.io/google-containers/addon-resizer-amd64:2.1 internal-registry.com/organization/addon-resizer:2.1
-docker push internal-registry.com/organization/addon-resizer:2.1
-docker pull quay.io/prometheus/alertmanager:v0.15.3
-docker tag quay.io/prometheus/alertmanager:v0.15.3 internal-registry.com/organization/alertmanager:v0.15.3
-docker push internal-registry.com/organization/alertmanager:v0.15.3
+docker pull k8s.gcr.io/addon-resizer:1.8.4
+docker tag k8s.gcr.io/addon-resizer:1.8.4 internal-registry.com/organization/addon-resizer:1.8.4
+docker push internal-registry.com/organization/addon-resizer:1.8.4
+docker pull quay.io/prometheus/alertmanager:v0.16.1
+docker tag quay.io/prometheus/alertmanager:v0.16.1 internal-registry.com/organization/alertmanager:v0.16.1
+docker push internal-registry.com/organization/alertmanager:v0.16.1
 ...
 ```
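
Once the images have been mirrored by the commands above, the stack can be pointed at the internal registry. A minimal sketch, assuming the repository layout produced by those `docker tag` commands under `internal-registry.com/organization/`; only two of the `imageRepos+::` fields are shown:

```jsonnet
// Minimal sketch: consume the mirrored images. Assumes they were pushed to
// internal-registry.com/organization by the sync commands above.
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: {
    imageRepos+:: {
      addonResizer: 'internal-registry.com/organization/addon-resizer',
      alertmanager: 'internal-registry.com/organization/alertmanager',
    },
  },
};

// Any manifest rendered from kp now references the internal registry.
kp._config.imageRepos
```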
@@ -497,7 +497,7 @@ The Alertmanager configuration is located in the `_config.alertmanager.config` c
   receiver: 'null'
   routes:
   - match:
-      alertname: DeadMansSwitch
+      alertname: Watchdog
     receiver: 'null'
 receivers:
 - name: 'null'

View File

@@ -49,13 +49,13 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
         name: 'example-group',
         rules: [
           {
-            alert: 'DeadMansSwitch',
+            alert: 'Watchdog',
             expr: 'vector(1)',
             labels: {
               severity: 'none',
             },
             annotations: {
-              description: 'This is a DeadMansSwitch meant to ensure that the entire alerting pipeline is functional.',
+              description: 'This is a Watchdog meant to ensure that the entire alerting pipeline is functional.',
             },
           },
         ],

View File

@@ -12,7 +12,7 @@
   receiver: 'null'
   routes:
   - match:
-      alertname: DeadMansSwitch
+      alertname: Watchdog
     receiver: 'null'
 receivers:
 - name: 'null'

View File

@@ -9,7 +9,7 @@ route:
   receiver: 'null'
   routes:
   - match:
-      alertname: DeadMansSwitch
+      alertname: Watchdog
     receiver: 'null'
 receivers:
 - name: 'null'

View File

@@ -1 +1 @@
-{"groups":[{"name":"example-group","rules":[{"alert":"DeadMansSwitch","annotations":{"description":"This is a DeadMansSwitch meant to ensure that the entire alerting pipeline is functional."},"expr":"vector(1)","labels":{"severity":"none"}}]}]}
+{"groups":[{"name":"example-group","rules":[{"alert":"Watchdog","annotations":{"description":"This is a Watchdog meant to ensure that the entire alerting pipeline is functional."},"expr":"vector(1)","labels":{"severity":"none"}}]}]}

View File

@@ -1,9 +1,9 @@
 groups:
 - name: example-group
   rules:
-  - alert: DeadMansSwitch
+  - alert: Watchdog
     expr: vector(1)
     labels:
       severity: "none"
     annotations:
-      description: This is a DeadMansSwitch meant to ensure that the entire alerting pipeline is functional.
+      description: This is a Watchdog meant to ensure that the entire alerting pipeline is functional.

View File

@@ -8,13 +8,13 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
         name: 'example-group',
         rules: [
           {
-            alert: 'DeadMansSwitch',
+            alert: 'Watchdog',
             expr: 'vector(1)',
             labels: {
               severity: 'none',
             },
             annotations: {
-              description: 'This is a DeadMansSwitch meant to ensure that the entire alerting pipeline is functional.',
+              description: 'This is a Watchdog meant to ensure that the entire alerting pipeline is functional.',
             },
           },
         ],

View File

@@ -5,7 +5,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
     namespace: 'default',
     versions+:: {
-      alertmanager: 'v0.16.0',
+      alertmanager: 'v0.16.1',
     },
     imageRepos+:: {
@@ -28,7 +28,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
         {
           receiver: 'null',
           match: {
-            alertname: 'DeadMansSwitch',
+            alertname: 'Watchdog',
           },
         },
       ],

View File

@@ -18,13 +18,13 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
     versions+:: {
       kubeStateMetrics: 'v1.5.0',
       kubeRbacProxy: 'v0.4.1',
-      addonResizer: '2.1',
+      addonResizer: '1.8.4',
     },
     imageRepos+:: {
       kubeStateMetrics: 'quay.io/coreos/kube-state-metrics',
       kubeRbacProxy: 'quay.io/coreos/kube-rbac-proxy',
-      addonResizer: 'gcr.io/google-containers/addon-resizer-amd64',
+      addonResizer: 'k8s.gcr.io/addon-resizer',
     },
   },
@@ -175,7 +175,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
         '--extra-cpu=' + $._config.kubeStateMetrics.cpuPerNode,
         '--memory=' + $._config.kubeStateMetrics.baseMemory,
         '--extra-memory=' + $._config.kubeStateMetrics.memoryPerNode,
-        '--acceptance-offset=5',
+        '--threshold=5',
         '--deployment=kube-state-metrics',
       ]) +
       container.withEnv([
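
The addon-resizer arguments above are assembled from `$._config.kubeStateMetrics`, so the nanny sizing can be tuned without editing this file. A hedged sketch of such an override; the field names come from the hunk above and the values from the generated manifest later in this diff, while their overridability through `_config+::` is an assumption:

```jsonnet
// Hedged sketch: tune the kube-state-metrics addon-resizer (nanny) sizing.
// cpuPerNode/baseMemory/memoryPerNode are the fields referenced above; treating
// them as user-overridable is an assumption, not something shown in this diff.
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: {
    kubeStateMetrics+:: {
      cpuPerNode: '2m',       // rendered as --extra-cpu=2m
      baseMemory: '150Mi',    // rendered as --memory=150Mi
      memoryPerNode: '30Mi',  // rendered as --extra-memory=30Mi
    },
  },
};

kp._config.kubeStateMetrics
```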

View File

@@ -8,7 +8,7 @@
         "subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus"
       }
     },
-    "version": "df002d09f7b7a50321786c4f19c70d371494410b"
+    "version": "9faab58c2b1cce4def2cc35045162554b8e4a706"
   },
   {
     "name": "ksonnet",
@@ -28,7 +28,7 @@
         "subdir": ""
       }
     },
-    "version": "ccb787a44f2ebdecbb346d57490fa7e49981b323"
+    "version": "b8b1a40066bd40bf7612bbb1cc9208f76530f44a"
   },
   {
     "name": "grafonnet",
@@ -48,7 +48,7 @@
         "subdir": "grafana-builder"
       }
     },
-    "version": "5d7e5391010c768a6ddd39163c35662f379e20ca"
+    "version": "5cc4bfab6e2453266e47d01b78cbae0b2643426e"
   },
   {
     "name": "grafana",
@@ -78,7 +78,7 @@
         "subdir": "Documentation/etcd-mixin"
       }
     },
-    "version": "a7e3bd06b2ef0286e1571836997287a81146c25a"
+    "version": "e1ca3b4434945e57e8e3a451cdbde74a903cc8e1"
   }
 ]
 }

View File

@@ -15,4 +15,4 @@ spec:
     runAsNonRoot: true
     runAsUser: 1000
   serviceAccountName: alertmanager-main
-  version: v0.16.0
+  version: v0.16.1

View File

@@ -1,6 +1,6 @@
 apiVersion: v1
 data:
-  alertmanager.yaml: Imdsb2JhbCI6IAogICJyZXNvbHZlX3RpbWVvdXQiOiAiNW0iCiJyZWNlaXZlcnMiOiAKLSAibmFtZSI6ICJudWxsIgoicm91dGUiOiAKICAiZ3JvdXBfYnkiOiAKICAtICJqb2IiCiAgImdyb3VwX2ludGVydmFsIjogIjVtIgogICJncm91cF93YWl0IjogIjMwcyIKICAicmVjZWl2ZXIiOiAibnVsbCIKICAicmVwZWF0X2ludGVydmFsIjogIjEyaCIKICAicm91dGVzIjogCiAgLSAibWF0Y2giOiAKICAgICAgImFsZXJ0bmFtZSI6ICJEZWFkTWFuc1N3aXRjaCIKICAgICJyZWNlaXZlciI6ICJudWxsIg==
+  alertmanager.yaml: Imdsb2JhbCI6IAogICJyZXNvbHZlX3RpbWVvdXQiOiAiNW0iCiJyZWNlaXZlcnMiOiAKLSAibmFtZSI6ICJudWxsIgoicm91dGUiOiAKICAiZ3JvdXBfYnkiOiAKICAtICJqb2IiCiAgImdyb3VwX2ludGVydmFsIjogIjVtIgogICJncm91cF93YWl0IjogIjMwcyIKICAicmVjZWl2ZXIiOiAibnVsbCIKICAicmVwZWF0X2ludGVydmFsIjogIjEyaCIKICAicm91dGVzIjogCiAgLSAibWF0Y2giOiAKICAgICAgImFsZXJ0bmFtZSI6ICJXYXRjaGRvZyIKICAgICJyZWNlaXZlciI6ICJudWxsIg==
 kind: Secret
 metadata:
   name: alertmanager-main
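
The only change in this Secret is the base64-encoded `alertmanager.yaml`, which is the default routing configuration shown earlier with `DeadMansSwitch` renamed to `Watchdog`. A minimal sketch of how such a payload can be produced in jsonnet; this is illustrative only, not necessarily the library's exact code, and the YAML quoting may differ byte-for-byte from the generated Secret:

```jsonnet
// Illustrative sketch only: render an Alertmanager config to YAML and
// base64-encode it, which is what the data field above carries.
local alertmanagerConfig = {
  global: { resolve_timeout: '5m' },
  receivers: [{ name: 'null' }],
  route: {
    group_by: ['job'],
    group_wait: '30s',
    group_interval: '5m',
    repeat_interval: '12h',
    receiver: 'null',
    routes: [
      { match: { alertname: 'Watchdog' }, receiver: 'null' },
    ],
  },
};

{
  'alertmanager.yaml': std.base64(std.manifestYamlDoc(alertmanagerConfig)),
}
```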

File diff suppressed because it is too large.

View File

@@ -71,7 +71,7 @@ spec:
        - --extra-cpu=2m
        - --memory=150Mi
        - --extra-memory=30Mi
-       - --acceptance-offset=5
+       - --threshold=5
        - --deployment=kube-state-metrics
        env:
        - name: MY_POD_NAME
@@ -84,7 +84,7 @@ spec:
          fieldRef:
            apiVersion: v1
            fieldPath: metadata.namespace
-       image: gcr.io/google-containers/addon-resizer-amd64:2.1
+       image: k8s.gcr.io/addon-resizer:1.8.4
        name: addon-resizer
        resources:
          limits:

View File

@@ -225,21 +225,21 @@ spec:
        )
      record: node:node_memory_swap_io_bytes:sum_rate
    - expr: |
-       avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m]))
+       avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]))
      record: :node_disk_utilisation:avg_irate
    - expr: |
        avg by (node) (
-         irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m])
+         irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])
        * on (namespace, pod) group_left(node)
          node_namespace_pod:kube_pod_info:
        )
      record: node:node_disk_utilisation:avg_irate
    - expr: |
-       avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m]) / 1e3)
+       avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) / 1e3)
      record: :node_disk_saturation:avg_irate
    - expr: |
        avg by (node) (
-         irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m]) / 1e3
+         irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) / 1e3
        * on (namespace, pod) group_left(node)
          node_namespace_pod:kube_pod_info:
        )
@@ -769,9 +769,9 @@
        message: API server is returning errors for {{ $value }}% of requests.
        runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
      expr: |
-       sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) without(instance, pod)
+       sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m]))
        /
-       sum(rate(apiserver_request_count{job="apiserver"}[5m])) without(instance, pod) * 100 > 10
+       sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 3
      for: 10m
      labels:
        severity: critical
@@ -780,9 +780,33 @@
        message: API server is returning errors for {{ $value }}% of requests.
        runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
      expr: |
-       sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) without(instance, pod)
+       sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m]))
        /
-       sum(rate(apiserver_request_count{job="apiserver"}[5m])) without(instance, pod) * 100 > 5
+       sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 1
+     for: 10m
+     labels:
+       severity: warning
+   - alert: KubeAPIErrorsHigh
+     annotations:
+       message: API server is returning errors for {{ $value }}% of requests for
+         {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}.
+       runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
+     expr: |
+       sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
+       /
+       sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 10
+     for: 10m
+     labels:
+       severity: critical
+   - alert: KubeAPIErrorsHigh
+     annotations:
+       message: API server is returning errors for {{ $value }}% of requests for
+         {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}.
+       runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
+     expr: |
+       sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
+       /
+       sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 5
      for: 10m
      labels:
        severity: warning
@@ -951,7 +975,7 @@
        log (WAL).'
        summary: Prometheus write-ahead log is corrupted
      expr: |
-       tsdb_wal_corruptions_total{job="prometheus-k8s",namespace="monitoring"} > 0
+       prometheus_tsdb_wal_corruptions_total{job="prometheus-k8s",namespace="monitoring"} > 0
      for: 4h
      labels:
        severity: warning