Merge pull request #2485 from goll/addon-resizer

Use recommended addon-resizer 1.8.4
This commit is contained in:
Frederic Branczyk
2019-03-18 13:38:34 +01:00
committed by GitHub
15 changed files with 512 additions and 216 deletions

View File

@@ -260,12 +260,12 @@ These are the available fields with their respective default values:
namespace: "default",
versions+:: {
alertmanager: "v0.16.0",
alertmanager: "v0.16.1",
nodeExporter: "v0.17.0",
kubeStateMetrics: "v1.5.0",
kubeRbacProxy: "v0.4.1",
addonResizer: "2.1",
prometheusOperator: "v0.28.0",
addonResizer: "1.8.4",
prometheusOperator: "v0.29.0",
prometheus: "v2.5.0",
},
@@ -274,7 +274,7 @@ These are the available fields with their respective default values:
alertmanager: "quay.io/prometheus/alertmanager",
kubeStateMetrics: "quay.io/coreos/kube-state-metrics",
kubeRbacProxy: "quay.io/coreos/kube-rbac-proxy",
addonResizer: "gcr.io/google-containers/addon-resizer-amd64",
addonResizer: "k8s.gcr.io/addon-resizer",
nodeExporter: "quay.io/prometheus/node-exporter",
prometheusOperator: "quay.io/coreos/prometheus-operator",
},
@@ -298,7 +298,7 @@ These are the available fields with their respective default values:
receiver: 'null'
routes:
- match:
alertname: DeadMansSwitch
alertname: Watchdog
receiver: 'null'
receivers:
- name: 'null'
@@ -402,12 +402,12 @@ To produce the `docker pull/tag/push` commands that will synchronize upstream im
```shell
$ jsonnet -J vendor -S --tla-str repository=internal-registry.com/organization sync-to-internal-registry.jsonnet
docker pull gcr.io/google-containers/addon-resizer-amd64:2.1
docker tag gcr.io/google-containers/addon-resizer-amd64:2.1 internal-registry.com/organization/addon-resizer:2.1
docker push internal-registry.com/organization/addon-resizer:2.1
docker pull quay.io/prometheus/alertmanager:v0.15.3
docker tag quay.io/prometheus/alertmanager:v0.15.3 internal-registry.com/organization/alertmanager:v0.15.3
docker push internal-registry.com/organization/alertmanager:v0.15.3
docker pull k8s.gcr.io/addon-resizer:1.8.4
docker tag k8s.gcr.io/addon-resizer:1.8.4 internal-registry.com/organization/addon-resizer:1.8.4
docker push internal-registry.com/organization/addon-resizer:1.8.4
docker pull quay.io/prometheus/alertmanager:v0.16.1
docker tag quay.io/prometheus/alertmanager:v0.16.1 internal-registry.com/organization/alertmanager:v0.16.1
docker push internal-registry.com/organization/alertmanager:v0.16.1
...
```
@@ -497,7 +497,7 @@ The Alertmanager configuration is located in the `_config.alertmanager.config` c
receiver: 'null'
routes:
- match:
alertname: DeadMansSwitch
alertname: Watchdog
receiver: 'null'
receivers:
- name: 'null'

View File

@@ -49,13 +49,13 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
name: 'example-group',
rules: [
{
alert: 'DeadMansSwitch',
alert: 'Watchdog',
expr: 'vector(1)',
labels: {
severity: 'none',
},
annotations: {
description: 'This is a DeadMansSwitch meant to ensure that the entire alerting pipeline is functional.',
description: 'This is a Watchdog meant to ensure that the entire alerting pipeline is functional.',
},
},
],

View File

@@ -12,7 +12,7 @@
receiver: 'null'
routes:
- match:
alertname: DeadMansSwitch
alertname: Watchdog
receiver: 'null'
receivers:
- name: 'null'

View File

@@ -9,7 +9,7 @@ route:
receiver: 'null'
routes:
- match:
alertname: DeadMansSwitch
alertname: Watchdog
receiver: 'null'
receivers:
- name: 'null'

View File

@@ -1 +1 @@
{"groups":[{"name":"example-group","rules":[{"alert":"DeadMansSwitch","annotations":{"description":"This is a DeadMansSwitch meant to ensure that the entire alerting pipeline is functional."},"expr":"vector(1)","labels":{"severity":"none"}}]}]}
{"groups":[{"name":"example-group","rules":[{"alert":"Watchdog","annotations":{"description":"This is a Watchdog meant to ensure that the entire alerting pipeline is functional."},"expr":"vector(1)","labels":{"severity":"none"}}]}]}

View File

@@ -1,9 +1,9 @@
groups:
- name: example-group
rules:
- alert: DeadMansSwitch
- alert: Watchdog
expr: vector(1)
labels:
severity: "none"
annotations:
description: This is a DeadMansSwitch meant to ensure that the entire alerting pipeline is functional.
description: This is a Watchdog meant to ensure that the entire alerting pipeline is functional.

View File

@@ -8,13 +8,13 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
name: 'example-group',
rules: [
{
alert: 'DeadMansSwitch',
alert: 'Watchdog',
expr: 'vector(1)',
labels: {
severity: 'none',
},
annotations: {
description: 'This is a DeadMansSwitch meant to ensure that the entire alerting pipeline is functional.',
description: 'This is a Watchdog meant to ensure that the entire alerting pipeline is functional.',
},
},
],

View File

@@ -5,7 +5,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
namespace: 'default',
versions+:: {
alertmanager: 'v0.16.0',
alertmanager: 'v0.16.1',
},
imageRepos+:: {
@@ -28,7 +28,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
{
receiver: 'null',
match: {
alertname: 'DeadMansSwitch',
alertname: 'Watchdog',
},
},
],

View File

@@ -18,13 +18,13 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
versions+:: {
kubeStateMetrics: 'v1.5.0',
kubeRbacProxy: 'v0.4.1',
addonResizer: '2.1',
addonResizer: '1.8.4',
},
imageRepos+:: {
kubeStateMetrics: 'quay.io/coreos/kube-state-metrics',
kubeRbacProxy: 'quay.io/coreos/kube-rbac-proxy',
addonResizer: 'gcr.io/google-containers/addon-resizer-amd64',
addonResizer: 'k8s.gcr.io/addon-resizer',
},
},
@@ -175,7 +175,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
'--extra-cpu=' + $._config.kubeStateMetrics.cpuPerNode,
'--memory=' + $._config.kubeStateMetrics.baseMemory,
'--extra-memory=' + $._config.kubeStateMetrics.memoryPerNode,
'--acceptance-offset=5',
'--threshold=5',
'--deployment=kube-state-metrics',
]) +
container.withEnv([

View File

@@ -8,7 +8,7 @@
"subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus"
}
},
"version": "df002d09f7b7a50321786c4f19c70d371494410b"
"version": "9faab58c2b1cce4def2cc35045162554b8e4a706"
},
{
"name": "ksonnet",
@@ -28,7 +28,7 @@
"subdir": ""
}
},
"version": "ccb787a44f2ebdecbb346d57490fa7e49981b323"
"version": "b8b1a40066bd40bf7612bbb1cc9208f76530f44a"
},
{
"name": "grafonnet",
@@ -48,7 +48,7 @@
"subdir": "grafana-builder"
}
},
"version": "5d7e5391010c768a6ddd39163c35662f379e20ca"
"version": "5cc4bfab6e2453266e47d01b78cbae0b2643426e"
},
{
"name": "grafana",
@@ -78,7 +78,7 @@
"subdir": "Documentation/etcd-mixin"
}
},
"version": "a7e3bd06b2ef0286e1571836997287a81146c25a"
"version": "e1ca3b4434945e57e8e3a451cdbde74a903cc8e1"
}
]
}

View File

@@ -15,4 +15,4 @@ spec:
runAsNonRoot: true
runAsUser: 1000
serviceAccountName: alertmanager-main
version: v0.16.0
version: v0.16.1

View File

@@ -1,6 +1,6 @@
apiVersion: v1
data:
alertmanager.yaml: Imdsb2JhbCI6IAogICJyZXNvbHZlX3RpbWVvdXQiOiAiNW0iCiJyZWNlaXZlcnMiOiAKLSAibmFtZSI6ICJudWxsIgoicm91dGUiOiAKICAiZ3JvdXBfYnkiOiAKICAtICJqb2IiCiAgImdyb3VwX2ludGVydmFsIjogIjVtIgogICJncm91cF93YWl0IjogIjMwcyIKICAicmVjZWl2ZXIiOiAibnVsbCIKICAicmVwZWF0X2ludGVydmFsIjogIjEyaCIKICAicm91dGVzIjogCiAgLSAibWF0Y2giOiAKICAgICAgImFsZXJ0bmFtZSI6ICJEZWFkTWFuc1N3aXRjaCIKICAgICJyZWNlaXZlciI6ICJudWxsIg==
alertmanager.yaml: Imdsb2JhbCI6IAogICJyZXNvbHZlX3RpbWVvdXQiOiAiNW0iCiJyZWNlaXZlcnMiOiAKLSAibmFtZSI6ICJudWxsIgoicm91dGUiOiAKICAiZ3JvdXBfYnkiOiAKICAtICJqb2IiCiAgImdyb3VwX2ludGVydmFsIjogIjVtIgogICJncm91cF93YWl0IjogIjMwcyIKICAicmVjZWl2ZXIiOiAibnVsbCIKICAicmVwZWF0X2ludGVydmFsIjogIjEyaCIKICAicm91dGVzIjogCiAgLSAibWF0Y2giOiAKICAgICAgImFsZXJ0bmFtZSI6ICJXYXRjaGRvZyIKICAgICJyZWNlaXZlciI6ICJudWxsIg==
kind: Secret
metadata:
name: alertmanager-main

File diff suppressed because it is too large. Load Diff

View File

@@ -71,7 +71,7 @@ spec:
- --extra-cpu=2m
- --memory=150Mi
- --extra-memory=30Mi
- --acceptance-offset=5
- --threshold=5
- --deployment=kube-state-metrics
env:
- name: MY_POD_NAME
@@ -84,7 +84,7 @@ spec:
fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
image: gcr.io/google-containers/addon-resizer-amd64:2.1
image: k8s.gcr.io/addon-resizer:1.8.4
name: addon-resizer
resources:
limits:

View File

@@ -225,21 +225,21 @@ spec:
)
record: node:node_memory_swap_io_bytes:sum_rate
- expr: |
avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m]))
avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]))
record: :node_disk_utilisation:avg_irate
- expr: |
avg by (node) (
irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m])
irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
record: node:node_disk_utilisation:avg_irate
- expr: |
avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m]) / 1e3)
avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) / 1e3)
record: :node_disk_saturation:avg_irate
- expr: |
avg by (node) (
irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m]) / 1e3
irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) / 1e3
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
)
@@ -769,9 +769,9 @@ spec:
message: API server is returning errors for {{ $value }}% of requests.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
expr: |
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) without(instance, pod)
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m]))
/
sum(rate(apiserver_request_count{job="apiserver"}[5m])) without(instance, pod) * 100 > 10
sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 3
for: 10m
labels:
severity: critical
@@ -780,9 +780,33 @@ spec:
message: API server is returning errors for {{ $value }}% of requests.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
expr: |
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) without(instance, pod)
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m]))
/
sum(rate(apiserver_request_count{job="apiserver"}[5m])) without(instance, pod) * 100 > 5
sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 1
for: 10m
labels:
severity: warning
- alert: KubeAPIErrorsHigh
annotations:
message: API server is returning errors for {{ $value }}% of requests for
{{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
expr: |
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
/
sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 10
for: 10m
labels:
severity: critical
- alert: KubeAPIErrorsHigh
annotations:
message: API server is returning errors for {{ $value }}% of requests for
{{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
expr: |
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
/
sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 5
for: 10m
labels:
severity: warning
@@ -951,7 +975,7 @@ spec:
log (WAL).'
summary: Prometheus write-ahead log is corrupted
expr: |
tsdb_wal_corruptions_total{job="prometheus-k8s",namespace="monitoring"} > 0
prometheus_tsdb_wal_corruptions_total{job="prometheus-k8s",namespace="monitoring"} > 0
for: 4h
labels:
severity: warning