Merge pull request #2485 from goll/addon-resizer
Use recommended addon-resizer 1.8.4
This commit is contained in:
24
README.md
24
README.md
@@ -260,12 +260,12 @@ These are the available fields with their respective default values:
|
||||
namespace: "default",
|
||||
|
||||
versions+:: {
|
||||
alertmanager: "v0.16.0",
|
||||
alertmanager: "v0.16.1",
|
||||
nodeExporter: "v0.17.0",
|
||||
kubeStateMetrics: "v1.5.0",
|
||||
kubeRbacProxy: "v0.4.1",
|
||||
addonResizer: "2.1",
|
||||
prometheusOperator: "v0.28.0",
|
||||
addonResizer: "1.8.4",
|
||||
prometheusOperator: "v0.29.0",
|
||||
prometheus: "v2.5.0",
|
||||
},
|
||||
|
||||
@@ -274,7 +274,7 @@ These are the available fields with their respective default values:
|
||||
alertmanager: "quay.io/prometheus/alertmanager",
|
||||
kubeStateMetrics: "quay.io/coreos/kube-state-metrics",
|
||||
kubeRbacProxy: "quay.io/coreos/kube-rbac-proxy",
|
||||
addonResizer: "gcr.io/google-containers/addon-resizer-amd64",
|
||||
addonResizer: "k8s.gcr.io/addon-resizer",
|
||||
nodeExporter: "quay.io/prometheus/node-exporter",
|
||||
prometheusOperator: "quay.io/coreos/prometheus-operator",
|
||||
},
|
||||
@@ -298,7 +298,7 @@ These are the available fields with their respective default values:
|
||||
receiver: 'null'
|
||||
routes:
|
||||
- match:
|
||||
alertname: DeadMansSwitch
|
||||
alertname: Watchdog
|
||||
receiver: 'null'
|
||||
receivers:
|
||||
- name: 'null'
|
||||
@@ -402,12 +402,12 @@ To produce the `docker pull/tag/push` commands that will synchronize upstream im
|
||||
|
||||
```shell
|
||||
$ jsonnet -J vendor -S --tla-str repository=internal-registry.com/organization sync-to-internal-registry.jsonnet
|
||||
docker pull gcr.io/google-containers/addon-resizer-amd64:2.1
|
||||
docker tag gcr.io/google-containers/addon-resizer-amd64:2.1 internal-registry.com/organization/addon-resizer:2.1
|
||||
docker push internal-registry.com/organization/addon-resizer:2.1
|
||||
docker pull quay.io/prometheus/alertmanager:v0.15.3
|
||||
docker tag quay.io/prometheus/alertmanager:v0.15.3 internal-registry.com/organization/alertmanager:v0.15.3
|
||||
docker push internal-registry.com/organization/alertmanager:v0.15.3
|
||||
docker pull k8s.gcr.io/addon-resizer:1.8.4
|
||||
docker tag k8s.gcr.io/addon-resizer:1.8.4 internal-registry.com/organization/addon-resizer:1.8.4
|
||||
docker push internal-registry.com/organization/addon-resizer:1.8.4
|
||||
docker pull quay.io/prometheus/alertmanager:v0.16.1
|
||||
docker tag quay.io/prometheus/alertmanager:v0.16.1 internal-registry.com/organization/alertmanager:v0.16.1
|
||||
docker push internal-registry.com/organization/alertmanager:v0.16.1
|
||||
...
|
||||
```
|
||||
|
||||
@@ -497,7 +497,7 @@ The Alertmanager configuration is located in the `_config.alertmanager.config` c
|
||||
receiver: 'null'
|
||||
routes:
|
||||
- match:
|
||||
alertname: DeadMansSwitch
|
||||
alertname: Watchdog
|
||||
receiver: 'null'
|
||||
receivers:
|
||||
- name: 'null'
|
||||
|
@@ -49,13 +49,13 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
name: 'example-group',
|
||||
rules: [
|
||||
{
|
||||
alert: 'DeadMansSwitch',
|
||||
alert: 'Watchdog',
|
||||
expr: 'vector(1)',
|
||||
labels: {
|
||||
severity: 'none',
|
||||
},
|
||||
annotations: {
|
||||
description: 'This is a DeadMansSwitch meant to ensure that the entire alerting pipeline is functional.',
|
||||
description: 'This is a Watchdog meant to ensure that the entire alerting pipeline is functional.',
|
||||
},
|
||||
},
|
||||
],
|
||||
|
@@ -12,7 +12,7 @@
|
||||
receiver: 'null'
|
||||
routes:
|
||||
- match:
|
||||
alertname: DeadMansSwitch
|
||||
alertname: Watchdog
|
||||
receiver: 'null'
|
||||
receivers:
|
||||
- name: 'null'
|
||||
|
@@ -9,7 +9,7 @@ route:
|
||||
receiver: 'null'
|
||||
routes:
|
||||
- match:
|
||||
alertname: DeadMansSwitch
|
||||
alertname: Watchdog
|
||||
receiver: 'null'
|
||||
receivers:
|
||||
- name: 'null'
|
||||
|
@@ -1 +1 @@
|
||||
{"groups":[{"name":"example-group","rules":[{"alert":"DeadMansSwitch","annotations":{"description":"This is a DeadMansSwitch meant to ensure that the entire alerting pipeline is functional."},"expr":"vector(1)","labels":{"severity":"none"}}]}]}
|
||||
{"groups":[{"name":"example-group","rules":[{"alert":"Watchdog","annotations":{"description":"This is a Watchdog meant to ensure that the entire alerting pipeline is functional."},"expr":"vector(1)","labels":{"severity":"none"}}]}]}
|
@@ -1,9 +1,9 @@
|
||||
groups:
|
||||
- name: example-group
|
||||
rules:
|
||||
- alert: DeadMansSwitch
|
||||
- alert: Watchdog
|
||||
expr: vector(1)
|
||||
labels:
|
||||
severity: "none"
|
||||
annotations:
|
||||
description: This is a DeadMansSwitch meant to ensure that the entire alerting pipeline is functional.
|
||||
description: This is a Watchdog meant to ensure that the entire alerting pipeline is functional.
|
||||
|
@@ -8,13 +8,13 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
name: 'example-group',
|
||||
rules: [
|
||||
{
|
||||
alert: 'DeadMansSwitch',
|
||||
alert: 'Watchdog',
|
||||
expr: 'vector(1)',
|
||||
labels: {
|
||||
severity: 'none',
|
||||
},
|
||||
annotations: {
|
||||
description: 'This is a DeadMansSwitch meant to ensure that the entire alerting pipeline is functional.',
|
||||
description: 'This is a Watchdog meant to ensure that the entire alerting pipeline is functional.',
|
||||
},
|
||||
},
|
||||
],
|
||||
|
@@ -5,7 +5,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
namespace: 'default',
|
||||
|
||||
versions+:: {
|
||||
alertmanager: 'v0.16.0',
|
||||
alertmanager: 'v0.16.1',
|
||||
},
|
||||
|
||||
imageRepos+:: {
|
||||
@@ -28,7 +28,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
{
|
||||
receiver: 'null',
|
||||
match: {
|
||||
alertname: 'DeadMansSwitch',
|
||||
alertname: 'Watchdog',
|
||||
},
|
||||
},
|
||||
],
|
||||
|
@@ -18,13 +18,13 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
versions+:: {
|
||||
kubeStateMetrics: 'v1.5.0',
|
||||
kubeRbacProxy: 'v0.4.1',
|
||||
addonResizer: '2.1',
|
||||
addonResizer: '1.8.4',
|
||||
},
|
||||
|
||||
imageRepos+:: {
|
||||
kubeStateMetrics: 'quay.io/coreos/kube-state-metrics',
|
||||
kubeRbacProxy: 'quay.io/coreos/kube-rbac-proxy',
|
||||
addonResizer: 'gcr.io/google-containers/addon-resizer-amd64',
|
||||
addonResizer: 'k8s.gcr.io/addon-resizer',
|
||||
},
|
||||
},
|
||||
|
||||
@@ -175,7 +175,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
'--extra-cpu=' + $._config.kubeStateMetrics.cpuPerNode,
|
||||
'--memory=' + $._config.kubeStateMetrics.baseMemory,
|
||||
'--extra-memory=' + $._config.kubeStateMetrics.memoryPerNode,
|
||||
'--acceptance-offset=5',
|
||||
'--threshold=5',
|
||||
'--deployment=kube-state-metrics',
|
||||
]) +
|
||||
container.withEnv([
|
||||
|
@@ -8,7 +8,7 @@
|
||||
"subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus"
|
||||
}
|
||||
},
|
||||
"version": "df002d09f7b7a50321786c4f19c70d371494410b"
|
||||
"version": "9faab58c2b1cce4def2cc35045162554b8e4a706"
|
||||
},
|
||||
{
|
||||
"name": "ksonnet",
|
||||
@@ -28,7 +28,7 @@
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "ccb787a44f2ebdecbb346d57490fa7e49981b323"
|
||||
"version": "b8b1a40066bd40bf7612bbb1cc9208f76530f44a"
|
||||
},
|
||||
{
|
||||
"name": "grafonnet",
|
||||
@@ -48,7 +48,7 @@
|
||||
"subdir": "grafana-builder"
|
||||
}
|
||||
},
|
||||
"version": "5d7e5391010c768a6ddd39163c35662f379e20ca"
|
||||
"version": "5cc4bfab6e2453266e47d01b78cbae0b2643426e"
|
||||
},
|
||||
{
|
||||
"name": "grafana",
|
||||
@@ -78,7 +78,7 @@
|
||||
"subdir": "Documentation/etcd-mixin"
|
||||
}
|
||||
},
|
||||
"version": "a7e3bd06b2ef0286e1571836997287a81146c25a"
|
||||
"version": "e1ca3b4434945e57e8e3a451cdbde74a903cc8e1"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@@ -15,4 +15,4 @@ spec:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
serviceAccountName: alertmanager-main
|
||||
version: v0.16.0
|
||||
version: v0.16.1
|
||||
|
@@ -1,6 +1,6 @@
|
||||
apiVersion: v1
|
||||
data:
|
||||
alertmanager.yaml: Imdsb2JhbCI6IAogICJyZXNvbHZlX3RpbWVvdXQiOiAiNW0iCiJyZWNlaXZlcnMiOiAKLSAibmFtZSI6ICJudWxsIgoicm91dGUiOiAKICAiZ3JvdXBfYnkiOiAKICAtICJqb2IiCiAgImdyb3VwX2ludGVydmFsIjogIjVtIgogICJncm91cF93YWl0IjogIjMwcyIKICAicmVjZWl2ZXIiOiAibnVsbCIKICAicmVwZWF0X2ludGVydmFsIjogIjEyaCIKICAicm91dGVzIjogCiAgLSAibWF0Y2giOiAKICAgICAgImFsZXJ0bmFtZSI6ICJEZWFkTWFuc1N3aXRjaCIKICAgICJyZWNlaXZlciI6ICJudWxsIg==
|
||||
alertmanager.yaml: Imdsb2JhbCI6IAogICJyZXNvbHZlX3RpbWVvdXQiOiAiNW0iCiJyZWNlaXZlcnMiOiAKLSAibmFtZSI6ICJudWxsIgoicm91dGUiOiAKICAiZ3JvdXBfYnkiOiAKICAtICJqb2IiCiAgImdyb3VwX2ludGVydmFsIjogIjVtIgogICJncm91cF93YWl0IjogIjMwcyIKICAicmVjZWl2ZXIiOiAibnVsbCIKICAicmVwZWF0X2ludGVydmFsIjogIjEyaCIKICAicm91dGVzIjogCiAgLSAibWF0Y2giOiAKICAgICAgImFsZXJ0bmFtZSI6ICJXYXRjaGRvZyIKICAgICJyZWNlaXZlciI6ICJudWxsIg==
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: alertmanager-main
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -71,7 +71,7 @@ spec:
|
||||
- --extra-cpu=2m
|
||||
- --memory=150Mi
|
||||
- --extra-memory=30Mi
|
||||
- --acceptance-offset=5
|
||||
- --threshold=5
|
||||
- --deployment=kube-state-metrics
|
||||
env:
|
||||
- name: MY_POD_NAME
|
||||
@@ -84,7 +84,7 @@ spec:
|
||||
fieldRef:
|
||||
apiVersion: v1
|
||||
fieldPath: metadata.namespace
|
||||
image: gcr.io/google-containers/addon-resizer-amd64:2.1
|
||||
image: k8s.gcr.io/addon-resizer:1.8.4
|
||||
name: addon-resizer
|
||||
resources:
|
||||
limits:
|
||||
|
@@ -225,21 +225,21 @@ spec:
|
||||
)
|
||||
record: node:node_memory_swap_io_bytes:sum_rate
|
||||
- expr: |
|
||||
avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m]))
|
||||
avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]))
|
||||
record: :node_disk_utilisation:avg_irate
|
||||
- expr: |
|
||||
avg by (node) (
|
||||
irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m])
|
||||
irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])
|
||||
* on (namespace, pod) group_left(node)
|
||||
node_namespace_pod:kube_pod_info:
|
||||
)
|
||||
record: node:node_disk_utilisation:avg_irate
|
||||
- expr: |
|
||||
avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m]) / 1e3)
|
||||
avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) / 1e3)
|
||||
record: :node_disk_saturation:avg_irate
|
||||
- expr: |
|
||||
avg by (node) (
|
||||
irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+"}[1m]) / 1e3
|
||||
irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) / 1e3
|
||||
* on (namespace, pod) group_left(node)
|
||||
node_namespace_pod:kube_pod_info:
|
||||
)
|
||||
@@ -769,9 +769,9 @@ spec:
|
||||
message: API server is returning errors for {{ $value }}% of requests.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
|
||||
expr: |
|
||||
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) without(instance, pod)
|
||||
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m]))
|
||||
/
|
||||
sum(rate(apiserver_request_count{job="apiserver"}[5m])) without(instance, pod) * 100 > 10
|
||||
sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 3
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
@@ -780,9 +780,33 @@ spec:
|
||||
message: API server is returning errors for {{ $value }}% of requests.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
|
||||
expr: |
|
||||
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) without(instance, pod)
|
||||
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m]))
|
||||
/
|
||||
sum(rate(apiserver_request_count{job="apiserver"}[5m])) without(instance, pod) * 100 > 5
|
||||
sum(rate(apiserver_request_count{job="apiserver"}[5m])) * 100 > 1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeAPIErrorsHigh
|
||||
annotations:
|
||||
message: API server is returning errors for {{ $value }}% of requests for
|
||||
{{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
|
||||
expr: |
|
||||
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
|
||||
/
|
||||
sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 10
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubeAPIErrorsHigh
|
||||
annotations:
|
||||
message: API server is returning errors for {{ $value }}% of requests for
|
||||
{{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
|
||||
expr: |
|
||||
sum(rate(apiserver_request_count{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
|
||||
/
|
||||
sum(rate(apiserver_request_count{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 5
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
@@ -951,7 +975,7 @@ spec:
|
||||
log (WAL).'
|
||||
summary: Prometheus write-ahead log is corrupted
|
||||
expr: |
|
||||
tsdb_wal_corruptions_total{job="prometheus-k8s",namespace="monitoring"} > 0
|
||||
prometheus_tsdb_wal_corruptions_total{job="prometheus-k8s",namespace="monitoring"} > 0
|
||||
for: 4h
|
||||
labels:
|
||||
severity: warning
|
||||
|
Reference in New Issue
Block a user