From b6e7d708c5822baa1aa713575c568e2279941390 Mon Sep 17 00:00:00 2001 From: seph Date: Fri, 13 Jul 2018 11:48:27 -0400 Subject: [PATCH 1/7] Configure kube-state-metrics As I work with kube-state-metrics in a large cluster, I found I needed to make some adjustments. - Expose the collectors, allowing one to configure exclusions. - Expose the addon_resizer parameters, facilitating reproduce adjustments - Allow adjusting scrapeTimeout and scrapeInterval --- README.md | 21 ++++++++++++ .../kube-state-metrics.libsonnet | 33 ++++++++++++++----- 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index bf61d9ae..a9a42e54 100644 --- a/README.md +++ b/README.md @@ -369,3 +369,24 @@ The Prometheus `/targets` page will show the kubelet job with the error `403 Una #### Authorization problem The Prometheus `/targets` page will show the kubelet job with the error `401 Unauthorized`, when token authorization is not enabled. Ensure that the `--authorization-mode=Webhook` flag is enabled on all kubelet configurations. +### kube-state-metrics resource usaged + +In some environments, kube-state-metrics may need additional +resources. One driver for more resource needs, is a high number of +namespaces. There may be others. + +kube-state-metrics has it's resources using an +[addon-resizer](https://github.com/kubernetes/autoscaler/tree/master/addon-resizer/nanny) +You can control it's parameters by setting variables in the +config. 
They default to: + +``` jsonnet + resizer+:: { + kubeStateMetrics+:: { + cpu: '100m', + extraCpu: '2m', + memory: '150Mi', + extraMemory: '30Mi', + }, + } +``` diff --git a/jsonnet/kube-prometheus/kube-state-metrics/kube-state-metrics.libsonnet b/jsonnet/kube-prometheus/kube-state-metrics/kube-state-metrics.libsonnet index c36f293b..f9065282 100644 --- a/jsonnet/kube-prometheus/kube-state-metrics/kube-state-metrics.libsonnet +++ b/jsonnet/kube-prometheus/kube-state-metrics/kube-state-metrics.libsonnet @@ -4,6 +4,22 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; _config+:: { namespace: 'default', + kubeStateMetrics+:: { + // when this is an empty string, you get the default set + collectors: '', + scrapeTimeout: '', + scrapeInterval: '30s', + }, + + resizer+:: { + kubeStateMetrics+:: { + cpu: '100m', + extraCpu: '2m', + memory: '150Mi', + extraMemory: '30Mi', + }, + }, + versions+:: { kubeStateMetrics: 'v1.3.1', kubeRbacProxy: 'v0.3.1', @@ -137,19 +153,20 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; '--port=8081', '--telemetry-host=127.0.0.1', '--telemetry-port=8082', + '--collectors=' + $._config.kubeStateMetrics.collectors, ]) + - container.mixin.resources.withRequests({ cpu: '102m', memory: '180Mi' }) + - container.mixin.resources.withLimits({ cpu: '102m', memory: '180Mi' }); + container.mixin.resources.withRequests({ cpu: $._config.resizer.kubeStateMetrics.cpu, memory: $._config.resizer.kubeStateMetrics.memory }) + + container.mixin.resources.withLimits({ cpu: $._config.resizer.kubeStateMetrics.cpu, memory: $._config.resizer.kubeStateMetrics.memory }); local addonResizer = container.new('addon-resizer', $._config.imageRepos.addonResizer + ':' + $._config.versions.addonResizer) + container.withCommand([ '/pod_nanny', '--container=kube-state-metrics', - '--cpu=100m', - '--extra-cpu=2m', - '--memory=150Mi', - '--extra-memory=30Mi', + '--cpu=' + $._config.resizer.kubeStateMetrics.cpu, + '--extra-cpu=' + 
$._config.resizer.kubeStateMetrics.extraCpu, + '--memory=' + $._config.resizer.kubeStateMetrics.memory, + '--extra-memory=' + $._config.resizer.kubeStateMetrics.extraMemory, '--threshold=5', '--deployment=kube-state-metrics', ]) + @@ -258,13 +275,13 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; { port: 'https-main', scheme: 'https', - interval: '30s', + interval: $._config.kubeStateMetrics.scrapeInterval, honorLabels: true, bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', tlsConfig: { insecureSkipVerify: true, }, - }, + } + if $._config.kubeStateMetrics.scrapeTimeout != '' then { scrapeTimeout: $._config.kubeStateMetrics.scrapeTimeout } else {}, { port: 'https-self', scheme: 'https', From dabfca595bed9142a1850dca958a7e18037a18b2 Mon Sep 17 00:00:00 2001 From: Max Inden Date: Tue, 17 Jul 2018 15:10:38 +0200 Subject: [PATCH 2/7] Makefile: Properly rebuild po-docgen on src changes (#1625) --- manifests/grafana-dashboardDefinitions.yaml | 34 +++++++++++++++++++-- manifests/prometheus-rules.yaml | 2 +- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index a0dba292..1143970e 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -2501,6 +2501,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "A", "step": 10 }, { @@ -2509,6 +2510,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "B", "step": 10 }, { @@ -2517,6 +2519,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "C", "step": 10 }, { @@ -2525,6 +2528,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "D", "step": 10 }, { @@ -2533,6 +2537,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "E", "step": 10 } ], @@ -2861,6 +2866,7 @@ items: "instant": true, "intervalFactor": 2, 
"legendFormat": "", + "refId": "A", "step": 10 }, { @@ -2869,6 +2875,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "B", "step": 10 }, { @@ -2877,6 +2884,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "C", "step": 10 }, { @@ -2885,6 +2893,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "D", "step": 10 }, { @@ -2893,6 +2902,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "E", "step": 10 } ], @@ -3303,6 +3313,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "A", "step": 10 }, { @@ -3311,6 +3322,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "B", "step": 10 }, { @@ -3319,6 +3331,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "C", "step": 10 }, { @@ -3327,6 +3340,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "D", "step": 10 }, { @@ -3335,6 +3349,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "E", "step": 10 } ], @@ -3663,6 +3678,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "A", "step": 10 }, { @@ -3671,6 +3687,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "B", "step": 10 }, { @@ -3679,6 +3696,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "C", "step": 10 }, { @@ -3687,6 +3705,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "D", "step": 10 }, { @@ -3695,6 +3714,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "E", "step": 10 } ], @@ -4132,6 +4152,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "A", "step": 10 }, { @@ -4140,6 +4161,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "B", "step": 10 }, { @@ -4148,6 +4170,7 @@ items: "instant": 
true, "intervalFactor": 2, "legendFormat": "", + "refId": "C", "step": 10 }, { @@ -4156,6 +4179,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "D", "step": 10 }, { @@ -4164,6 +4188,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "E", "step": 10 } ], @@ -4492,6 +4517,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "A", "step": 10 }, { @@ -4500,6 +4526,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "B", "step": 10 }, { @@ -4508,6 +4535,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "C", "step": 10 }, { @@ -4516,6 +4544,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "D", "step": 10 }, { @@ -4524,6 +4553,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "E", "step": 10 } ], @@ -5696,14 +5726,14 @@ items: "refId": "A" }, { - "expr": "sum by(container) (kube_pod_container_resource_requests_memory_bytes{job=\"kubelet\", namespace=\"$namespace\", pod=\"$pod\", container=\u007e\"$container\", container!=\"POD\"})", + "expr": "sum by(container) (kube_pod_container_resource_requests_memory_bytes{job=\"kube-state-metrics\", namespace=\"$namespace\", pod=\"$pod\", container=\u007e\"$container\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "Requested: {{ container }}", "refId": "B" }, { - "expr": "sum by(container) (kube_pod_container_resource_limits_memory_bytes{job=\"kubelet\", namespace=\"$namespace\", pod=\"$pod\", container=\u007e\"$container\", container!=\"POD\"})", + "expr": "sum by(container) (kube_pod_container_resource_limits_memory_bytes{job=\"kube-state-metrics\", namespace=\"$namespace\", pod=\"$pod\", container=\u007e\"$container\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "Limit: {{ container }}", diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index 
75d5f36e..49c4a995 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -388,7 +388,7 @@ spec: kube_deployment_spec_replicas{job="kube-state-metrics"} != kube_deployment_status_replicas_available{job="kube-state-metrics"} - for: 15m + for: 1h labels: severity: critical - alert: KubeStatefulSetReplicasMismatch From efe686c0c09efd920ff6d60461842a8a1e2c48d9 Mon Sep 17 00:00:00 2001 From: Max Inden Date: Tue, 17 Jul 2018 15:11:46 +0200 Subject: [PATCH 3/7] security: Enforce nobody user and read only / (#1393) * Make the Prometheus Operator Docker image run as `nobody` by default. * Disallow privilege escalation via K8s * Enforce read only root filesystem --- manifests/0prometheus-operator-deployment.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/manifests/0prometheus-operator-deployment.yaml b/manifests/0prometheus-operator-deployment.yaml index faca5a84..5a193a35 100644 --- a/manifests/0prometheus-operator-deployment.yaml +++ b/manifests/0prometheus-operator-deployment.yaml @@ -18,6 +18,7 @@ spec: containers: - args: - --kubelet-service=kube-system/kubelet + - -logtostderr=true - --config-reloader-image=quay.io/coreos/configmap-reload:v0.0.1 - --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.22.0 image: quay.io/coreos/prometheus-operator:v0.22.0 @@ -32,6 +33,9 @@ spec: requests: cpu: 100m memory: 50Mi + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true nodeSelector: beta.kubernetes.io/os: linux securityContext: From 358c8477eae9429cf5a8b889c72176ca95caee6d Mon Sep 17 00:00:00 2001 From: seph Date: Tue, 17 Jul 2018 09:52:30 -0400 Subject: [PATCH 4/7] Resource config now in config.kubeStateMetrics As requested, this updates the resource specification to live directly in config.kubeStateMetrics. It also clarifies the config variables. These names are what Google uses in some of their tooling. 
(And a slight tweak to the way collectors are specified) --- README.md | 17 +++++----- .../kube-state-metrics.libsonnet | 32 ++++++++----------- 2 files changed, 21 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index a9a42e54..67cbd4c1 100644 --- a/README.md +++ b/README.md @@ -369,24 +369,23 @@ The Prometheus `/targets` page will show the kubelet job with the error `403 Una #### Authorization problem The Prometheus `/targets` page will show the kubelet job with the error `401 Unauthorized`, when token authorization is not enabled. Ensure that the `--authorization-mode=Webhook` flag is enabled on all kubelet configurations. -### kube-state-metrics resource usaged + +### kube-state-metrics resource usage In some environments, kube-state-metrics may need additional resources. One driver for more resource needs, is a high number of namespaces. There may be others. -kube-state-metrics has it's resources using an +kube-state-metrics resource allocation is managed by [addon-resizer](https://github.com/kubernetes/autoscaler/tree/master/addon-resizer/nanny) You can control it's parameters by setting variables in the config. 
They default to: ``` jsonnet - resizer+:: { - kubeStateMetrics+:: { - cpu: '100m', - extraCpu: '2m', - memory: '150Mi', - extraMemory: '30Mi', - }, + kubeStateMetrics+:: { + baseCPU: '100m', + cpuPerNode: '2m', + baseMemory: '150Mi', + memoryPerNode: '30Mi', } ``` diff --git a/jsonnet/kube-prometheus/kube-state-metrics/kube-state-metrics.libsonnet b/jsonnet/kube-prometheus/kube-state-metrics/kube-state-metrics.libsonnet index f9065282..59c0104a 100644 --- a/jsonnet/kube-prometheus/kube-state-metrics/kube-state-metrics.libsonnet +++ b/jsonnet/kube-prometheus/kube-state-metrics/kube-state-metrics.libsonnet @@ -5,19 +5,14 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; namespace: 'default', kubeStateMetrics+:: { - // when this is an empty string, you get the default set - collectors: '', - scrapeTimeout: '', + collectors: '', // empty string gets a default set scrapeInterval: '30s', - }, + scrapeTimeout: '', - resizer+:: { - kubeStateMetrics+:: { - cpu: '100m', - extraCpu: '2m', - memory: '150Mi', - extraMemory: '30Mi', - }, + baseCPU: '100m', + baseMemory: '150Mi', + cpuPerNode: '2m', + memoryPerNode: '30Mi', }, versions+:: { @@ -153,20 +148,19 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; '--port=8081', '--telemetry-host=127.0.0.1', '--telemetry-port=8082', - '--collectors=' + $._config.kubeStateMetrics.collectors, - ]) + - container.mixin.resources.withRequests({ cpu: $._config.resizer.kubeStateMetrics.cpu, memory: $._config.resizer.kubeStateMetrics.memory }) + - container.mixin.resources.withLimits({ cpu: $._config.resizer.kubeStateMetrics.cpu, memory: $._config.resizer.kubeStateMetrics.memory }); + ] + if $._config.kubeStateMetrics.collectors != '' then ['--collectors=' + $._config.kubeStateMetrics.collectors] else []) + + container.mixin.resources.withRequests({ cpu: $._config.kubeStateMetrics.baseCPU, memory: $._config.kubeStateMetrics.baseMemory }) + + container.mixin.resources.withLimits({ cpu: $._config.kubeStateMetrics.baseCPU, memory: 
$._config.kubeStateMetrics.baseMemory }); local addonResizer = container.new('addon-resizer', $._config.imageRepos.addonResizer + ':' + $._config.versions.addonResizer) + container.withCommand([ '/pod_nanny', '--container=kube-state-metrics', - '--cpu=' + $._config.resizer.kubeStateMetrics.cpu, - '--extra-cpu=' + $._config.resizer.kubeStateMetrics.extraCpu, - '--memory=' + $._config.resizer.kubeStateMetrics.memory, - '--extra-memory=' + $._config.resizer.kubeStateMetrics.extraMemory, + '--cpu=' + $._config.kubeStateMetrics.baseCPU, + '--extra-cpu=' + $._config.kubeStateMetrics.cpuPerNode, + '--memory=' + $._config.kubeStateMetrics.baseMemory, + '--extra-memory=' + $._config.kubeStateMetrics.memoryPerNode, '--threshold=5', '--deployment=kube-state-metrics', ]) + From 596b8697d005fa388515a675346ca054f29f17e2 Mon Sep 17 00:00:00 2001 From: seph Date: Tue, 17 Jul 2018 10:13:18 -0400 Subject: [PATCH 5/7] Set default scrape values We default to a 30s scrapeInterval, so we may as well also default scrapeTimeout to the same value. 
--- .../kube-state-metrics/kube-state-metrics.libsonnet | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/jsonnet/kube-prometheus/kube-state-metrics/kube-state-metrics.libsonnet b/jsonnet/kube-prometheus/kube-state-metrics/kube-state-metrics.libsonnet index 59c0104a..2805fc9d 100644 --- a/jsonnet/kube-prometheus/kube-state-metrics/kube-state-metrics.libsonnet +++ b/jsonnet/kube-prometheus/kube-state-metrics/kube-state-metrics.libsonnet @@ -7,7 +7,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; kubeStateMetrics+:: { collectors: '', // empty string gets a default set scrapeInterval: '30s', - scrapeTimeout: '', + scrapeTimeout: '30s', baseCPU: '100m', baseMemory: '150Mi', @@ -270,12 +270,13 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; port: 'https-main', scheme: 'https', interval: $._config.kubeStateMetrics.scrapeInterval, + scrapeTimeout: $._config.kubeStateMetrics.scrapeTimeout, honorLabels: true, bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token', tlsConfig: { insecureSkipVerify: true, }, - } + if $._config.kubeStateMetrics.scrapeTimeout != '' then { scrapeTimeout: $._config.kubeStateMetrics.scrapeTimeout } else {}, + }, { port: 'https-self', scheme: 'https', From 04cf9ce35a6dd66006d1a58d2b7d720bf55efcef Mon Sep 17 00:00:00 2001 From: Frederic Branczyk Date: Tue, 17 Jul 2018 19:49:42 +0200 Subject: [PATCH 6/7] *: Re-generate --- manifests/grafana-dashboardDefinitions.yaml | 34 +++++++++++++++++-- manifests/kube-state-metrics-deployment.yaml | 8 ++--- .../kube-state-metrics-serviceMonitor.yaml | 1 + manifests/prometheus-rules.yaml | 2 +- 4 files changed, 38 insertions(+), 7 deletions(-) diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index a0dba292..1143970e 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -2501,6 +2501,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": 
"", + "refId": "A", "step": 10 }, { @@ -2509,6 +2510,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "B", "step": 10 }, { @@ -2517,6 +2519,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "C", "step": 10 }, { @@ -2525,6 +2528,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "D", "step": 10 }, { @@ -2533,6 +2537,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "E", "step": 10 } ], @@ -2861,6 +2866,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "A", "step": 10 }, { @@ -2869,6 +2875,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "B", "step": 10 }, { @@ -2877,6 +2884,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "C", "step": 10 }, { @@ -2885,6 +2893,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "D", "step": 10 }, { @@ -2893,6 +2902,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "E", "step": 10 } ], @@ -3303,6 +3313,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "A", "step": 10 }, { @@ -3311,6 +3322,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "B", "step": 10 }, { @@ -3319,6 +3331,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "C", "step": 10 }, { @@ -3327,6 +3340,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "D", "step": 10 }, { @@ -3335,6 +3349,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "E", "step": 10 } ], @@ -3663,6 +3678,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "A", "step": 10 }, { @@ -3671,6 +3687,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "B", "step": 10 }, { @@ -3679,6 +3696,7 @@ items: "instant": true, 
"intervalFactor": 2, "legendFormat": "", + "refId": "C", "step": 10 }, { @@ -3687,6 +3705,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "D", "step": 10 }, { @@ -3695,6 +3714,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "E", "step": 10 } ], @@ -4132,6 +4152,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "A", "step": 10 }, { @@ -4140,6 +4161,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "B", "step": 10 }, { @@ -4148,6 +4170,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "C", "step": 10 }, { @@ -4156,6 +4179,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "D", "step": 10 }, { @@ -4164,6 +4188,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "E", "step": 10 } ], @@ -4492,6 +4517,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "A", "step": 10 }, { @@ -4500,6 +4526,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "B", "step": 10 }, { @@ -4508,6 +4535,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "C", "step": 10 }, { @@ -4516,6 +4544,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "D", "step": 10 }, { @@ -4524,6 +4553,7 @@ items: "instant": true, "intervalFactor": 2, "legendFormat": "", + "refId": "E", "step": 10 } ], @@ -5696,14 +5726,14 @@ items: "refId": "A" }, { - "expr": "sum by(container) (kube_pod_container_resource_requests_memory_bytes{job=\"kubelet\", namespace=\"$namespace\", pod=\"$pod\", container=\u007e\"$container\", container!=\"POD\"})", + "expr": "sum by(container) (kube_pod_container_resource_requests_memory_bytes{job=\"kube-state-metrics\", namespace=\"$namespace\", pod=\"$pod\", container=\u007e\"$container\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "Requested: 
{{ container }}", "refId": "B" }, { - "expr": "sum by(container) (kube_pod_container_resource_limits_memory_bytes{job=\"kubelet\", namespace=\"$namespace\", pod=\"$pod\", container=\u007e\"$container\", container!=\"POD\"})", + "expr": "sum by(container) (kube_pod_container_resource_limits_memory_bytes{job=\"kube-state-metrics\", namespace=\"$namespace\", pod=\"$pod\", container=\u007e\"$container\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "Limit: {{ container }}", diff --git a/manifests/kube-state-metrics-deployment.yaml b/manifests/kube-state-metrics-deployment.yaml index c7bb25c6..065c87a9 100644 --- a/manifests/kube-state-metrics-deployment.yaml +++ b/manifests/kube-state-metrics-deployment.yaml @@ -55,11 +55,11 @@ spec: name: kube-state-metrics resources: limits: - cpu: 102m - memory: 180Mi + cpu: 100m + memory: 150Mi requests: - cpu: 102m - memory: 180Mi + cpu: 100m + memory: 150Mi - command: - /pod_nanny - --container=kube-state-metrics diff --git a/manifests/kube-state-metrics-serviceMonitor.yaml b/manifests/kube-state-metrics-serviceMonitor.yaml index 3d1073ad..2100449d 100644 --- a/manifests/kube-state-metrics-serviceMonitor.yaml +++ b/manifests/kube-state-metrics-serviceMonitor.yaml @@ -12,6 +12,7 @@ spec: interval: 30s port: https-main scheme: https + scrapeTimeout: 30s tlsConfig: insecureSkipVerify: true - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index 75d5f36e..49c4a995 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -388,7 +388,7 @@ spec: kube_deployment_spec_replicas{job="kube-state-metrics"} != kube_deployment_status_replicas_available{job="kube-state-metrics"} - for: 15m + for: 1h labels: severity: critical - alert: KubeStatefulSetReplicasMismatch From ade7b88d654d698def489e860c5e36d522db6c44 Mon Sep 17 00:00:00 2001 From: Frederic Branczyk Date: Wed, 18 Jul 2018 10:25:09 +0200 
Subject: [PATCH 7/7] Update jsonnet dependencies --- manifests/grafana-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manifests/grafana-deployment.yaml b/manifests/grafana-deployment.yaml index 4b00b004..cb8cc9d8 100644 --- a/manifests/grafana-deployment.yaml +++ b/manifests/grafana-deployment.yaml @@ -16,7 +16,7 @@ spec: app: grafana spec: containers: - - image: grafana/grafana:5.1.0 + - image: grafana/grafana:5.2.1 name: grafana ports: - containerPort: 3000