Merge remote-tracking branch 'master' into release-0.22

2018-07-24 17:25:40 +02:00
parent 8edd622f2e e47243b413
commit b7a9751f09
11 changed files with 68 additions and 30 deletions
--- a/README.md
+++ b/README.md
@@ -369,3 +369,23 @@ The Prometheus `/targets` page will show the kubelet job with the error `403 Una
 #### Authorization problem

 The Prometheus `/targets` page will show the kubelet job with the error `401 Unauthorized`, when token authorization is not enabled. Ensure that the `--authorization-mode=Webhook` flag is enabled on all kubelet configurations.
+
+### kube-state-metrics resource usage
+
+In some environments, kube-state-metrics may need additional
+resources. One driver of higher resource needs is a high number of
+namespaces. There may be others.
+
+kube-state-metrics resource allocation is managed by
+[addon-resizer](https://github.com/kubernetes/autoscaler/tree/master/addon-resizer/nanny).
+You can control its parameters by setting variables in the
+config. They default to:
+
+``` jsonnet
+    kubeStateMetrics+:: {
+      baseCPU: '100m',
+      cpuPerNode: '2m',
+      baseMemory: '150Mi',
+      memoryPerNode: '30Mi',
+    }
+```
--- a/experimental/metrics-server/metrics-server-cluster-role.yaml
+++ b/experimental/metrics-server/metrics-server-cluster-role.yaml
@@ -8,6 +8,7 @@ rules:
  resources:
  - pods
  - nodes
+  - nodes/stats
  - namespaces
  verbs:
  - get
--- a/jsonnet/kube-prometheus/alerts/node.libsonnet
+++ b/jsonnet/kube-prometheus/alerts/node.libsonnet
@@ -11,7 +11,7 @@
              summary: 'Node disk is running full within 24 hours',
            },
            expr: |||
-              predict_linear(node_filesystem_free{%(nodeExporterSelector)s,mountpoint!~"^/etc/(?:resolv.conf|hosts|hostname)$"}[6h], 3600 * 24) < 0
+              predict_linear(node_filesystem_free{%(nodeExporterSelector)s,mountpoint!~"^/etc/(?:resolv.conf|hosts|hostname)$"}[6h], 3600 * 24) < 0 and on(instance) up{%(nodeExporterSelector)s}
            ||| % $._config,
            'for': '30m',
            labels: {
@@ -25,7 +25,7 @@
              summary: 'Node disk is running full within 2 hours',
            },
            expr: |||
-              predict_linear(node_filesystem_free{%(nodeExporterSelector)s,mountpoint!~"^/etc/(?:resolv.conf|hosts|hostname)$"}[30m], 3600 * 2) < 0
+              predict_linear(node_filesystem_free{%(nodeExporterSelector)s,mountpoint!~"^/etc/(?:resolv.conf|hosts|hostname)$"}[30m], 3600 * 2) < 0 and on(instance) up{%(nodeExporterSelector)s}
            ||| % $._config,
            'for': '10m',
            labels: {
--- a/jsonnet/kube-prometheus/kube-state-metrics/kube-state-metrics.libsonnet
+++ b/jsonnet/kube-prometheus/kube-state-metrics/kube-state-metrics.libsonnet
@@ -4,6 +4,17 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
  _config+:: {
    namespace: 'default',

+    kubeStateMetrics+:: {
+      collectors: '',  // empty string gets a default set
+      scrapeInterval: '30s',
+      scrapeTimeout: '30s',
+
+      baseCPU: '100m',
+      baseMemory: '150Mi',
+      cpuPerNode: '2m',
+      memoryPerNode: '30Mi',
+    },
+
    versions+:: {
      kubeStateMetrics: 'v1.3.1',
      kubeRbacProxy: 'v0.3.1',
@@ -137,19 +148,19 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
          '--port=8081',
          '--telemetry-host=127.0.0.1',
          '--telemetry-port=8082',
-        ]) +
-        container.mixin.resources.withRequests({ cpu: '102m', memory: '180Mi' }) +
-        container.mixin.resources.withLimits({ cpu: '102m', memory: '180Mi' });
+        ] + if $._config.kubeStateMetrics.collectors != '' then ['--collectors=' + $._config.kubeStateMetrics.collectors] else []) +
+        container.mixin.resources.withRequests({ cpu: $._config.kubeStateMetrics.baseCPU, memory: $._config.kubeStateMetrics.baseMemory }) +
+        container.mixin.resources.withLimits({ cpu: $._config.kubeStateMetrics.baseCPU, memory: $._config.kubeStateMetrics.baseMemory });

      local addonResizer =
        container.new('addon-resizer', $._config.imageRepos.addonResizer + ':' + $._config.versions.addonResizer) +
        container.withCommand([
          '/pod_nanny',
          '--container=kube-state-metrics',
-          '--cpu=100m',
-          '--extra-cpu=2m',
-          '--memory=150Mi',
-          '--extra-memory=30Mi',
+          '--cpu=' + $._config.kubeStateMetrics.baseCPU,
+          '--extra-cpu=' + $._config.kubeStateMetrics.cpuPerNode,
+          '--memory=' + $._config.kubeStateMetrics.baseMemory,
+          '--extra-memory=' + $._config.kubeStateMetrics.memoryPerNode,
          '--threshold=5',
          '--deployment=kube-state-metrics',
        ]) +
@@ -258,7 +269,8 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
            {
              port: 'https-main',
              scheme: 'https',
-              interval: '30s',
+              interval: $._config.kubeStateMetrics.scrapeInterval,
+              scrapeTimeout: $._config.kubeStateMetrics.scrapeTimeout,
              honorLabels: true,
              bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
              tlsConfig: {
--- a/manifests/0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml
+++ b/manifests/0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml
@@ -23,8 +23,8 @@ spec:
            submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
          type: string
        spec:
-          description: 'Specification of the desired behavior of the Alertmanager
-            cluster. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status'
+          description: 'AlertmanagerSpec is a specification of the desired behavior
+            of the Alertmanager cluster. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status'
          properties:
            affinity:
              description: Affinity is a group of affinity scheduling rules.
@@ -2372,9 +2372,9 @@ spec:
              description: Version the cluster should be on.
              type: string
        status:
-          description: 'Most recent observed status of the Alertmanager cluster. Read-only.
-            Not included when requesting from the apiserver, only from the Prometheus
-            Operator API itself. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status'
+          description: 'AlertmanagerStatus is the most recent observed status of the
+            Alertmanager cluster. Read-only. Not included when requesting from the
+            apiserver, only from the Prometheus Operator API itself. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status'
          properties:
            availableReplicas:
              description: Total number of available pods (ready for at least minReadySeconds)
--- a/manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml
+++ b/manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml
@@ -23,8 +23,8 @@ spec:
            submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
          type: string
        spec:
-          description: 'Specification of the desired behavior of the Prometheus cluster.
-            More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status'
+          description: 'PrometheusSpec is a specification of the desired behavior
+            of the Prometheus cluster. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status'
          properties:
            additionalAlertManagerConfigs:
              description: SecretKeySelector selects a key of a Secret.
@@ -2862,7 +2862,7 @@ spec:
                  description: Peers is a DNS name for Thanos to discover peers through.
                  type: string
                s3:
-                  description: ThanosSpec defines parameters for of AWS Simple Storage
+                  description: ThanosS3Spec defines parameters for of AWS Simple Storage
                    Service (S3) with Thanos. (S3 compatible services apply as well)
                  properties:
                    accessKey:
@@ -2961,9 +2961,9 @@ spec:
              description: Version of Prometheus to be deployed.
              type: string
        status:
-          description: 'Most recent observed status of the Prometheus cluster. Read-only.
-            Not included when requesting from the apiserver, only from the Prometheus
-            Operator API itself. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status'
+          description: 'PrometheusStatus is the most recent observed status of the
+            Prometheus cluster. Read-only. Not included when requesting from the apiserver,
+            only from the Prometheus Operator API itself. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status'
          properties:
            availableReplicas:
              description: Total number of available pods (ready for at least minReadySeconds)
--- a/manifests/0prometheus-operator-0servicemonitorCustomResourceDefinition.yaml
+++ b/manifests/0prometheus-operator-0servicemonitorCustomResourceDefinition.yaml
@@ -169,7 +169,7 @@ spec:
              description: The label to use to retrieve the job name from.
              type: string
            namespaceSelector:
-              description: A selector for selecting namespaces either selecting all
+              description: NamespaceSelector is a selector for selecting either all
                namespaces or a list of namespaces.
              properties:
                any:
--- a/manifests/0prometheus-operator-deployment.yaml
+++ b/manifests/0prometheus-operator-deployment.yaml
@@ -18,6 +18,7 @@ spec:
      containers:
      - args:
        - --kubelet-service=kube-system/kubelet
+        - -logtostderr=true
        - --config-reloader-image=quay.io/coreos/configmap-reload:v0.0.1
        - --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.22.2
        image: quay.io/coreos/prometheus-operator:v0.22.2
@@ -28,10 +29,13 @@ spec:
        resources:
          limits:
            cpu: 200m
-            memory: 100Mi
+            memory: 200Mi
          requests:
            cpu: 100m
-            memory: 50Mi
+            memory: 100Mi
+        securityContext:
+          allowPrivilegeEscalation: false
+          readOnlyRootFilesystem: true
      nodeSelector:
        beta.kubernetes.io/os: linux
      securityContext:
--- a/manifests/kube-state-metrics-deployment.yaml
+++ b/manifests/kube-state-metrics-deployment.yaml
@@ -55,11 +55,11 @@ spec:
        name: kube-state-metrics
        resources:
          limits:
-            cpu: 102m
-            memory: 180Mi
+            cpu: 100m
+            memory: 150Mi
          requests:
-            cpu: 102m
-            memory: 180Mi
+            cpu: 100m
+            memory: 150Mi
      - command:
        - /pod_nanny
        - --container=kube-state-metrics
--- a/manifests/kube-state-metrics-serviceMonitor.yaml
+++ b/manifests/kube-state-metrics-serviceMonitor.yaml
@@ -12,6 +12,7 @@ spec:
    interval: 30s
    port: https-main
    scheme: https
+    scrapeTimeout: 30s
    tlsConfig:
      insecureSkipVerify: true
  - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
--- a/manifests/prometheus-rules.yaml
+++ b/manifests/prometheus-rules.yaml
@@ -744,7 +744,7 @@ spec:
          full within the next 24 hours (mounted at {{$labels.mountpoint}})
        summary: Node disk is running full within 24 hours
      expr: |
-        predict_linear(node_filesystem_free{job="node-exporter",mountpoint!~"^/etc/(?:resolv.conf|hosts|hostname)$"}[6h], 3600 * 24) < 0
+        predict_linear(node_filesystem_free{job="node-exporter",mountpoint!~"^/etc/(?:resolv.conf|hosts|hostname)$"}[6h], 3600 * 24) < 0 and on(instance) up{job="node-exporter"}
      for: 30m
      labels:
        severity: warning
@@ -754,7 +754,7 @@ spec:
          full within the next 2 hours (mounted at {{$labels.mountpoint}})
        summary: Node disk is running full within 2 hours
      expr: |
-        predict_linear(node_filesystem_free{job="node-exporter",mountpoint!~"^/etc/(?:resolv.conf|hosts|hostname)$"}[30m], 3600 * 2) < 0
+        predict_linear(node_filesystem_free{job="node-exporter",mountpoint!~"^/etc/(?:resolv.conf|hosts|hostname)$"}[30m], 3600 * 2) < 0 and on(instance) up{job="node-exporter"}
      for: 10m
      labels:
        severity: critical