Merge remote-tracking branch 'master' into release-0.22
This commit is contained in:
20
README.md
20
README.md
@@ -369,3 +369,23 @@ The Prometheus `/targets` page will show the kubelet job with the error `403 Una
|
||||
#### Authorization problem
|
||||
|
||||
The Prometheus `/targets` page will show the kubelet job with the error `401 Unauthorized`, when token authorization is not enabled. Ensure that the `--authorization-mode=Webhook` flag is enabled on all kubelet configurations.
|
||||
|
||||
### kube-state-metrics resource usage
|
||||
|
||||
In some environments, kube-state-metrics may need additional
|
||||
resources. One driver of increased resource needs is a high number of
|
||||
namespaces. There may be others.
|
||||
|
||||
kube-state-metrics resource allocation is managed by
|
||||
[addon-resizer](https://github.com/kubernetes/autoscaler/tree/master/addon-resizer/nanny).
|
||||
You can control its parameters by setting variables in the
|
||||
config. They default to:
|
||||
|
||||
``` jsonnet
|
||||
kubeStateMetrics+:: {
|
||||
baseCPU: '100m',
|
||||
cpuPerNode: '2m',
|
||||
baseMemory: '150Mi',
|
||||
memoryPerNode: '30Mi',
|
||||
}
|
||||
```
|
||||
|
@@ -8,6 +8,7 @@ rules:
|
||||
resources:
|
||||
- pods
|
||||
- nodes
|
||||
- nodes/stats
|
||||
- namespaces
|
||||
verbs:
|
||||
- get
|
||||
|
@@ -11,7 +11,7 @@
|
||||
summary: 'Node disk is running full within 24 hours',
|
||||
},
|
||||
expr: |||
|
||||
predict_linear(node_filesystem_free{%(nodeExporterSelector)s,mountpoint!~"^/etc/(?:resolv.conf|hosts|hostname)$"}[6h], 3600 * 24) < 0
|
||||
predict_linear(node_filesystem_free{%(nodeExporterSelector)s,mountpoint!~"^/etc/(?:resolv.conf|hosts|hostname)$"}[6h], 3600 * 24) < 0 and on(instance) up{%(nodeExporterSelector)s}
|
||||
||| % $._config,
|
||||
'for': '30m',
|
||||
labels: {
|
||||
@@ -25,7 +25,7 @@
|
||||
summary: 'Node disk is running full within 2 hours',
|
||||
},
|
||||
expr: |||
|
||||
predict_linear(node_filesystem_free{%(nodeExporterSelector)s,mountpoint!~"^/etc/(?:resolv.conf|hosts|hostname)$"}[30m], 3600 * 2) < 0
|
||||
predict_linear(node_filesystem_free{%(nodeExporterSelector)s,mountpoint!~"^/etc/(?:resolv.conf|hosts|hostname)$"}[30m], 3600 * 2) < 0 and on(instance) up{%(nodeExporterSelector)s}
|
||||
||| % $._config,
|
||||
'for': '10m',
|
||||
labels: {
|
||||
|
@@ -4,6 +4,17 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
_config+:: {
|
||||
namespace: 'default',
|
||||
|
||||
kubeStateMetrics+:: {
|
||||
collectors: '', // empty string gets a default set
|
||||
scrapeInterval: '30s',
|
||||
scrapeTimeout: '30s',
|
||||
|
||||
baseCPU: '100m',
|
||||
baseMemory: '150Mi',
|
||||
cpuPerNode: '2m',
|
||||
memoryPerNode: '30Mi',
|
||||
},
|
||||
|
||||
versions+:: {
|
||||
kubeStateMetrics: 'v1.3.1',
|
||||
kubeRbacProxy: 'v0.3.1',
|
||||
@@ -137,19 +148,19 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
'--port=8081',
|
||||
'--telemetry-host=127.0.0.1',
|
||||
'--telemetry-port=8082',
|
||||
]) +
|
||||
container.mixin.resources.withRequests({ cpu: '102m', memory: '180Mi' }) +
|
||||
container.mixin.resources.withLimits({ cpu: '102m', memory: '180Mi' });
|
||||
] + if $._config.kubeStateMetrics.collectors != '' then ['--collectors=' + $._config.kubeStateMetrics.collectors] else []) +
|
||||
container.mixin.resources.withRequests({ cpu: $._config.kubeStateMetrics.baseCPU, memory: $._config.kubeStateMetrics.baseMemory }) +
|
||||
container.mixin.resources.withLimits({ cpu: $._config.kubeStateMetrics.baseCPU, memory: $._config.kubeStateMetrics.baseMemory });
|
||||
|
||||
local addonResizer =
|
||||
container.new('addon-resizer', $._config.imageRepos.addonResizer + ':' + $._config.versions.addonResizer) +
|
||||
container.withCommand([
|
||||
'/pod_nanny',
|
||||
'--container=kube-state-metrics',
|
||||
'--cpu=100m',
|
||||
'--extra-cpu=2m',
|
||||
'--memory=150Mi',
|
||||
'--extra-memory=30Mi',
|
||||
'--cpu=' + $._config.kubeStateMetrics.baseCPU,
|
||||
'--extra-cpu=' + $._config.kubeStateMetrics.cpuPerNode,
|
||||
'--memory=' + $._config.kubeStateMetrics.baseMemory,
|
||||
'--extra-memory=' + $._config.kubeStateMetrics.memoryPerNode,
|
||||
'--threshold=5',
|
||||
'--deployment=kube-state-metrics',
|
||||
]) +
|
||||
@@ -258,7 +269,8 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
{
|
||||
port: 'https-main',
|
||||
scheme: 'https',
|
||||
interval: '30s',
|
||||
interval: $._config.kubeStateMetrics.scrapeInterval,
|
||||
scrapeTimeout: $._config.kubeStateMetrics.scrapeTimeout,
|
||||
honorLabels: true,
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
tlsConfig: {
|
||||
|
@@ -23,8 +23,8 @@ spec:
|
||||
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
|
||||
type: string
|
||||
spec:
|
||||
description: 'Specification of the desired behavior of the Alertmanager
|
||||
cluster. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status'
|
||||
description: 'AlertmanagerSpec is a specification of the desired behavior
|
||||
of the Alertmanager cluster. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status'
|
||||
properties:
|
||||
affinity:
|
||||
description: Affinity is a group of affinity scheduling rules.
|
||||
@@ -2372,9 +2372,9 @@ spec:
|
||||
description: Version the cluster should be on.
|
||||
type: string
|
||||
status:
|
||||
description: 'Most recent observed status of the Alertmanager cluster. Read-only.
|
||||
Not included when requesting from the apiserver, only from the Prometheus
|
||||
Operator API itself. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status'
|
||||
description: 'AlertmanagerStatus is the most recent observed status of the
|
||||
Alertmanager cluster. Read-only. Not included when requesting from the
|
||||
apiserver, only from the Prometheus Operator API itself. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status'
|
||||
properties:
|
||||
availableReplicas:
|
||||
description: Total number of available pods (ready for at least minReadySeconds)
|
||||
|
@@ -23,8 +23,8 @@ spec:
|
||||
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
|
||||
type: string
|
||||
spec:
|
||||
description: 'Specification of the desired behavior of the Prometheus cluster.
|
||||
More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status'
|
||||
description: 'PrometheusSpec is a specification of the desired behavior
|
||||
of the Prometheus cluster. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status'
|
||||
properties:
|
||||
additionalAlertManagerConfigs:
|
||||
description: SecretKeySelector selects a key of a Secret.
|
||||
@@ -2862,7 +2862,7 @@ spec:
|
||||
description: Peers is a DNS name for Thanos to discover peers through.
|
||||
type: string
|
||||
s3:
|
||||
description: ThanosSpec defines parameters for of AWS Simple Storage
|
||||
description: ThanosS3Spec defines parameters for of AWS Simple Storage
|
||||
Service (S3) with Thanos. (S3 compatible services apply as well)
|
||||
properties:
|
||||
accessKey:
|
||||
@@ -2961,9 +2961,9 @@ spec:
|
||||
description: Version of Prometheus to be deployed.
|
||||
type: string
|
||||
status:
|
||||
description: 'Most recent observed status of the Prometheus cluster. Read-only.
|
||||
Not included when requesting from the apiserver, only from the Prometheus
|
||||
Operator API itself. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status'
|
||||
description: 'PrometheusStatus is the most recent observed status of the
|
||||
Prometheus cluster. Read-only. Not included when requesting from the apiserver,
|
||||
only from the Prometheus Operator API itself. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status'
|
||||
properties:
|
||||
availableReplicas:
|
||||
description: Total number of available pods (ready for at least minReadySeconds)
|
||||
|
@@ -169,7 +169,7 @@ spec:
|
||||
description: The label to use to retrieve the job name from.
|
||||
type: string
|
||||
namespaceSelector:
|
||||
description: A selector for selecting namespaces either selecting all
|
||||
description: NamespaceSelector is a selector for selecting either all
|
||||
namespaces or a list of namespaces.
|
||||
properties:
|
||||
any:
|
||||
|
@@ -18,6 +18,7 @@ spec:
|
||||
containers:
|
||||
- args:
|
||||
- --kubelet-service=kube-system/kubelet
|
||||
- -logtostderr=true
|
||||
- --config-reloader-image=quay.io/coreos/configmap-reload:v0.0.1
|
||||
- --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.22.2
|
||||
image: quay.io/coreos/prometheus-operator:v0.22.2
|
||||
@@ -28,10 +29,13 @@ spec:
|
||||
resources:
|
||||
limits:
|
||||
cpu: 200m
|
||||
memory: 100Mi
|
||||
memory: 200Mi
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 50Mi
|
||||
memory: 100Mi
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
readOnlyRootFilesystem: true
|
||||
nodeSelector:
|
||||
beta.kubernetes.io/os: linux
|
||||
securityContext:
|
||||
|
@@ -55,11 +55,11 @@ spec:
|
||||
name: kube-state-metrics
|
||||
resources:
|
||||
limits:
|
||||
cpu: 102m
|
||||
memory: 180Mi
|
||||
cpu: 100m
|
||||
memory: 150Mi
|
||||
requests:
|
||||
cpu: 102m
|
||||
memory: 180Mi
|
||||
cpu: 100m
|
||||
memory: 150Mi
|
||||
- command:
|
||||
- /pod_nanny
|
||||
- --container=kube-state-metrics
|
||||
|
@@ -12,6 +12,7 @@ spec:
|
||||
interval: 30s
|
||||
port: https-main
|
||||
scheme: https
|
||||
scrapeTimeout: 30s
|
||||
tlsConfig:
|
||||
insecureSkipVerify: true
|
||||
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
|
@@ -744,7 +744,7 @@ spec:
|
||||
full within the next 24 hours (mounted at {{$labels.mountpoint}})
|
||||
summary: Node disk is running full within 24 hours
|
||||
expr: |
|
||||
predict_linear(node_filesystem_free{job="node-exporter",mountpoint!~"^/etc/(?:resolv.conf|hosts|hostname)$"}[6h], 3600 * 24) < 0
|
||||
predict_linear(node_filesystem_free{job="node-exporter",mountpoint!~"^/etc/(?:resolv.conf|hosts|hostname)$"}[6h], 3600 * 24) < 0 and on(instance) up{job="node-exporter"}
|
||||
for: 30m
|
||||
labels:
|
||||
severity: warning
|
||||
@@ -754,7 +754,7 @@ spec:
|
||||
full within the next 2 hours (mounted at {{$labels.mountpoint}})
|
||||
summary: Node disk is running full within 2 hours
|
||||
expr: |
|
||||
predict_linear(node_filesystem_free{job="node-exporter",mountpoint!~"^/etc/(?:resolv.conf|hosts|hostname)$"}[30m], 3600 * 2) < 0
|
||||
predict_linear(node_filesystem_free{job="node-exporter",mountpoint!~"^/etc/(?:resolv.conf|hosts|hostname)$"}[30m], 3600 * 2) < 0 and on(instance) up{job="node-exporter"}
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
|
Reference in New Issue
Block a user