Compare commits

..

197 Commits

Author SHA1 Message Date
Lili Cosic
ee8077db04 Merge pull request #476 from dgrisonnet/ci-test-compat-04
ci: update release-0.4 e2e tests according to compat matrix
2020-04-01 19:02:30 +02:00
Damien Grisonnet
d3bee7fa1a test: increase pod polling time
The original polling time was a bit short for all pods to come up, which made e2e
tests fail half of the time.

Signed-off-by: Damien Grisonnet <dgrisonn@redhat.com>
2020-03-31 18:26:56 +02:00
Damien Grisonnet
106132ac18 Makefile: pin jsonnet-ci to 0.36
Signed-off-by: Damien Grisonnet <dgrisonn@redhat.com>
2020-03-31 16:21:06 +02:00
Damien Grisonnet
8961be9639 ci: update e2e tests according to compat matrix
Signed-off-by: Damien Grisonnet <dgrisonn@redhat.com>
2020-03-31 14:38:20 +02:00
Frederic Branczyk
ae589e91ce Merge pull request #401 from s-urbaniak/up-down-0.4
[backport] jsonnet: add general rules for up/down targets
2020-01-30 16:05:49 +01:00
Sergiusz Urbaniak
8367575768 manifests: regenerate 2020-01-30 14:34:58 +01:00
Sergiusz Urbaniak
6b5033d65e jsonnet: add general rules for up/down targets 2020-01-30 14:29:40 +01:00
Paweł Krupa
68d6e611c6 Fast forward release-0.4 to master (#389)
Fast forward release-0.4 to master
2020-01-23 15:36:04 +01:00
Frederic Branczyk
f2b4528b63 Merge pull request #387 from brancz/reduce-histogram-buckets
*: Throw away unused high cardinality apiserver duration buckets
2020-01-23 15:32:18 +01:00
Krasi Georgiev
be8eb39024 re-added most collectors
Signed-off-by: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com>
2020-01-23 15:18:59 +01:00
Krasi Georgiev
629e86e53a remove some unused collectors
Signed-off-by: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com>
2020-01-23 15:18:59 +01:00
Frederic Branczyk
a7628e0223 Merge pull request #381 from krasi-georgiev/remove-collectors
remove some unused collectors
2020-01-23 14:50:47 +01:00
Krasi Georgiev
8984606f5d re-added most collectors
Signed-off-by: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com>
2020-01-23 15:17:56 +02:00
Frederic Branczyk
48d95f0b9f *: Throw away unused high cardinality apiserver duration buckets 2020-01-23 13:24:42 +01:00
Frederic Branczyk
e410043b6b Merge pull request #386 from paulfantom/bump_kube-mix
Bump kubernetes-mixins
2020-01-23 12:22:40 +01:00
paulfantom
894069f24d manifests: regenerate 2020-01-23 12:01:21 +01:00
paulfantom
d074ea1427 bump kubernetes-mixins dependency 2020-01-23 12:01:10 +01:00
Frederic Branczyk
269aef6e37 Merge pull request #384 from s-urbaniak/agg
prometheus-adapter: add nodes resource to aggregated-metrics-reader
2020-01-22 09:45:38 +01:00
Sergiusz Urbaniak
90e5982de4 manifests: regenerate 2020-01-21 20:43:47 +01:00
Sergiusz Urbaniak
7165938b39 prometheus-adapter: add nodes resource to aggregated-metrics-reader 2020-01-21 18:36:52 +01:00
Frederic Branczyk
9ebe632d5d Merge pull request #380 from omerlh/prom-all-namespaces
added patch to allow prom to watch all namespaces
2020-01-20 14:16:29 +01:00
Lili Cosic
72ae778bfc Merge pull request #382 from tlereste/update_kube_state_metrics
bump kube-state-metrics to version 1.9.2
2020-01-17 11:17:57 +01:00
Thibault Le Reste
0608c96bf6 bump kube-state-metrics to version 1.9.2 2020-01-15 13:12:35 +01:00
Krasi Georgiev
44f3c61010 remove some unused collectors
Signed-off-by: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com>
2020-01-15 12:03:04 +02:00
omerlh
f517b35a42 added patch to allow prom to watch all namespaces 2020-01-14 17:55:27 +02:00
Frederic Branczyk
54c0fda307 Merge pull request #378 from LiliC/drop-less
jsonnet,manifests: Do not drop all metrics
2020-01-14 14:55:54 +01:00
Lili Cosic
6a3d667d3e manifests: Regenerate files 2020-01-14 10:34:46 +01:00
Lili Cosic
d9d3139dc8 jsonnet: Drop exact metrics 2020-01-14 10:26:42 +01:00
Frederic Branczyk
67ed0f63c2 Merge pull request #371 from tlereste/update_kube_state_metrics_version
update kube-state-metrics version to 1.9.1
2020-01-10 14:47:42 +01:00
Thibault Le Reste
7788d0d327 update kube-state-metrics version to 1.9.1 2020-01-10 14:23:52 +01:00
Lili Cosic
fca505f2a2 Merge pull request #368 from jfassad/master
jsonnet/kube-prometheus/kube-state-metrics: Add missing clusterRole permissions
2020-01-10 11:47:45 +01:00
João Assad
d40548d3a0 manifests: Regenerate manifests 2020-01-09 15:24:50 -03:00
João Assad
dba42d3477 jsonnet/kube-prometheus/kube-state-metrics: add missing clusterRole permissions 2020-01-09 15:12:59 -03:00
Lili Cosic
ee37661c34 Merge pull request #367 from LiliC/bump-k8s
tests/e2e/travis-e2e.sh: Switch to 1.17 k8s cluster
2020-01-09 13:13:39 +01:00
Lili Cosic
8b36950f0e tests/e2e/travis-e2e.sh: Switch to 1.17 k8s cluster 2020-01-09 13:03:01 +01:00
Frederic Branczyk
932745172d Merge pull request #365 from LiliC/drop-kubelet
Drop correct deprecated metrics and add e2e test to ensure that
2020-01-08 17:39:26 +01:00
Lili Cosic
1af59f3130 tests/e2e: Add e2e test to make sure all deprecated metrics are being
dropped
2020-01-08 12:35:21 +01:00
Lili Cosic
6562b02da8 manifests/*: Regenerate manifests 2020-01-08 12:35:21 +01:00
Lili Cosic
23999e44df jsonnet/kube-prometheus/prometheus: Drop correct deprecated metrics 2020-01-08 12:35:21 +01:00
Frederic Branczyk
69d3357892 Merge pull request #362 from pgier/lock-version-of-prometheus-operator-jsonnet-dependency
lock prometheus-operator jsonnet dependencies to v0.34.0
2020-01-07 08:06:46 +01:00
Frederic Branczyk
3465b0fa0d Merge pull request #346 from omerlh/patch-1
fix coredns monitoring on EKS
2020-01-06 16:19:16 +01:00
Paul Gier
1d1ce4967f lock prometheus-operator jsonnet dependencies to release-0.34 branch
This prevents mismatch between prometheus-operator binary and related
CRD yaml files.
2020-01-06 09:16:42 -06:00
Frederic Branczyk
3a0e6ba91f Merge pull request #360 from omerlh/patch-2
added metric_path to kubelet/cadvisor selector
2020-01-06 13:24:23 +01:00
omerlh
81e2d19398 run make 2020-01-06 13:49:57 +02:00
Omer Levi Hevroni
92d4cbae08 added metric_path to kubelet/cadvisor selector 2020-01-06 11:52:48 +02:00
Omer Levi Hevroni
2e72a8a832 fix coredns monitoring on EKS 2019-12-23 12:39:21 +02:00
Lili Cosic
9493a1a5f7 Merge pull request #342 from tlereste/update_kube_state_metrics
update kube-state-metrics version to 1.9.0
2019-12-20 16:57:17 +01:00
Thibault LE RESTE
0a48577bb7 update kube-state-metrics version to 1.9.0 2019-12-20 16:21:52 +01:00
Frederic Branczyk
9211c42df0 Merge pull request #336 from LiliC/change-dropped-metrics
jsonnet/kube-prometheus: Adjust dropped deprecated metrics names
2019-12-19 13:05:37 +01:00
Lili Cosic
5cddfd8da7 manifests: Regenerate manifests 2019-12-19 10:10:46 +01:00
Lili Cosic
bd69007c8c jsonnet/kube-prometheus: Adjust dropped deprecated metrics names
The names were not complete in the kubernetes CHANGELOG.
2019-12-19 10:09:34 +01:00
Frederic Branczyk
4f2b9c1ec8 Merge pull request #332 from LiliC/remove-pin-release
jsonnet/kube-prometheus/jsonnetfile.json: Pin prometheus-operator version to master instead
2019-12-18 13:16:03 +01:00
Lili Cosic
0be63d47fc manifests: Regenerate manifests 2019-12-18 11:18:21 +01:00
Lili Cosic
5fe60f37a2 jsonnetfile.lock.json: Update 2019-12-18 11:18:21 +01:00
Lili Cosic
200fee8d7c jsonnet/kube-prometheus/jsonnetfile.json: Pin prometheus-operator
version to master instead
2019-12-18 11:18:21 +01:00
Frederic Branczyk
1b9be6d00b Merge pull request #330 from LiliC/remove-depr-metrics
jsonnet,manifests: Drop all metrics which are deprecated in kubernetes
2019-12-17 16:51:40 +01:00
Lili Cosic
ce68c4b392 manifests/*: Regenerate manifest 2019-12-17 15:13:04 +01:00
Lili Cosic
5e9b883528 jsonnet/kube-prometheus*: Drop deprecated kubernetes metrics
These metrics were deprecated in kubernetes from 1.14 and 1.15 onwards.
2019-12-17 15:13:04 +01:00
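The dropping itself is done with scrape-time relabeling. A minimal, hypothetical sketch of how a deprecated metric can be dropped via `metricRelabelings` on a ServiceMonitor endpoint (the metric name and port below are examples only, not the exact list used by this repository):

```jsonnet
// Hypothetical sketch: drop a deprecated metric at scrape time via
// metricRelabelings on a ServiceMonitor endpoint. The metric name and port
// are examples only.
{
  endpoints: [
    {
      port: 'https-metrics',
      metricRelabelings: [
        {
          sourceLabels: ['__name__'],
          regex: 'kubelet_pod_worker_latency_microseconds',
          action: 'drop',
        },
      ],
    },
  ],
}
```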
Paweł Krupa
69b0ba03f1 Merge pull request #329 from paulfantom/e2e
tests/e2e: reenable checking targets availability
2019-12-16 14:40:43 +01:00
paulfantom
3279f222a0 tests/e2e: reenable checking targets availability 2019-12-16 14:23:43 +01:00
Paweł Krupa
543ccec970 Fix typo in node-exporter DaemonSet (#328)
Fix typo in node-exporter DaemonSet
2019-12-16 12:56:49 +01:00
paulfantom
f17ddfd293 assets: regenerate 2019-12-16 12:53:49 +01:00
paulfantom
3b8530d742 jsonnet/kube-prometheus/node-exporter: fix typo 2019-12-16 12:53:39 +01:00
Frederic Branczyk
44fe363211 Merge pull request #327 from paulfantom/deps
Update dependencies
2019-12-16 12:14:26 +01:00
paulfantom
326453cf47 manifests: regenerate 2019-12-16 11:24:04 +01:00
paulfantom
159a14ef47 update jsonnet dependencies 2019-12-16 11:20:37 +01:00
Frederic Branczyk
d03d57e6bb Merge pull request #326 from paulfantom/ipv6
IPv6 compatibility
2019-12-16 10:34:51 +01:00
Frederic Branczyk
31cb71fcd9 Merge pull request #317 from josqu4red/podmonitor-default-ns
Enable discovery of Podmonitors across namespaces
2019-12-12 16:54:39 +01:00
paulfantom
4474b24a32 manifests: regenerate 2019-12-12 16:26:58 +01:00
paulfantom
339ade5a81 jsonnet/kube-prometheus/node-exporter: wrap pod ip address in square brackets for ipv6 compatibility reasons 2019-12-12 16:14:08 +01:00
Frederic Branczyk
ce7c5fa3b4 Merge pull request #325 from sereinity-forks/master
Make limits/requests resources of kube-state-metrics removable
2019-12-12 16:06:58 +01:00
Sereinity
3f388b797d Make limits/requests resources of kube-state-metrics removable, unify tuning 2019-12-12 15:50:34 +01:00
Frederic Branczyk
20abdf3b72 Merge pull request #323 from simonpasquier/bump-kubernetes-mixin
Bump kubernetes mixin
2019-12-10 17:05:35 +01:00
Simon Pasquier
cd0f3c641e regenerate
Signed-off-by: Simon Pasquier <spasquie@redhat.com>
2019-12-10 16:48:51 +01:00
Simon Pasquier
408fde189b Bump kubernetes-mixin
Signed-off-by: Simon Pasquier <spasquie@redhat.com>
2019-12-10 16:48:28 +01:00
Jonathan Amiez
90cf0ae21c Update generated manifests 2019-12-05 15:12:18 +01:00
Jonathan Amiez
3ba4b5602a Enable PodMonitors discovery across namespaces 2019-12-05 15:09:40 +01:00
Frederic Branczyk
cb0e6e2c89 Merge pull request #309 from benjaminhuo/master
Group alert by namespace instead of job
2019-12-04 08:38:04 +01:00
Benjamin
03f7adcf92 regenerate
Signed-off-by: Benjamin <benjamin@yunify.com>
2019-12-04 10:14:42 +08:00
Benjamin
fd267aebeb Merge remote-tracking branch 'upstream/master' 2019-12-04 10:09:14 +08:00
Benjamin
420425d88e regenerate
Signed-off-by: Benjamin <benjamin@yunify.com>
2019-12-03 23:46:08 +08:00
Benjamin
965bec0ad7 Change Alertmanager group by condition
Signed-off-by: Benjamin <benjamin@yunify.com>
2019-12-03 20:02:47 +08:00
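PR #309 changes the Alertmanager route to group alerts by namespace rather than by job. A minimal, hypothetical sketch of such a route expressed as a jsonnet object in Alertmanager's configuration format (the timing values and receiver name are examples, not taken from this repository):

```jsonnet
// Hypothetical sketch: an Alertmanager route that groups alerts by namespace
// instead of job. Timing values and receiver name are examples only.
{
  route: {
    group_by: ['namespace'],
    group_wait: '30s',
    group_interval: '5m',
    repeat_interval: '12h',
    receiver: 'Default',
  },
}
```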
Frederic Branczyk
d22bad8293 Merge pull request #313 from yeya24/update-apiverison
Update apiversion
2019-12-03 11:22:47 +01:00
Frederic Branczyk
8c255e9e6c Merge pull request #310 from paulfantom/node-exporter-scrape-interval
Change node-exporter scrape interval to follow best practices
2019-12-03 10:15:52 +01:00
yeya24
56027ac757 update apiversion
Signed-off-by: yeya24 <yb532204897@gmail.com>
2019-12-01 09:33:11 -05:00
paulfantom
50b06b0d33 manifests: regenerate 2019-11-27 15:11:06 +01:00
paulfantom
6f6fd65a48 jsonnet/kube-prometheus/node-exporter: follow node-exporter best practices and scrape data every 15s 2019-11-27 15:09:04 +01:00
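The scrape interval is a standard field on ServiceMonitor endpoints; a minimal sketch of a node-exporter endpoint scraped every 15s (only the interval comes from the commit above, the port name and scheme are assumptions):

```jsonnet
// Minimal sketch: scrape node-exporter every 15s. Port name and scheme are
// assumptions; only the 15s interval comes from the commit above.
{
  endpoints: [
    {
      port: 'https',
      scheme: 'https',
      interval: '15s',
    },
  ],
}
```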
Frederic Branczyk
f48fe057dc Merge pull request #307 from EricHorst/patch-1
Update README.md with apply clarification.
2019-11-21 17:41:53 -08:00
Eric Horst
8487871388 Update README.md with apply clarification.
Update the kubectl apply commands in the customizing section to match those in the quickstart section. The customizing section did not account for the recently introduced setup/ subdirectory.
2019-11-17 21:10:32 -08:00
Sergiusz Urbaniak
ce5fe790ee Merge pull request #299 from coreos/fix-jb
Fix jb
2019-11-06 08:33:48 +01:00
Frederic Branczyk
3b82c11944 manifests: Re-generate with latest deps 2019-11-06 07:09:00 +01:00
Frederic Branczyk
65e57d8ec7 Adapt to new jb version 2019-11-06 07:06:18 +01:00
Frederic Branczyk
223c163915 Merge pull request #298 from dctrwatson/disable-alerts-managed
Disable controller and scheduler alerts in managed clusters
2019-11-06 06:50:11 +01:00
John Watson
235761f915 Disable controller and scheduler alerts in managed clusters 2019-11-05 21:17:24 -08:00
Frederic Branczyk
6a6a43e227 Merge pull request #272 from karancode/aws_eks_cni
AWS EKS CNI Monitoring Support
2019-11-05 15:53:46 +01:00
karancode
8ee17e6735 with jsonnet-ci:latest image 2019-11-05 21:10:40 +09:00
karancode
528f338477 revert jsonnetfile json 2019-11-05 20:30:50 +09:00
karancode
78edcc0276 make clean generate 2019-11-05 20:25:55 +09:00
karancode
f05e73881a update dependency 2019-11-05 18:03:23 +09:00
karancode
60bd13b34b remove example 2019-11-05 18:02:46 +09:00
karancode
9072e3530a fix: remove garbage character 2019-11-05 17:15:45 +09:00
karancode
737720c119 test 2019-11-05 16:57:39 +09:00
karancode
4bd3cb586a add prometheus rule to patch 2019-11-05 16:39:45 +09:00
karancode
01f944aa30 sync master with current fork 2019-11-05 16:36:56 +09:00
Sergiusz Urbaniak
b8f513e4d4 Merge pull request #293 from s-urbaniak/update
jsonnet/kube-prometheus: bump prometheus-operator
2019-11-01 15:44:44 +01:00
Sergiusz Urbaniak
c8f0471279 jsonnet: regenerate 2019-11-01 15:27:14 +01:00
Sergiusz Urbaniak
5e75f27ae2 jsonnet: pin node-mixin due to upstream bug 2019-11-01 15:26:56 +01:00
Sergiusz Urbaniak
02369dd1da jsonnet/kube-prometheus: bump prometheus-operator 2019-11-01 14:20:12 +01:00
Sergiusz Urbaniak
a3b1121562 Merge pull request #290 from LiliC/imrove-targets-down
jsonnet/kube-prometheus/alerts: Add namespace to TargetDown msg
2019-10-29 15:44:02 +01:00
Lili Cosic
78b9183837 manifests/prometheus-rules.yaml: Regenerate file 2019-10-29 14:59:13 +01:00
Lili Cosic
01d30382aa jsonnet/kube-prometheus/alerts: Add namespace to TargetDown msg 2019-10-29 14:36:14 +01:00
Frederic Branczyk
cc389a718c Merge pull request #289 from BenoitKnecht/fix-memory-saturation-dashboard
Fix memory saturation dashboard
2019-10-29 10:38:31 +01:00
Benoît Knecht
20eb5b312a manifests: Regenerate with new node-mixin dependency
Generate manifests for fd5b77c.
2019-10-28 16:35:12 +01:00
Benoît Knecht
fd5b77cadf jsonnetfile.lock.json: Update node-mixin dependency
Upgrade to prometheus/node_exporter@d574b4b, which includes a better
metric for memory saturation.

See prometheus/node_exporter#1524.
2019-10-28 16:30:11 +01:00
Frederic Branczyk
7f2e669d46 Merge pull request #263 from Deepak1100/patch-1
Docs for rawGrafanaDashboards field
2019-10-28 14:11:56 +01:00
Deepak Jain
d99aefe276 CLOUD-3031| Deepak Jain| adding example file 2019-10-26 22:55:35 +05:30
Deepak Jain
059e74d156 Docs for rawGrafanaDashboards field
Proposed in https://github.com/brancz/kubernetes-grafana/pull/78
2019-10-26 22:55:35 +05:30
Sergiusz Urbaniak
03b36af546 Merge pull request #282 from BenoitKnecht/prometheus-config
prometheus: Let name and replicas be set in _config
2019-10-25 09:45:11 +02:00
Frederic Branczyk
2f54bcb4c6 Merge pull request #286 from pgier/update-kube-state-metrics
Update kube-state-metrics to 1.8.0
2019-10-25 09:15:25 +02:00
Frederic Branczyk
1129dd7fb7 Merge pull request #287 from smarterclayton/bump_prometheus
jsonnet: Update to latest kubernetes-mixin
2019-10-25 09:12:45 +02:00
Clayton Coleman
6e0ca7565f jsonnet: Update to latest kubernetes-mixin
Pick up new alerts for unreachable nodes.
2019-10-25 00:23:55 -04:00
Paul Gier
ebb960ee5e update generated files for kube-state-metrics 1.7.2 -> 1.8.0 2019-10-24 15:02:29 -05:00
Paul Gier
f72d49ca26 update kube-state-metrics 1.7.2 -> 1.8.0 2019-10-24 15:01:46 -05:00
Benoît Knecht
5686d7b439 Makefile: Fix manifests target dependency (#285)
Since 1664600, manifests are built using `examples/kustomize.jsonnet`
instead of `example.jsonnet`.

This commit updates the dependencies in the `manifests` target to
reflect that change.
2019-10-24 13:37:11 +02:00
Benoît Knecht
dc77f255de prometheus: Let name and replicas be set in _config
Before #260, the Prometheus name and number of replicas could be
configured in `_config.prometheus.name` and
`_config.prometheus.replicas` respectively.

It isn't the case anymore, which means that configurations that did set
a custom name for Prometheus will get a second Prometheus instance
called `k8s` when they upgrade kube-prometheus.

This commit adds back the ability to configure both of these parameters
in `_config`.
2019-10-24 09:39:04 +02:00
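A minimal sketch of setting both parameters under `_config.prometheus`, as restored by this commit (the concrete values are examples only):

```jsonnet
// Minimal sketch: configure the Prometheus name and replica count through
// _config.prometheus, as restored by this commit. Values are examples only.
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: {
    namespace: 'monitoring',
    prometheus+:: {
      name: 'k8s',  // keeping the default name avoids a second instance
      replicas: 2,
    },
  },
};

{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) }
```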
Jake Utley
4e5b454ba8 Add metrics_path label to kubelet servicemonitor endpoints (#277)
* Add metrics_path label to kubelet servicemonitor endpoints

* Set kubelet metric_path label in jsonnet

* Add generated kubelet servicemonitor
2019-10-24 09:16:57 +02:00
karancode
a3ab6bd49b add available_ip rule 2019-10-24 04:12:07 +09:00
Benoît Knecht
a7884a6c18 node-exporter: Use configured resources for kube-rbac-proxy (#279)
Since #132, `kube-rbac-proxy` resources can be configured in
`config.resources['kube-rbac-proxy']`, but the node-exporter daemonset
was still using hard-coded values.

This commit sets the request and limit resources to the configured
values for the `kube-rbac-proxy` container in the node-exporter pods.
2019-10-23 10:58:24 +02:00
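A minimal sketch of configuring those resources, assuming the `_config.resources['kube-rbac-proxy']` field named in the commit message (request and limit values are examples only):

```jsonnet
// Minimal sketch: set kube-rbac-proxy requests/limits through the field named
// in the commit message. The concrete values are examples only.
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: {
    namespace: 'monitoring',
    resources+:: {
      'kube-rbac-proxy': {
        requests: { cpu: '10m', memory: '20Mi' },
        limits: { cpu: '20m', memory: '40Mi' },
      },
    },
  },
};

{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) }
```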
Paul Gier
c8273cf9e9 Scripts and readme (#258)
* Avoid race condition when deploying quickstart example

The namespace and CRD creation must happen before any dependent objects
are created.  So we can put these in a separate directory (manifest/setup)
so they can be created before the other objects.

Some minor updates to the README and added a couple of scripts
for the quickstarts

Update travis script to avoid race condition

Signed-off-by: Paul Gier <pgier@redhat.com>

* simplify the example quickstart script and improve readme

Signed-off-by: Paul Gier <pgier@redhat.com>

* increase minikube memory to 6g for quickstart example
2019-10-23 08:38:31 +02:00
karancode
d4ba158f9b bugfix final 2019-10-23 01:26:35 +09:00
karancode
3c4dbc52d9 bugfix examples/eks-cni-example 2019-10-23 01:10:12 +09:00
karancode
79c670bcd0 revert examples/kustomize.jsonnet 2019-10-23 00:37:05 +09:00
karancode
b03ff4f593 embedmd for doc 2019-10-23 00:24:00 +09:00
karancode
8228ebd2ba fix example 2019-10-23 00:22:59 +09:00
karancode
edb327531d update example 2019-10-23 00:15:25 +09:00
karancode
13c114a72f catch all eks 2019-10-23 00:15:05 +09:00
karancode
9249256b4a revert examples to original 2019-10-23 00:00:16 +09:00
karancode
e2b7e7f17d remove yamls from general manifests 2019-10-22 20:58:05 +09:00
karancode
d32e859a11 remove example 2019-10-22 20:52:09 +09:00
karancode
1cbc994344 fix namespace 2019-10-22 20:44:28 +09:00
karancode
cbbfa0cad5 move eks serviceMonitor to patch files 2019-10-22 20:33:03 +09:00
karancode
6ef4b3d330 remove local version 2019-10-22 03:26:02 +09:00
karancode
648db9d544 add readme 2019-10-22 03:24:31 +09:00
karancode
5cc6daab4a add aws eks cni service yaml 2019-10-22 02:57:40 +09:00
karancode
c156f21d50 bugfix service name 2019-10-22 02:44:57 +09:00
karancode
55db3208da fix names for service 2019-10-22 02:33:55 +09:00
karancode
3b8e685082 add aws-eks-cni service 2019-10-22 02:24:07 +09:00
karancode
3640448229 fix name 2019-10-22 02:03:58 +09:00
karancode
19624d9def add aws_eks_cni serviceMonitor 2019-10-22 01:45:55 +09:00
albertdb
24aebaf985 Bump prometheus-adapter and Grafana versions (#270)
* Bumping prometheus-adapter and Grafana versions

* Bumping prometheus-adapter version in libsonnet file

* Regenerating lockfile

* Bumping Grafana version in libsonnet file

* Updating kustomization.yaml
2019-10-21 12:42:19 +02:00
Frederic Branczyk
3d5fc3e38d Merge pull request #252 from benjaminhuo/ksm-autoscaler
Add cluster-proportional-vertical-autoscaler as a separate deployment for ksm
2019-10-16 15:38:29 +02:00
Frederic Branczyk
86cd3bc703 Merge pull request #262 from liuxu623/master
Add k8s-resources-node dashboard
2019-10-16 13:12:48 +02:00
liuxu
ace8b4d6d3 add k8s-resources-node dashboard 2019-10-16 16:49:33 +08:00
Benjamin
a16d5b69ab Add separate autoscaler for ksm
Signed-off-by: Benjamin <benjamin@yunify.com>
2019-10-16 16:02:44 +08:00
Frederic Branczyk
4e846a146f Merge pull request #265 from kpucynski/grafana-dashboards-update
Grafana dashboards update
2019-10-14 16:24:35 +02:00
Karol Pucynski
e7c3ca314d Grafana dashboards update 2019-10-14 16:00:06 +02:00
Frederic Branczyk
1ebce4955a Merge pull request #264 from paulfantom/ci_fixes
Do not download tooling when it is already available
2019-10-14 14:13:01 +02:00
paulfantom
7a2befe7fa *: Assume jb and embedmd are already available 2019-10-14 13:49:33 +02:00
paulfantom
bbd991a3b2 *: add names to CI jobs 2019-10-14 12:32:37 +02:00
Frederic Branczyk
8405360a46 Merge pull request #261 from s-urbaniak/prometheus-anti-affinity
jsonnet/kube-prometheus/kube-prometheus-anti-affinity: fix construction
2019-10-08 17:52:06 +02:00
Sergiusz Urbaniak
bd8d597f8d jsonnet/kube-prometheus/kube-prometheus-anti-affinity: fix construction
Currently, the anti-affinity settings source their configuration from the global configuration,
not respecting local prometheus settings.

This fixes it.
2019-10-08 17:30:12 +02:00
Sergiusz Urbaniak
bcadf3ae05 Merge pull request #260 from s-urbaniak/prometheus-constructor
kube-prometheus/prometheus: Add local configuration
2019-10-08 17:16:01 +02:00
Sergiusz Urbaniak
40a5dc2b71 kube-prometheus/prometheus: Add local configuration
This adds constructor-esque configuration options for prometheus assets.
They still reference global _config default values for backwards compatibility
but allow overriding values for new instances of prometheus assets.
2019-10-08 11:37:26 +02:00
Paweł Krupa
52685175f2 Merge pull request #259 from gitfool/fix-nodequery
Fix nodeQuery rate for window
2019-10-08 10:06:58 +02:00
Sean Fausett
6ec81661fa manifests: regenerate 2019-10-08 08:51:14 +13:00
Sean Fausett
5155e57141 jsonnet/kube-prometheus/prometheus-adapter: fix nodeQuery rate for window 2019-10-08 08:48:55 +13:00
Frederic Branczyk
73395e6d78 Merge pull request #257 from paulfantom/fix_window
fix incorrect window in containerQuery
2019-10-07 10:47:54 +02:00
paulfantom
a9f7b03f27 manifests: regenerate 2019-10-07 10:34:24 +02:00
paulfantom
d2dd84bc0f jsonnet/kube-prometheus/prometheus-adapter: fix incorrect window in containerQuery 2019-10-06 18:57:14 +02:00
Paweł Krupa
21ace9b55e increase time period for rate over cadvisor metrics (#254)
increase time period for rate over cadvisor metrics
2019-10-02 17:18:30 +02:00
paulfantom
dfb626837f manifests: regenerate 2019-10-02 16:38:20 +02:00
paulfantom
c72ae7b63c increase time period for rate over cadvisor metrics 2019-10-02 16:31:55 +02:00
Sergiusz Urbaniak
f458e85e5d Merge pull request #248 from s-urbaniak/fix_116
jsonnet/prometheus-adapter: Fix query for k8s 1.16
2019-09-27 11:20:29 +02:00
Sergiusz Urbaniak
ee7d0d367f jsonnet/prometheus-adapter: Fix query for k8s 1.16 2019-09-27 11:02:32 +02:00
Lili Cosic
139df678f0 Merge pull request #247 from LiliC/bump-mixins
Bump dependencies
2019-09-26 15:14:25 +02:00
Lili Cosic
0f5400e5fe manifests: Regenerate files 2019-09-26 14:53:40 +02:00
Lili Cosic
3924379e84 jsonnetfile.lock.json: Bump all deps 2019-09-26 14:46:11 +02:00
Lili Cosic
be47e4a7c2 Merge pull request #245 from LiliC/add-ksm-job
jsonnet/kube-prometheus/kube-state-metrics: Do not drop job label
2019-09-26 14:40:40 +02:00
Lili Cosic
5839b8c1cb manifests/kube-state-metrics-serviceMonitor.yaml: Regenerate 2019-09-26 14:22:13 +02:00
Lili Cosic
48eefc51d9 jsonnet/kube-prometheus/kube-state-metrics: Do not drop job label
Currently a lot of alerts rely on the job='kube-state-metrics' label.
2019-09-26 14:14:08 +02:00
Matthias Loibl
9486ec2bc1 Merge pull request #244 from brancz/fix-additional-rules
examples: Fix additional rules snippets
2019-09-26 14:07:52 +02:00
Frederic Branczyk
2e996fce91 examples: Fix additional rules snippets 2019-09-26 11:27:31 +02:00
Frederic Branczyk
e304d2a60f Merge pull request #240 from simonpasquier/support-jsonnetfmt
Update Makefile to support jsonnet >= 0.13
2019-09-25 10:20:10 +02:00
Frederic Branczyk
4da422095b Merge pull request #239 from simonpasquier/improve-target-down-message
jsonnet/kube-prometheus/alerts: improve TargetDown message
2019-09-25 09:56:53 +02:00
Simon Pasquier
e0c232df8b Update Makefile to support jsonnet >= 0.13 2019-09-25 09:16:23 +02:00
Frederic Branczyk
2dcc928425 Merge pull request #238 from LiliC/fix-replacment
manifests/node-exporter-serviceMonitor.yaml: Fix typo
2019-09-25 09:12:27 +02:00
Simon Pasquier
b9504efef7 jsonnet/kube-prometheus/alerts: improve TargetDown message
Signed-off-by: Simon Pasquier <spasquie@redhat.com>
2019-09-25 09:11:03 +02:00
Lili Cosic
8d7db8862d manifests/node-exporter-serviceMonitor.yaml: Regenerate manifest 2019-09-25 08:41:14 +02:00
Lili Cosic
b6c6d39fb9 jsonnet/kube-prometheus/node-exporter: Fix typo 2019-09-24 17:02:03 +02:00
Frederic Branczyk
dc20838d65 Merge pull request #235 from guusvw/remove-addon-resizer-from-ksm
removing addonResizer from ksm
2019-09-24 14:59:32 +02:00
Guus van Weelden
87a4567faf update generated manifests
Signed-off-by: Guus van Weelden <guus.vanweelden@moia.io>
2019-09-24 14:36:22 +02:00
Guus van Weelden
b6becc0936 removing addonResizer from ksm
The addon-resizer could lead to problems with kube-state-metrics;
it has also been removed from the ksm-maintained kubernetes manifests:
https://github.com/kubernetes/kube-state-metrics/pull/750

Signed-off-by: Guus van Weelden <guus.vanweelden@moia.io>
2019-09-24 14:19:59 +02:00
Matthias Loibl
0fc41a075a Merge pull request #233 from LiliC/remove-labels
jsonnet/kube-prometheus/kube-state-metrics: Drop ksm own labels
2019-09-24 09:56:36 +02:00
Lili Cosic
cb227144e2 manifests/kube-state-metrics-serviceMonitor.yaml: Regenerate manifest 2019-09-23 18:37:29 +02:00
Lili Cosic
974d3a70be jsonnet/kube-prometheus/kube-state-metrics: Drop ksm own labels
These labels are confusing and misleading, as they describe
kube-state-metrics itself rather than the scrape target.
2019-09-23 18:28:14 +02:00
Frederic Branczyk
0739c11ebb Merge pull request #230 from dparkar/dev/dparkar/aks/issue213
adding note for k8s before v1.14.0
2019-09-20 08:30:03 +02:00
Dhawal Parkar
c0b4e45bb4 adding note for k8s before v1.14.0 2019-09-19 15:18:50 -07:00
74 changed files with 16294 additions and 2018 deletions

View File

@@ -16,6 +16,9 @@ services:
jobs:
include:
- script: make --always-make generate-in-docker && git diff --exit-code
- script: make --always-make test-in-docker
- script: GO111MODULE=on ./tests/e2e/travis-e2e.sh
- name: Check generated files
script: make --always-make generate-in-docker && git diff --exit-code
- name: Run tests
script: make --always-make test-in-docker
- name: Run e2e tests
script: GO111MODULE=on ./tests/e2e/travis-e2e.sh

View File

@@ -1,14 +1,21 @@
JSONNET_FMT := jsonnet fmt -n 2 --max-blank-lines 2 --string-style s --comment-style s
JSONNET_ARGS := -n 2 --max-blank-lines 2 --string-style s --comment-style s
ifneq (,$(shell which jsonnetfmt))
JSONNET_FMT_CMD := jsonnetfmt
else
JSONNET_FMT_CMD := jsonnet
JSONNET_FMT_ARGS := fmt $(JSONNET_ARGS)
endif
JSONNET_FMT := $(JSONNET_FMT_CMD) $(JSONNET_FMT_ARGS)
JB_BINARY:=$(GOPATH)/bin/jb
EMBEDMD_BINARY:=$(GOPATH)/bin/embedmd
JB_BINARY := jb
EMBEDMD_BINARY := embedmd
CONTAINER_CMD:=docker run --rm \
-e http_proxy -e https_proxy -e no_proxy \
-u="$(shell id -u):$(shell id -g)" \
-v "$(shell go env GOCACHE):/.cache/go-build" \
-v "$(PWD):/go/src/github.com/coreos/kube-prometheus:Z" \
-w "/go/src/github.com/coreos/kube-prometheus" \
quay.io/coreos/jsonnet-ci
quay.io/coreos/jsonnet-ci:release-0.36
all: generate fmt test
@@ -24,14 +31,14 @@ clean:
generate: manifests **.md
**.md: $(EMBEDMD_BINARY) $(shell find examples) build.sh example.jsonnet
**.md: $(shell find examples) build.sh example.jsonnet
$(EMBEDMD_BINARY) -w `find . -name "*.md" | grep -v vendor`
manifests: vendor example.jsonnet build.sh
manifests: examples/kustomize.jsonnet vendor build.sh
rm -rf manifests
./build.sh ./examples/kustomize.jsonnet
./build.sh $<
vendor: $(JB_BINARY) jsonnetfile.json jsonnetfile.lock.json
vendor: jsonnetfile.json jsonnetfile.lock.json
rm -rf vendor
$(JB_BINARY) install
@@ -39,7 +46,7 @@ fmt:
find . -name 'vendor' -prune -o -name '*.libsonnet' -o -name '*.jsonnet' -print | \
xargs -n 1 -- $(JSONNET_FMT) -i
test: $(JB_BINARY)
test:
$(JB_BINARY) install
./test.sh
@@ -50,10 +57,4 @@ test-in-docker:
@echo ">> Compiling assets and generating Kubernetes manifests"
$(CONTAINER_CMD) make $(MFLAGS) test
$(JB_BINARY):
go get -u github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb
$(EMBEDMD_BINARY):
go get github.com/campoy/embedmd
.PHONY: generate generate-in-docker test test-in-docker fmt

View File

@@ -71,13 +71,13 @@ This adapter is an Extension API Server and Kubernetes needs to be have this fea
### minikube
In order to just try out this stack, start [minikube](https://github.com/kubernetes/minikube) with the following command:
To try out this stack, start [minikube](https://github.com/kubernetes/minikube) with the following command:
```shell
$ minikube delete && minikube start --kubernetes-version=v1.14.4 --memory=4096 --bootstrapper=kubeadm --extra-config=kubelet.authentication-token-webhook=true --extra-config=kubelet.authorization-mode=Webhook --extra-config=scheduler.address=0.0.0.0 --extra-config=controller-manager.address=0.0.0.0
$ minikube delete && minikube start --kubernetes-version=v1.16.0 --memory=6g --bootstrapper=kubeadm --extra-config=kubelet.authentication-token-webhook=true --extra-config=kubelet.authorization-mode=Webhook --extra-config=scheduler.address=0.0.0.0 --extra-config=controller-manager.address=0.0.0.0
```
The kube-prometheus stack includes a resource metrics API server, like the metrics-server does. So ensure the metrics-server plugin is disabled on minikube:
The kube-prometheus stack includes a resource metrics API server, so the metrics-server addon is not necessary. Ensure the metrics-server addon is disabled on minikube:
```shell
$ minikube addons disable metrics-server
@@ -85,23 +85,28 @@ $ minikube addons disable metrics-server
## Quickstart
>Note: For versions before Kubernetes v1.14.0 use the release-0.1 branch instead of master.
This project is intended to be used as a library (i.e. the intent is not for you to create your own modified copy of this repository).
Though for a quickstart a compiled version of the Kubernetes [manifests](manifests) generated with this library (specifically with `example.jsonnet`) is checked into this repository in order to try the content out quickly. To try out the stack un-customized run:
* Simply create the stack:
* Create the monitoring stack using the config in the `manifests` directory:
```shell
$ kubectl create -f manifests/
# It can take a few seconds for the above 'create manifests' command to fully create the following resources, so verify the resources are ready before proceeding.
$ until kubectl get customresourcedefinitions servicemonitors.monitoring.coreos.com ; do date; sleep 1; echo ""; done
$ until kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done
$ kubectl apply -f manifests/ # This command sometimes may need to be done twice (to workaround a race condition).
# Create the namespace and CRDs, and then wait for them to be available before creating the remaining resources
kubectl create -f manifests/setup
until kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done
kubectl create -f manifests/
```
We create the namespace and CustomResourceDefinitions first to avoid race conditions when deploying the monitoring components.
Alternatively, the resources in both folders can be applied with a single command
`kubectl create -f manifests/setup -f manifests`, but it may be necessary to run the command multiple times for all components to
be created successfully.
* And to teardown the stack:
```shell
$ kubectl delete -f manifests/
kubectl delete --ignore-not-found=true -f manifests/ -f manifests/setup
```
### Access the dashboards
@@ -185,8 +190,13 @@ local kp =
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['setup/0namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{
['setup/prometheus-operator-' + name]: kp.prometheusOperator[name]
for name in std.filter((function(name) name != 'serviceMonitor'), std.objectFields(kp.prometheusOperator))
} +
// serviceMonitor is separated so that it can be created after the CRDs are ready
{ 'prometheus-operator-serviceMonitor': kp.prometheusOperator.serviceMonitor } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
@@ -210,7 +220,7 @@ set -o pipefail
# Make sure to start with a clean 'manifests' dir
rm -rf manifests
mkdir manifests
mkdir -p manifests/setup
# optional, but we would like to generate yaml, not json
jsonnet -J vendor -m manifests "${1-example.jsonnet}" | xargs -I{} sh -c 'cat {} | gojsontoyaml > {}.yaml; rm -f {}' -- {}
@@ -226,8 +236,13 @@ The previous steps (compilation) has created a bunch of manifest files in the ma
Now simply use `kubectl` to install Prometheus and Grafana as per your configuration:
```shell
# Update the namespace and CRDs, and then wait for them to be available before creating the remaining resources
$ kubectl apply -f manifests/setup
$ kubectl apply -f manifests/
```
Alternatively, the resources in both folders can be applied with a single command
`kubectl apply -Rf manifests`, but it may be necessary to run the command multiple times for all components to
be created successfully.
Check the monitoring namespace (or the namespace you have specified in `namespace: `) and make sure the pods are running. Prometheus and Grafana should be up and running soon.
@@ -274,7 +289,6 @@ These are the available fields with their respective default values:
nodeExporter: "v0.18.1",
kubeStateMetrics: "v1.5.0",
kubeRbacProxy: "v0.4.1",
addonResizer: "1.8.4",
prometheusOperator: "v0.30.0",
prometheus: "v2.10.0",
},
@@ -284,7 +298,6 @@ These are the available fields with their respective default values:
alertmanager: "quay.io/prometheus/alertmanager",
kubeStateMetrics: "quay.io/coreos/kube-state-metrics",
kubeRbacProxy: "quay.io/coreos/kube-rbac-proxy",
addonResizer: "k8s.gcr.io/addon-resizer",
nodeExporter: "quay.io/prometheus/node-exporter",
prometheusOperator: "quay.io/coreos/prometheus-operator",
},
@@ -323,8 +336,6 @@ These are the available fields with their respective default values:
baseCPU: '100m',
baseMemory: '150Mi',
cpuPerNode: '2m',
memoryPerNode: '30Mi',
},
nodeExporter+:: {
@@ -567,34 +578,34 @@ You can define ServiceMonitor resources in your `jsonnet` spec. See the snippet
```jsonnet
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
_config+:: {
namespace: 'monitoring',
prometheus+:: {
namespaces+: ['my-namespace', 'my-second-namespace'],
}
},
namespace: 'monitoring',
prometheus+:: {
serviceMonitorMyNamespace: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'my-servicemonitor',
namespace: 'my-namespace',
namespaces+: ['my-namespace', 'my-second-namespace'],
},
},
prometheus+:: {
serviceMonitorMyNamespace: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'my-servicemonitor',
namespace: 'my-namespace',
},
spec: {
jobLabel: 'app',
endpoints: [
{
port: 'http-metrics',
},
spec: {
jobLabel: 'app',
endpoints: [
{
port: 'http-metrics',
},
],
selector: {
matchLabels: {
'app': 'myapp',
},
},
],
selector: {
matchLabels: {
app: 'myapp',
},
},
},
},
},
},
};
@@ -647,6 +658,7 @@ As described in the [Prerequisites](#prerequisites) section, in order to retriev
If you are using Google's GKE product, see [cAdvisor support](docs/GKE-cadvisor-support.md).
If you are using AWS EKS, see [AWS EKS CNI support](docs/EKS-cni-support.md).
#### Authentication problem
The Prometheus `/targets` page will show the kubelet job with the error `403 Unauthorized` when token authentication is not enabled. Ensure that the `--authentication-token-webhook=true` flag is enabled on all kubelet configurations.

View File

@@ -9,7 +9,7 @@ set -o pipefail
# Make sure to start with a clean 'manifests' dir
rm -rf manifests
mkdir manifests
mkdir -p manifests/setup
# optional, but we would like to generate yaml, not json
jsonnet -J vendor -m manifests "${1-example.jsonnet}" | xargs -I{} sh -c 'cat {} | gojsontoyaml > {}.yaml; rm -f {}' -- {}

docs/EKS-cni-support.md Normal file
View File

@@ -0,0 +1,42 @@
# CNI monitoring special configuration updates for EKS
AWS EKS uses the [CNI](https://github.com/aws/amazon-vpc-cni-k8s) networking plugin for pod networking in Kubernetes, using Elastic Network Interfaces on AWS.
One fatal issue that can occur is running out of IP addresses in your EKS cluster (this generally happens due to misconfigurations where pods keep getting scheduled).
You can monitor the `awscni` plugin using kube-prometheus with:
[embedmd]:# (../examples/eks-cni-example.jsonnet)
```jsonnet
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-eks.libsonnet') + {
_config+:: {
namespace: 'monitoring',
},
prometheusRules+:: {
groups+: [
{
name: 'example-group',
rules: [
{
record: 'aws_eks_available_ip',
expr: 'sum by(instance) (awscni_total_ip_addresses) - sum by(instance) (awscni_assigned_ip_addresses) < 10',
},
],
},
],
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) }
```
After you have generated the required yaml file, run:
```
kubectl apply -f manifests/prometheus-serviceMonitorAwsEksCNI.yaml
```

View File

@@ -24,8 +24,13 @@ local kp =
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['setup/0namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{
['setup/prometheus-operator-' + name]: kp.prometheusOperator[name]
for name in std.filter((function(name) name != 'serviceMonitor'), std.objectFields(kp.prometheusOperator))
} +
// serviceMonitor is separated so that it can be created after the CRDs are ready
{ 'prometheus-operator-serviceMonitor': kp.prometheusOperator.serviceMonitor } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
@@ -77,6 +82,7 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```
@@ -113,6 +119,7 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```
@@ -135,7 +142,12 @@ Then import it in jsonnet:
[embedmd]:# (../examples/prometheus-additional-rendered-rule-example.jsonnet)
```jsonnet
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
prometheusAlerts+:: (import 'existingrule.json'),
_config+:: {
namespace: 'monitoring',
},
prometheusAlerts+:: {
groups+: (import 'existingrule.json').groups,
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
@@ -144,6 +156,7 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```
### Changing default rules
@@ -297,3 +310,24 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```
In case you have a lot of JSON dashboards exported from the Grafana UI, the above approach is going to take a lot of time. To improve performance, we can use the `rawGrafanaDashboards` field and provide its value as a JSON string by using `importstr`:
[embedmd]:# (../examples/grafana-additional-rendered-dashboard-example-2.jsonnet)
```jsonnet
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
_config+:: {
namespace: 'monitoring',
},
rawGrafanaDashboards+:: {
'my-dashboard.json': (importstr 'example-grafana-dashboard.json'),
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```

View File

@@ -12,8 +12,13 @@ local kp =
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['setup/0namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{
['setup/prometheus-operator-' + name]: kp.prometheusOperator[name]
for name in std.filter((function(name) name != 'serviceMonitor'), std.objectFields(kp.prometheusOperator))
} +
// serviceMonitor is separated so that it can be created after the CRDs are ready
{ 'prometheus-operator-serviceMonitor': kp.prometheusOperator.serviceMonitor } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +

View File

@@ -1,33 +1,33 @@
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
_config+:: {
namespace: 'monitoring',
prometheus+:: {
namespaces+: ['my-namespace', 'my-second-namespace'],
}
},
namespace: 'monitoring',
prometheus+:: {
serviceMonitorMyNamespace: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'my-servicemonitor',
namespace: 'my-namespace',
namespaces+: ['my-namespace', 'my-second-namespace'],
},
},
prometheus+:: {
serviceMonitorMyNamespace: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'my-servicemonitor',
namespace: 'my-namespace',
},
spec: {
jobLabel: 'app',
endpoints: [
{
port: 'http-metrics',
},
spec: {
jobLabel: 'app',
endpoints: [
{
port: 'http-metrics',
},
],
selector: {
matchLabels: {
'app': 'myapp',
},
},
],
selector: {
matchLabels: {
app: 'myapp',
},
},
},
},
},
},
};

View File

@@ -0,0 +1,26 @@
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-eks.libsonnet') + {
_config+:: {
namespace: 'monitoring',
},
prometheusRules+:: {
groups+: [
{
name: 'example-group',
rules: [
{
record: 'aws_eks_available_ip',
expr: 'sum by(instance) (awscni_total_ip_addresses) - sum by(instance) (awscni_assigned_ip_addresses) < 10',
},
],
},
],
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) }

View File

@@ -14,12 +14,16 @@ spec:
port: 8080
targetPort: web
---
apiVersion: extensions/v1beta1
apiVersion: apps/v1
kind: Deployment
metadata:
name: example-app
namespace: default
spec:
selector:
matchLabels:
app: example-app
version: 1.1.3
replicas: 4
template:
metadata:

View File

@@ -0,0 +1,16 @@
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
_config+:: {
namespace: 'monitoring',
},
rawGrafanaDashboards+:: {
'my-dashboard.json': (importstr 'example-grafana-dashboard.json'),
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }

View File

@@ -6,8 +6,15 @@ local kp =
};
local manifests =
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
// Uncomment line below to enable vertical auto scaling of kube-state-metrics
//{ ['ksm-autoscaler-' + name]: kp.ksmAutoscaler[name] for name in std.objectFields(kp.ksmAutoscaler) } +
{ ['setup/0namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{
['setup/prometheus-operator-' + name]: kp.prometheusOperator[name]
for name in std.filter((function(name) name != 'serviceMonitor'), std.objectFields(kp.prometheusOperator))
} +
// serviceMonitor is separated so that it can be created after the CRDs are ready
{ 'prometheus-operator-serviceMonitor': kp.prometheusOperator.serviceMonitor } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +

View File

@@ -29,4 +29,5 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }

View File

@@ -23,4 +23,5 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }

View File

@@ -1,5 +1,10 @@
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
prometheusAlerts+:: (import 'existingrule.json'),
_config+:: {
namespace: 'monitoring',
},
prometheusAlerts+:: {
groups+: (import 'existingrule.json').groups,
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
@@ -8,4 +13,5 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }

View File

@@ -14,6 +14,14 @@ rules:
- get
- list
- watch
- apiGroups:
- "apps"
resources:
- deployments
verbs:
- get
- list
- watch
- apiGroups:
- "extensions"
resources:

View File

@@ -1,4 +1,4 @@
apiVersion: extensions/v1beta1
apiVersion: apps/v1
kind: Deployment
metadata:
name: metrics-server

go.mod
View File

@@ -4,25 +4,30 @@ go 1.12
require (
github.com/Jeffail/gabs v1.2.0
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d // indirect
github.com/gogo/protobuf v1.1.1 // indirect
github.com/google/gofuzz v0.0.0-20170612174753-24818f796faf // indirect
github.com/googleapis/gnostic v0.0.0-20170729233727-0c5108395e2d // indirect
github.com/imdario/mergo v0.3.7 // indirect
github.com/json-iterator/go v0.0.0-20180701071628-ab8a2e0c74be // indirect
github.com/jsonnet-bundler/jsonnet-bundler v0.1.0 // indirect
github.com/mattn/go-colorable v0.1.4 // indirect
github.com/mattn/go-isatty v0.0.10 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.1 // indirect
github.com/pkg/errors v0.8.1
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/spf13/pflag v1.0.3 // indirect
github.com/stretchr/testify v1.2.2 // indirect
github.com/stretchr/objx v0.2.0 // indirect
golang.org/x/crypto v0.0.0-20190411191339-88737f569e3a // indirect
golang.org/x/net v0.0.0-20190206173232-65e2d4e15006 // indirect
golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a // indirect
golang.org/x/sys v0.0.0-20191023151326-f89234f9a2c2 // indirect
golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db // indirect
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 // indirect
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.2.2 // indirect
gopkg.in/yaml.v2 v2.2.4 // indirect
k8s.io/api v0.0.0-20190313235455-40a48860b5ab // indirect
k8s.io/apimachinery v0.0.0-20190313205120-d7deff9243b1
k8s.io/client-go v11.0.0+incompatible

go.sum
View File

@@ -1,8 +1,18 @@
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
github.com/Jeffail/gabs v1.2.0 h1:uFhoIVTtsX7hV2RxNgWad8gMU+8OJdzFbOathJdhD3o=
github.com/Jeffail/gabs v1.2.0/go.mod h1:6xMvQMK4k33lb7GUUpaAPh6nKMmemQeg5d4gn7/bOXc=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafoB+tBA3gMyHYHrpOtNuDiK/uB5uXxq5wM=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d h1:UQZhZ2O0vMHr2cI+DC1Mbh0TJxzA3RcLoMsFw+aXw7E=
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho=
github.com/campoy/embedmd v1.0.0/go.mod h1:oxyr9RCiSXg0M3VJ3ks0UGfp98BpSSGr0kpiX3MzVl8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fatih/color v1.7.0 h1:DkWD4oS2D8LGGgTQ6IvwJJXSL5Vp2ffcQg58nFV38Ys=
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/gogo/protobuf v1.1.1 h1:72R+M5VuhED/KujmZVcIquuo8mBgX4oVda//DQb3PXo=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM=
@@ -15,18 +25,32 @@ github.com/imdario/mergo v0.3.7 h1:Y+UAYTZ7gDEuOfhxKWy+dvb5dRQ6rJjFSdX2HZY1/gI=
github.com/imdario/mergo v0.3.7/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
github.com/json-iterator/go v0.0.0-20180701071628-ab8a2e0c74be h1:AHimNtVIpiBjPUhEF5KNCkrUyqTSA5zWUl8sQ2bfGBE=
github.com/json-iterator/go v0.0.0-20180701071628-ab8a2e0c74be/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/jsonnet-bundler/jsonnet-bundler v0.1.0 h1:T/HtHFr+mYCRULrH1x/RnoB0prIs0rMkolJhFMXNC9A=
github.com/jsonnet-bundler/jsonnet-bundler v0.1.0/go.mod h1:YKsSFc9VFhhLITkJS3X2PrRqWG9u2Jq99udTdDjQLfM=
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
github.com/mattn/go-colorable v0.1.4 h1:snbPLB8fVfU9iwbbo30TPtbLRzwWu6aJS6Xh4eaaviA=
github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
github.com/mattn/go-isatty v0.0.6/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
github.com/mattn/go-isatty v0.0.10 h1:qxFzApOv4WsAL965uUPIsXzAKCZxN2p9UqdhFS4ZW10=
github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg=
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
golang.org/x/crypto v0.0.0-20190411191339-88737f569e3a h1:Igim7XhdOpBnWPuYJ70XcNpq8q3BCACtVgNfoJxOV7g=
golang.org/x/crypto v0.0.0-20190411191339-88737f569e3a/go.mod h1:WFFai1msRO1wXaEeE5yQxYXgSfI8pQAWXbQop6sCtWE=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -37,8 +61,12 @@ golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a h1:tImsplftrFpALCYumobsd0
golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4 h1:YUO/7uOKsKeq9UokNS62b8FYywz3ker1l1vDZRCRefw=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190310054646-10058d7d4faa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e h1:nFYrTHrdrAOpShe27kaFHjsqYSEQ0KWqdWLu3xuZJts=
golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191023151326-f89234f9a2c2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db h1:6/JqlYfC1CCaLnGceQTI+sDGhC9UBSPAsBqI0Gun6kU=
golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
@@ -47,12 +75,16 @@ golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxb
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
google.golang.org/appengine v1.4.0 h1:/wp5JvzpHIxhs/dumFmF7BXTf3Z+dd4uXta4kVyO508=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
k8s.io/api v0.0.0-20190313235455-40a48860b5ab h1:DG9A67baNpoeweOy2spF1OWHhnVY5KR7/Ek/+U1lVZc=
k8s.io/api v0.0.0-20190313235455-40a48860b5ab/go.mod h1:iuAfoD4hCxJ8Onx9kaTIt30j7jUFS00AXQi6QMi99vA=
k8s.io/apimachinery v0.0.0-20190313205120-d7deff9243b1 h1:IS7K02iBkQXpCeieSiyJjGoLSdVOv2DbPaWHJ+ZtgKg=

View File

@@ -13,13 +13,13 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
},
alertmanager+:: {
name: $._config.alertmanager.name,
name: 'main',
config: {
global: {
resolve_timeout: '5m',
},
route: {
group_by: ['job'],
group_by: ['namespace'],
group_wait: '30s',
group_interval: '5m',
repeat_interval: '12h',

View File

@@ -7,7 +7,7 @@
{
alert: 'TargetDown',
annotations: {
message: '{{ $value }}% of the {{ $labels.job }} targets are down.',
message: '{{ printf "%.4g" $value }}% of the {{ $labels.job }} targets in {{ $labels.namespace }} namespace are down.',
},
expr: '100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10',
'for': '10m',

View File

@@ -0,0 +1,50 @@
[
// Drop all kubelet metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: 'kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)',
action: 'drop',
},
// Drop all scheduler metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: 'scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)',
action: 'drop',
},
// Drop all apiserver metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: 'apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)',
action: 'drop',
},
// Drop all docker metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: 'kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)',
action: 'drop',
},
// Drop all reflector metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: 'reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)',
action: 'drop',
},
// Drop all etcd metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: 'etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)',
action: 'drop',
},
// Drop all transformation metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: 'transformation_(transformation_latencies_microseconds|failures_total)',
action: 'drop',
},
// Drop all other metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: '(admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|AvailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)',
action: 'drop',
},
]
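
This relabeling list is consumed elsewhere in this change set by importing it into ServiceMonitor endpoints (see the kubelet and apiserver monitors further below). A minimal sketch of that usage, with the endpoint trimmed to the relevant field:

endpoints: [
  {
    port: 'https-metrics',
    metricRelabelings: (import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet'),
  },
],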

View File

@@ -38,7 +38,7 @@
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "release-0.33"
"version": "release-0.34"
},
{
"name": "etcd-mixin",

View File

@@ -0,0 +1,118 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
_config+:: {
versions+:: {
clusterVerticalAutoscaler: "v0.8.1"
},
imageRepos+:: {
clusterVerticalAutoscaler: 'gcr.io/google_containers/cpvpa-amd64'
},
kubeStateMetrics+:: {
stepCPU: '1m',
stepMemory: '2Mi',
},
},
ksmAutoscaler+:: {
clusterRole:
local clusterRole = k.rbac.v1.clusterRole;
local rulesType = clusterRole.rulesType;
local rules = [
rulesType.new() +
rulesType.withApiGroups(['']) +
rulesType.withResources([
'nodes',
]) +
rulesType.withVerbs(['list', 'watch']),
];
clusterRole.new() +
clusterRole.mixin.metadata.withName('ksm-autoscaler') +
clusterRole.withRules(rules),
clusterRoleBinding:
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
clusterRoleBinding.new() +
clusterRoleBinding.mixin.metadata.withName('ksm-autoscaler') +
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
clusterRoleBinding.mixin.roleRef.withName('ksm-autoscaler') +
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'ksm-autoscaler', namespace: $._config.namespace }]),
roleBinding:
local roleBinding = k.rbac.v1.roleBinding;
roleBinding.new() +
roleBinding.mixin.metadata.withName('ksm-autoscaler') +
roleBinding.mixin.metadata.withNamespace($._config.namespace) +
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
roleBinding.mixin.roleRef.withName('ksm-autoscaler') +
roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'ksm-autoscaler' }]),
role:
local role = k.rbac.v1.role;
local rulesType = role.rulesType;
local extensionsRule = rulesType.new() +
rulesType.withApiGroups(['extensions']) +
rulesType.withResources([
'deployments',
]) +
rulesType.withVerbs(['patch']) +
rulesType.withResourceNames(['kube-state-metrics']);
local appsRule = rulesType.new() +
rulesType.withApiGroups(['apps']) +
rulesType.withResources([
'deployments',
]) +
rulesType.withVerbs(['patch']) +
rulesType.withResourceNames(['kube-state-metrics']);
local rules = [extensionsRule, appsRule];
role.new() +
role.mixin.metadata.withName('ksm-autoscaler') +
role.mixin.metadata.withNamespace($._config.namespace) +
role.withRules(rules),
serviceAccount:
local serviceAccount = k.core.v1.serviceAccount;
serviceAccount.new('ksm-autoscaler') +
serviceAccount.mixin.metadata.withNamespace($._config.namespace),
deployment:
local deployment = k.apps.v1.deployment;
local container = deployment.mixin.spec.template.spec.containersType;
local podSelector = deployment.mixin.spec.template.spec.selectorType;
local podLabels = { app: 'ksm-autoscaler' };
local kubeStateMetricsAutoscaler =
container.new('ksm-autoscaler', $._config.imageRepos.clusterVerticalAutoscaler + ':' + $._config.versions.clusterVerticalAutoscaler) +
container.withArgs([
'/cpvpa',
'--target=deployment/kube-state-metrics',
'--namespace=' + $._config.namespace,
'--logtostderr=true',
'--poll-period-seconds=10',
'--default-config={"kube-state-metrics":{"requests":{"cpu":{"base":"' + $._config.kubeStateMetrics.baseCPU + '","step":"' + $._config.kubeStateMetrics.stepCPU + '","nodesPerStep":1},"memory":{"base":"' + $._config.kubeStateMetrics.baseMemory + '","step":"' + $._config.kubeStateMetrics.stepMemory + '","nodesPerStep":1}},"limits":{"cpu":{"base":"' + $._config.kubeStateMetrics.baseCPU + '","step":"' + $._config.kubeStateMetrics.stepCPU + '","nodesPerStep":1},"memory":{"base":"' + $._config.kubeStateMetrics.baseMemory + '","step":"' + $._config.kubeStateMetrics.stepMemory + '","nodesPerStep":1}}}}'
]) +
container.mixin.resources.withRequests({cpu: '20m', memory: '10Mi'});
local c = [kubeStateMetricsAutoscaler];
deployment.new('ksm-autoscaler', 1, c, podLabels) +
deployment.mixin.metadata.withNamespace($._config.namespace) +
deployment.mixin.metadata.withLabels(podLabels) +
deployment.mixin.spec.selector.withMatchLabels(podLabels) +
deployment.mixin.spec.template.spec.withNodeSelector({ 'kubernetes.io/os': 'linux' }) +
deployment.mixin.spec.template.spec.securityContext.withRunAsNonRoot(true) +
deployment.mixin.spec.template.spec.securityContext.withRunAsUser(65534) +
deployment.mixin.spec.template.spec.withServiceAccountName('ksm-autoscaler'),
},
}
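
The --default-config argument above is built by string concatenation. As a sketch only, assuming the defaults baseCPU: '100m' and baseMemory: '150Mi' defined in kube-state-metrics.libsonnet together with the stepCPU and stepMemory values set here, the flag handed to cpvpa renders as:

--default-config={"kube-state-metrics":{"requests":{"cpu":{"base":"100m","step":"1m","nodesPerStep":1},"memory":{"base":"150Mi","step":"2Mi","nodesPerStep":1}},"limits":{"cpu":{"base":"100m","step":"1m","nodesPerStep":1},"memory":{"base":"150Mi","step":"2Mi","nodesPerStep":1}}}}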

View File

@@ -0,0 +1,20 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
prometheus+:: {
clusterRole+: {
rules+:
local role = k.rbac.v1.role;
local policyRule = role.rulesType;
local rule = policyRule.new() +
policyRule.withApiGroups(['']) +
policyRule.withResources([
'services',
'endpoints',
'pods',
]) +
policyRule.withVerbs(['get', 'list', 'watch']);
[rule]
},
}
}

View File

@@ -4,13 +4,13 @@ local affinity = statefulSet.mixin.spec.template.spec.affinity.podAntiAffinity.p
local matchExpression = affinity.mixin.podAffinityTerm.labelSelector.matchExpressionsType;
{
local antiaffinity(key, values) = {
local antiaffinity(key, values, namespace) = {
affinity: {
podAntiAffinity: {
preferredDuringSchedulingIgnoredDuringExecution: [
affinity.new() +
affinity.withWeight(100) +
affinity.mixin.podAffinityTerm.withNamespaces($._config.namespace) +
affinity.mixin.podAffinityTerm.withNamespaces(namespace) +
affinity.mixin.podAffinityTerm.withTopologyKey('kubernetes.io/hostname') +
affinity.mixin.podAffinityTerm.labelSelector.withMatchExpressions([
matchExpression.new() +
@@ -26,14 +26,16 @@ local matchExpression = affinity.mixin.podAffinityTerm.labelSelector.matchExpres
alertmanager+:: {
alertmanager+: {
spec+:
antiaffinity('alertmanager', [$._config.alertmanager.name]),
antiaffinity('alertmanager', [$._config.alertmanager.name], $._config.namespace),
},
},
prometheus+: {
local p = self,
prometheus+: {
spec+:
antiaffinity('prometheus', [$._config.prometheus.name]),
antiaffinity('prometheus', [p.name], p.namespace),
},
},
}

View File

@@ -0,0 +1,76 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
prometheus+: {
serviceMonitorCoreDNS+: {
spec+: {
endpoints: [
{
bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token",
interval: "15s",
targetPort: 9153
}
]
},
},
AwsEksCniMetricService:
service.new('aws-node', { 'k8s-app' : 'aws-node' } , servicePort.newNamed('cni-metrics-port', 61678, 61678)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'aws-node' }) +
service.mixin.spec.withClusterIp('None'),
serviceMonitorAwsEksCNI:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'awsekscni',
namespace: $._config.namespace,
labels: {
'k8s-app': 'eks-cni',
},
},
spec: {
jobLabel: 'k8s-app',
selector: {
matchLabels: {
'k8s-app': 'aws-node',
},
},
namespaceSelector: {
matchNames: [
'kube-system',
],
},
endpoints: [
{
port: 'cni-metrics-port',
interval: '30s',
path: '/metrics',
},
],
},
},
},
prometheusRules+: {
groups+: [
{
name: 'kube-prometheus-eks.rules',
rules: [
{
expr: 'sum by(instance) (awscni_total_ip_addresses) - sum by(instance) (awscni_assigned_ip_addresses) < 10',
labels: {
severity: 'critical',
},
annotations: {
message: 'Instance {{ $labels.instance }} has less than 10 IPs available.'
},
'for': '10m',
alert: 'EksAvailableIPs'
},
],
},
],
},
}

View File

@@ -9,6 +9,12 @@
scheme: 'http',
interval: '30s',
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path'
},
],
},
{
port: 'http-metrics',
@@ -17,6 +23,21 @@
interval: '30s',
honorLabels: true,
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path'
},
],
metricRelabelings: [
// Drop a bunch of metrics which are disabled but still sent, see
// https://github.com/google/cadvisor/issues/1925.
{
sourceLabels: ['__name__'],
regex: 'container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)',
action: 'drop',
},
],
},
],
},

View File

@@ -1,6 +1,5 @@
// On managed Kubernetes clusters some of the control plane components are not exposed to customers.
// Disable scrape jobs and service monitors for these components by overwriting 'kube-prometheus.libsonnet' defaults
// Note this doesn't disable generation of associated alerting rules but the rules don't trigger
// Disable scrape jobs, service monitors, and alert groups for these components by overwriting 'kube-prometheus.libsonnet' defaults
{
_config+:: {
@@ -12,6 +11,18 @@
for k in std.objectFields(j)
if !std.setMember(k, ['KubeControllerManager', 'KubeScheduler'])
},
// Skip alerting rules too
prometheus+:: {
rules+:: {
local g = super.groups,
groups: [
h
for h in g
if !std.setMember(h.name, ['kubernetes-system-controller-manager', 'kubernetes-system-scheduler'])
],
},
},
},
// Same as above but for ServiceMonitor's
@@ -21,8 +32,4 @@
for q in std.objectFields(p)
if !std.setMember(q, ['serviceMonitorKubeControllerManager', 'serviceMonitorKubeScheduler'])
},
// TODO: disable generationg of alerting rules
// manifests/prometheus-rules.yaml:52: - name: kube-scheduler.rules
}
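
A usage sketch for this override (the file name kube-prometheus-managed-cluster.libsonnet is assumed here; it is not visible in this diff): a consumer layers it on top of the main library so that the controller-manager and scheduler scrape configs, ServiceMonitors, and alert groups are all dropped together:

local kp =
  (import 'kube-prometheus/kube-prometheus.libsonnet') +
  (import 'kube-prometheus/kube-prometheus-managed-cluster.libsonnet') +
  { _config+:: { namespace: 'monitoring' } };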

View File

@@ -9,6 +9,9 @@
'kube-rbac-proxy'+: {
limits: {},
},
'kube-state-metrics'+: {
limits: {},
},
'node-exporter'+: {
limits: {},
},

View File

@@ -46,7 +46,7 @@ local configMapList = k3.core.v1.configMapList;
namespace: 'default',
versions+:: {
grafana: '6.2.2',
grafana: '6.4.3',
},
tlsCipherSuites: [
@@ -78,8 +78,8 @@ local configMapList = k3.core.v1.configMapList;
// 'TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305', // TODO: Might not work with h2
],
cadvisorSelector: 'job="kubelet"',
kubeletSelector: 'job="kubelet"',
cadvisorSelector: 'job="kubelet", metrics_path="/metrics/cadvisor"',
kubeletSelector: 'job="kubelet", metrics_path="/metrics"',
kubeStateMetricsSelector: 'job="kube-state-metrics"',
nodeExporterSelector: 'job="node-exporter"',
notKubeDnsSelector: 'job!="kube-dns"',
@@ -116,6 +116,10 @@ local configMapList = k3.core.v1.configMapList;
requests: { cpu: '10m', memory: '20Mi' },
limits: { cpu: '20m', memory: '40Mi' },
},
'kube-state-metrics': {
requests: { cpu: '100m', memory: '150Mi' },
limits: { cpu: '100m', memory: '150Mi' },
},
'node-exporter': {
requests: { cpu: '102m', memory: '180Mi' },
limits: { cpu: '250m', memory: '180Mi' },

View File

@@ -8,23 +8,16 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
collectors: '', // empty string gets a default set
scrapeInterval: '30s',
scrapeTimeout: '30s',
baseCPU: '100m',
baseMemory: '150Mi',
cpuPerNode: '2m',
memoryPerNode: '30Mi',
},
versions+:: {
kubeStateMetrics: 'v1.7.2',
kubeStateMetrics: 'v1.9.2',
kubeRbacProxy: 'v0.4.1',
addonResizer: '1.8.4',
},
imageRepos+:: {
kubeStateMetrics: 'quay.io/coreos/kube-state-metrics',
kubeRbacProxy: 'quay.io/coreos/kube-rbac-proxy',
addonResizer: 'k8s.gcr.io/addon-resizer',
},
},
@@ -129,6 +122,22 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
rulesType.withApiGroups(['storage.k8s.io']) +
rulesType.withResources([
'storageclasses',
'volumeattachments',
]) +
rulesType.withVerbs(['list', 'watch']),
rulesType.new() +
rulesType.withApiGroups(['admissionregistration.k8s.io']) +
rulesType.withResources([
'validatingwebhookconfigurations',
'mutatingwebhookconfigurations',
]) +
rulesType.withVerbs(['list', 'watch']),
rulesType.new() +
rulesType.withApiGroups(['networking.k8s.io']) +
rulesType.withResources([
'networkpolicies',
]) +
rulesType.withVerbs(['list', 'watch']),
];
@@ -178,39 +187,10 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
'--telemetry-host=127.0.0.1',
'--telemetry-port=8082',
] + if $._config.kubeStateMetrics.collectors != '' then ['--collectors=' + $._config.kubeStateMetrics.collectors] else []) +
container.mixin.resources.withRequests({ cpu: $._config.kubeStateMetrics.baseCPU, memory: $._config.kubeStateMetrics.baseMemory }) +
container.mixin.resources.withLimits({ cpu: $._config.kubeStateMetrics.baseCPU, memory: $._config.kubeStateMetrics.baseMemory });
container.mixin.resources.withRequests($._config.resources['kube-state-metrics'].requests) +
container.mixin.resources.withLimits($._config.resources['kube-state-metrics'].limits);
local addonResizer =
container.new('addon-resizer', $._config.imageRepos.addonResizer + ':' + $._config.versions.addonResizer) +
container.withCommand([
'/pod_nanny',
'--container=kube-state-metrics',
'--cpu=' + $._config.kubeStateMetrics.baseCPU,
'--extra-cpu=' + $._config.kubeStateMetrics.cpuPerNode,
'--memory=' + $._config.kubeStateMetrics.baseMemory,
'--extra-memory=' + $._config.kubeStateMetrics.memoryPerNode,
'--threshold=5',
'--deployment=kube-state-metrics',
]) +
container.withEnv([
{
name: 'MY_POD_NAME',
valueFrom: {
fieldRef: { apiVersion: 'v1', fieldPath: 'metadata.name' },
},
},
{
name: 'MY_POD_NAMESPACE',
valueFrom: {
fieldRef: { apiVersion: 'v1', fieldPath: 'metadata.namespace' },
},
},
]) +
container.mixin.resources.withRequests($._config.resources['addon-resizer'].requests) +
container.mixin.resources.withLimits($._config.resources['addon-resizer'].limits);
local c = [proxyClusterMetrics, proxySelfMetrics, kubeStateMetrics, addonResizer];
local c = [proxyClusterMetrics, proxySelfMetrics, kubeStateMetrics];
deployment.new('kube-state-metrics', 1, c, podLabels) +
deployment.mixin.metadata.withNamespace($._config.namespace) +
@@ -310,6 +290,12 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
scrapeTimeout: $._config.kubeStateMetrics.scrapeTimeout,
honorLabels: true,
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
regex: '(pod|service|endpoint|namespace)',
action: 'labeldrop',
},
],
tlsConfig: {
insecureSkipVerify: true,
},

View File

@@ -89,7 +89,8 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
'--path.procfs=/host/proc',
'--path.sysfs=/host/sys',
'--path.rootfs=/host/root',
'--no-collector.wifi',
'--no-collector.hwmon',
// The following settings have been taken from
// https://github.com/prometheus/node_exporter/blob/0662673/collector/filesystem_linux.go#L30-L31
// Once node exporter is being released with those settings, this can be removed.
@@ -105,7 +106,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
container.new('kube-rbac-proxy', $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy) +
container.withArgs([
'--logtostderr',
'--secure-listen-address=$(IP):' + $._config.nodeExporter.port,
'--secure-listen-address=[$(IP)]:' + $._config.nodeExporter.port,
'--tls-cipher-suites=' + std.join(',', $._config.tlsCipherSuites),
'--upstream=http://127.0.0.1:' + $._config.nodeExporter.port + '/',
]) +
@@ -118,8 +119,8 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
// forgo declaring the host port, however it is important to declare
// it so that the scheduler can decide if the pod is schedulable.
container.withPorts(containerPort.new($._config.nodeExporter.port) + containerPort.withHostPort($._config.nodeExporter.port) + containerPort.withName('https')) +
container.mixin.resources.withRequests({ cpu: '10m', memory: '20Mi' }) +
container.mixin.resources.withLimits({ cpu: '20m', memory: '60Mi' }) +
container.mixin.resources.withRequests($._config.resources['kube-rbac-proxy'].requests) +
container.mixin.resources.withLimits($._config.resources['kube-rbac-proxy'].limits) +
container.withEnv([ip]);
local c = [nodeExporter, proxy];
@@ -168,13 +169,13 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
port: 'https',
scheme: 'https',
interval: '30s',
interval: '15s',
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
action: 'replace',
regex: '(.*)',
replacment: '$1',
replacement: '$1',
sourceLabels: ['__meta_kubernetes_pod_node_name'],
targetLabel: 'instance',
},

View File

@@ -5,7 +5,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
namespace: 'default',
versions+:: {
prometheusAdapter: 'v0.4.1',
prometheusAdapter: 'v0.5.0',
},
imageRepos+:: {
@@ -19,19 +19,19 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
config: |||
resourceRules:
cpu:
containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container_name!="POD",container_name!="",pod_name!=""}[1m])) by (<<.GroupBy>>)
nodeQuery: sum(1 - rate(node_cpu_seconds_total{mode="idle"}[1m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)
containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}[5m])) by (<<.GroupBy>>)
nodeQuery: sum(1 - rate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)
resources:
overrides:
node:
resource: node
namespace:
resource: namespace
pod_name:
pod:
resource: pod
containerLabel: container_name
containerLabel: container
memory:
containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container_name!="POD",container_name!="",pod_name!=""}) by (<<.GroupBy>>)
containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>)
nodeQuery: sum(node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}) by (<<.GroupBy>>)
resources:
overrides:
@@ -39,10 +39,10 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
resource: node
namespace:
resource: namespace
pod_name:
pod:
resource: pod
containerLabel: container_name
window: 1m
containerLabel: container
window: 5m
|||,
},
},
@@ -191,7 +191,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local rules =
policyRule.new() +
policyRule.withApiGroups(['metrics.k8s.io']) +
policyRule.withResources(['pods']) +
policyRule.withResources(['pods', 'nodes']) +
policyRule.withVerbs(['get','list','watch']);
clusterRole.new() +

View File

@@ -21,57 +21,67 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
name: 'k8s',
replicas: 2,
rules: {},
renderedRules: {},
namespaces: ['default', 'kube-system', $._config.namespace],
},
},
prometheus+:: {
local p = self,
name:: $._config.prometheus.name,
namespace:: $._config.namespace,
roleBindingNamespaces:: $._config.prometheus.namespaces,
replicas:: $._config.prometheus.replicas,
prometheusRules:: $._config.prometheus.rules,
alertmanagerName:: $.alertmanager.service.metadata.name,
serviceAccount:
local serviceAccount = k.core.v1.serviceAccount;
serviceAccount.new('prometheus-' + $._config.prometheus.name) +
serviceAccount.mixin.metadata.withNamespace($._config.namespace),
serviceAccount.new('prometheus-' + p.name) +
serviceAccount.mixin.metadata.withNamespace(p.namespace),
service:
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
local prometheusPort = servicePort.newNamed('web', 9090, 'web');
service.new('prometheus-' + $._config.prometheus.name, { app: 'prometheus', prometheus: $._config.prometheus.name }, prometheusPort) +
service.new('prometheus-' + p.name, { app: 'prometheus', prometheus: p.name }, prometheusPort) +
service.mixin.spec.withSessionAffinity('ClientIP') +
service.mixin.metadata.withNamespace($._config.namespace) +
service.mixin.metadata.withLabels({ prometheus: $._config.prometheus.name }),
[if $._config.prometheus.rules != null && $._config.prometheus.rules != {} then 'rules']:
service.mixin.metadata.withNamespace(p.namespace) +
service.mixin.metadata.withLabels({ prometheus: p.name }),
rules:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'PrometheusRule',
metadata: {
labels: {
prometheus: $._config.prometheus.name,
prometheus: p.name,
role: 'alert-rules',
},
name: 'prometheus-' + $._config.prometheus.name + '-rules',
namespace: $._config.namespace,
name: 'prometheus-' + p.name + '-rules',
namespace: p.namespace,
},
spec: {
groups: $._config.prometheus.rules.groups,
groups: p.prometheusRules.groups,
},
},
roleBindingSpecificNamespaces:
local roleBinding = k.rbac.v1.roleBinding;
local newSpecificRoleBinding(namespace) =
roleBinding.new() +
roleBinding.mixin.metadata.withName('prometheus-' + $._config.prometheus.name) +
roleBinding.mixin.metadata.withName('prometheus-' + p.name) +
roleBinding.mixin.metadata.withNamespace(namespace) +
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
roleBinding.mixin.roleRef.withName('prometheus-' + $._config.prometheus.name) +
roleBinding.mixin.roleRef.withName('prometheus-' + p.name) +
roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + $._config.prometheus.name, namespace: $._config.namespace }]);
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + p.name, namespace: p.namespace }]);
local roleBindingList = k3.rbac.v1.roleBindingList;
roleBindingList.new([newSpecificRoleBinding(x) for x in $._config.prometheus.namespaces]),
roleBindingList.new([newSpecificRoleBinding(x) for x in p.roleBindingNamespaces]),
clusterRole:
local clusterRole = k.rbac.v1.clusterRole;
local policyRule = clusterRole.rulesType;
@@ -88,7 +98,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local rules = [nodeMetricsRule, metricsRule];
clusterRole.new() +
clusterRole.mixin.metadata.withName('prometheus-' + $._config.prometheus.name) +
clusterRole.mixin.metadata.withName('prometheus-' + p.name) +
clusterRole.withRules(rules),
roleConfig:
local role = k.rbac.v1.role;
@@ -102,28 +112,28 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
policyRule.withVerbs(['get']);
role.new() +
role.mixin.metadata.withName('prometheus-' + $._config.prometheus.name + '-config') +
role.mixin.metadata.withNamespace($._config.namespace) +
role.mixin.metadata.withName('prometheus-' + p.name + '-config') +
role.mixin.metadata.withNamespace(p.namespace) +
role.withRules(configmapRule),
roleBindingConfig:
local roleBinding = k.rbac.v1.roleBinding;
roleBinding.new() +
roleBinding.mixin.metadata.withName('prometheus-' + $._config.prometheus.name + '-config') +
roleBinding.mixin.metadata.withNamespace($._config.namespace) +
roleBinding.mixin.metadata.withName('prometheus-' + p.name + '-config') +
roleBinding.mixin.metadata.withNamespace(p.namespace) +
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
roleBinding.mixin.roleRef.withName('prometheus-' + $._config.prometheus.name + '-config') +
roleBinding.mixin.roleRef.withName('prometheus-' + p.name + '-config') +
roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + $._config.prometheus.name, namespace: $._config.namespace }]),
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + p.name, namespace: p.namespace }]),
clusterRoleBinding:
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
clusterRoleBinding.new() +
clusterRoleBinding.mixin.metadata.withName('prometheus-' + $._config.prometheus.name) +
clusterRoleBinding.mixin.metadata.withName('prometheus-' + p.name) +
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
clusterRoleBinding.mixin.roleRef.withName('prometheus-' + $._config.prometheus.name) +
clusterRoleBinding.mixin.roleRef.withName('prometheus-' + p.name) +
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + $._config.prometheus.name, namespace: $._config.namespace }]),
clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + p.name, namespace: p.namespace }]),
roleSpecificNamespaces:
local role = k.rbac.v1.role;
local policyRule = role.rulesType;
@@ -138,18 +148,19 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local newSpecificRole(namespace) =
role.new() +
role.mixin.metadata.withName('prometheus-' + $._config.prometheus.name) +
role.mixin.metadata.withName('prometheus-' + p.name) +
role.mixin.metadata.withNamespace(namespace) +
role.withRules(coreRule);
local roleList = k3.rbac.v1.roleList;
roleList.new([newSpecificRole(x) for x in $._config.prometheus.namespaces]),
roleList.new([newSpecificRole(x) for x in p.roleBindingNamespaces]),
prometheus:
local statefulSet = k.apps.v1.statefulSet;
local container = statefulSet.mixin.spec.template.spec.containersType;
local resourceRequirements = container.mixin.resourcesType;
local selector = statefulSet.mixin.spec.selectorType;
local resources =
resourceRequirements.new() +
resourceRequirements.withRequests({ memory: '400Mi' });
@@ -158,31 +169,32 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
apiVersion: 'monitoring.coreos.com/v1',
kind: 'Prometheus',
metadata: {
name: $._config.prometheus.name,
namespace: $._config.namespace,
name: p.name,
namespace: p.namespace,
labels: {
prometheus: $._config.prometheus.name,
prometheus: p.name,
},
},
spec: {
replicas: $._config.prometheus.replicas,
replicas: p.replicas,
version: $._config.versions.prometheus,
baseImage: $._config.imageRepos.prometheus,
serviceAccountName: 'prometheus-' + $._config.prometheus.name,
serviceAccountName: 'prometheus-' + p.name,
serviceMonitorSelector: {},
podMonitorSelector: {},
serviceMonitorNamespaceSelector: {},
podMonitorNamespaceSelector: {},
nodeSelector: { 'kubernetes.io/os': 'linux' },
ruleSelector: selector.withMatchLabels({
role: 'alert-rules',
prometheus: $._config.prometheus.name,
prometheus: p.name,
}),
resources: resources,
alerting: {
alertmanagers: [
{
namespace: $._config.namespace,
name: 'alertmanager-' + $._config.alertmanager.name,
namespace: p.namespace,
name: p.alertmanagerName,
port: 'web',
},
],
@@ -200,7 +212,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
kind: 'ServiceMonitor',
metadata: {
name: 'prometheus',
namespace: $._config.namespace,
namespace: p.namespace,
labels: {
'k8s-app': 'prometheus',
},
@@ -208,7 +220,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
spec: {
selector: {
matchLabels: {
prometheus: $._config.prometheus.name,
prometheus: p.name,
},
},
endpoints: [
@@ -225,7 +237,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
kind: 'ServiceMonitor',
metadata: {
name: 'kube-scheduler',
namespace: $._config.namespace,
namespace: p.namespace,
labels: {
'k8s-app': 'kube-scheduler',
},
@@ -256,7 +268,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
kind: 'ServiceMonitor',
metadata: {
name: 'kubelet',
namespace: $._config.namespace,
namespace: p.namespace,
labels: {
'k8s-app': 'kubelet',
},
@@ -273,6 +285,13 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
insecureSkipVerify: true,
},
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
metricRelabelings: (import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet'),
relabelings: [
{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path',
},
],
},
{
port: 'https-metrics',
@@ -284,6 +303,12 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
insecureSkipVerify: true,
},
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path',
},
],
metricRelabelings: [
// Drop a bunch of metrics which are disabled but still sent, see
// https://github.com/google/cadvisor/issues/1925.
@@ -313,7 +338,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
kind: 'ServiceMonitor',
metadata: {
name: 'kube-controller-manager',
namespace: $._config.namespace,
namespace: p.namespace,
labels: {
'k8s-app': 'kube-controller-manager',
},
@@ -324,7 +349,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
port: 'http-metrics',
interval: '30s',
metricRelabelings: [
metricRelabelings: (import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet') + [
{
sourceLabels: ['__name__'],
regex: 'etcd_(debugging|disk|request|server).*',
@@ -351,7 +376,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
kind: 'ServiceMonitor',
metadata: {
name: 'kube-apiserver',
namespace: $._config.namespace,
namespace: p.namespace,
labels: {
'k8s-app': 'apiserver',
},
@@ -379,7 +404,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
serverName: 'kubernetes',
},
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
metricRelabelings: [
metricRelabelings: (import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet') + [
{
sourceLabels: ['__name__'],
regex: 'etcd_(debugging|disk|request|server).*',
@@ -395,6 +420,11 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
regex: 'apiserver_admission_step_admission_latencies_seconds_.*',
action: 'drop',
},
{
sourceLabels: ['__name__', 'le'],
regex: 'apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)',
action: 'drop',
},
],
},
],
@@ -406,7 +436,7 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
kind: 'ServiceMonitor',
metadata: {
name: 'coredns',
namespace: $._config.namespace,
namespace: p.namespace,
labels: {
'k8s-app': 'coredns',
},

View File

@@ -0,0 +1,19 @@
{
prometheusRules+:: {
groups+: [
{
name: 'kube-prometheus-general.rules',
rules: [
{
expr: 'count without(instance, pod, node) (up == 1)',
record: 'count:up1',
},
{
expr: 'count without(instance, pod, node) (up == 0)',
record: 'count:up0',
},
],
},
],
},
}
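
These recorded series aggregate up across instances, pods, and nodes. An illustrative ad-hoc query (not part of this change) that derives a per-job availability ratio from them, for jobs where both series exist:

count:up1 / (count:up1 + count:up0)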

View File

@@ -1 +1,2 @@
(import 'node-rules.libsonnet')
(import 'node-rules.libsonnet') +
(import 'general.libsonnet')

View File

@@ -1,13 +1,13 @@
{
"dependencies": [
{
"name": "kube-prometheus",
"source": {
"local": {
"directory": "jsonnet/kube-prometheus"
}
},
"version": ""
"dependencies": [
{
"name": "kube-prometheus",
"source": {
"local": {
"directory": "jsonnet/kube-prometheus"
}
]
},
"version": ""
}
]
}

View File

@@ -1,113 +1,134 @@
{
"dependencies": [
{
"name": "kube-prometheus",
"source": {
"local": {
"directory": "jsonnet/kube-prometheus"
}
},
"version": ""
},
{
"name": "ksonnet",
"source": {
"git": {
"remote": "https://github.com/ksonnet/ksonnet-lib",
"subdir": ""
}
},
"version": "0d2f82676817bbf9e4acf6495b2090205f323b9f"
},
{
"name": "kubernetes-mixin",
"source": {
"git": {
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
"subdir": ""
}
},
"version": "e3d6d8ebb1789af0e17fb1f60171aaf64926a3a1"
},
{
"name": "grafonnet",
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet-lib",
"subdir": "grafonnet"
}
},
"version": "69bc267211790a1c3f4ea6e6211f3e8ffe22f987"
},
{
"name": "grafana-builder",
"source": {
"git": {
"remote": "https://github.com/kausalco/public",
"subdir": "grafana-builder"
}
},
"version": "3c44dfa9bfe2b66985733d4b16e0afd29094b4a0"
},
{
"name": "grafana",
"source": {
"git": {
"remote": "https://github.com/brancz/kubernetes-grafana",
"subdir": "grafana"
}
},
"version": "c27d2792764867cdaf6484f067cc875cb8aef2f6"
},
{
"name": "prometheus-operator",
"source": {
"git": {
"remote": "https://github.com/coreos/prometheus-operator",
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "908ee0372a9ac2c6574d589fdc56a4f3cb5f12d1"
},
{
"name": "etcd-mixin",
"source": {
"git": {
"remote": "https://github.com/coreos/etcd",
"subdir": "Documentation/etcd-mixin"
}
},
"version": "7948f39790fbbc979729ca6f990740a20d4a2a76"
},
{
"name": "prometheus",
"source": {
"git": {
"remote": "https://github.com/prometheus/prometheus",
"subdir": "documentation/prometheus-mixin"
}
},
"version": "3638e4ab18ac320c3ed0b607f07aea309dadee45"
},
{
"name": "node-mixin",
"source": {
"git": {
"remote": "https://github.com/prometheus/node_exporter",
"subdir": "docs/node-mixin"
}
},
"version": "e7c2dbed4e0278731b59e9870eb9a9d046047aa8"
},
{
"name": "promgrafonnet",
"source": {
"git": {
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
"subdir": "lib/promgrafonnet"
}
},
"version": "24ea0d6e33a415e07ec7b675d74dea3cf01fde73"
"dependencies": [
{
"name": "etcd-mixin",
"source": {
"git": {
"remote": "https://github.com/coreos/etcd",
"subdir": "Documentation/etcd-mixin"
}
]
},
"version": "5770a6d286fe48682e29b54ce0df37e7d24b3280",
"sum": "Ko3qhNfC2vN/houLh6C0Ryacjv70gl0DVPGU/PQ4OD0="
},
{
"name": "grafana",
"source": {
"git": {
"remote": "https://github.com/brancz/kubernetes-grafana",
"subdir": "grafana"
}
},
"version": "539a90dbf63c812ad0194d8078dd776868a11c81",
"sum": "b8faWX1qqLGyN67sA36oRqYZ5HX+tHBRMPtrWRqIysE="
},
{
"name": "grafana-builder",
"source": {
"git": {
"remote": "https://github.com/grafana/jsonnet-libs",
"subdir": "grafana-builder"
}
},
"version": "676ff4b4fe9135f85a5d6e30523d64d2d3713087",
"sum": "ELsYwK+kGdzX1mee2Yy+/b2mdO4Y503BOCDkFzwmGbE="
},
{
"name": "grafonnet",
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet-lib",
"subdir": "grafonnet"
}
},
"version": "f3ee1d810858cf556d25f045b53cb0f1fd10b94e",
"sum": "14YBZUP/cl8qi9u86xiuUS4eXQrEAam+4GSg6i9n9Ys="
},
{
"name": "ksonnet",
"source": {
"git": {
"remote": "https://github.com/ksonnet/ksonnet-lib",
"subdir": ""
}
},
"version": "0d2f82676817bbf9e4acf6495b2090205f323b9f",
"sum": "h28BXZ7+vczxYJ2sCt8JuR9+yznRtU/iA6DCpQUrtEg="
},
{
"name": "kube-prometheus",
"source": {
"local": {
"directory": "jsonnet/kube-prometheus"
}
},
"version": ""
},
{
"name": "kubernetes-mixin",
"source": {
"git": {
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
"subdir": ""
}
},
"version": "68f82d2a428d91df57e9af43739981a6a8ede897",
"sum": "J/tuXi0Z8GRHo63pM17YFIyk4QgkFuMcQ20mAxi1flM="
},
{
"name": "node-mixin",
"source": {
"git": {
"remote": "https://github.com/prometheus/node_exporter",
"subdir": "docs/node-mixin"
}
},
"version": "2cae917bb7e0b6379221e8a24da012b16e63d661",
"sum": "7vEamDTP9AApeiF4Zu9ZyXzDIs3rYHzwf9k7g8X+wsg="
},
{
"name": "prometheus",
"source": {
"git": {
"remote": "https://github.com/prometheus/prometheus",
"subdir": "documentation/prometheus-mixin"
}
},
"version": "31700a05df64c2b4e32bb0ecd8baa25279144778",
"sum": "/cohvDTaIiLElG66tKeQsi4v1M9mlGDKjOBSWivL9TU="
},
{
"name": "prometheus-operator",
"source": {
"git": {
"remote": "https://github.com/coreos/prometheus-operator",
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "8d44e0990230144177f97cf62ae4f43b1c4e3168",
"sum": "5U7/8MD3pF9O0YDTtUhg4vctkUBRVFxZxWUyhtNiBM8="
},
{
"name": "promgrafonnet",
"source": {
"git": {
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
"subdir": "lib/promgrafonnet"
}
},
"version": "a7ee9d1abe1b1a3670a02ede1135cadb660b9d0c",
"sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc="
},
{
"name": "slo-libsonnet",
"source": {
"git": {
"remote": "https://github.com/metalmatze/slo-libsonnet",
"subdir": "slo-libsonnet"
}
},
"version": "437c402c5f3ad86c3c16db8471f1649284fef0ee",
"sum": "2Zcyku1f558VrUpMaJnI78fahDksPLcS1idmxxwcQ7Q="
}
]
}

View File

@@ -1,18 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./manifests/00namespace-namespace.yaml
- ./manifests/0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml
- ./manifests/0prometheus-operator-0podmonitorCustomResourceDefinition.yaml
- ./manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml
- ./manifests/0prometheus-operator-0prometheusruleCustomResourceDefinition.yaml
- ./manifests/0prometheus-operator-0servicemonitorCustomResourceDefinition.yaml
- ./manifests/0prometheus-operator-clusterRole.yaml
- ./manifests/0prometheus-operator-clusterRoleBinding.yaml
- ./manifests/0prometheus-operator-deployment.yaml
- ./manifests/0prometheus-operator-service.yaml
- ./manifests/0prometheus-operator-serviceAccount.yaml
- ./manifests/0prometheus-operator-serviceMonitor.yaml
- ./manifests/alertmanager-alertmanager.yaml
- ./manifests/alertmanager-secret.yaml
- ./manifests/alertmanager-service.yaml
@@ -52,6 +40,7 @@ resources:
- ./manifests/prometheus-adapter-serviceAccount.yaml
- ./manifests/prometheus-clusterRole.yaml
- ./manifests/prometheus-clusterRoleBinding.yaml
- ./manifests/prometheus-operator-serviceMonitor.yaml
- ./manifests/prometheus-prometheus.yaml
- ./manifests/prometheus-roleBindingConfig.yaml
- ./manifests/prometheus-roleBindingSpecificNamespaces.yaml
@@ -66,3 +55,14 @@ resources:
- ./manifests/prometheus-serviceMonitorKubeControllerManager.yaml
- ./manifests/prometheus-serviceMonitorKubeScheduler.yaml
- ./manifests/prometheus-serviceMonitorKubelet.yaml
- ./manifests/setup/0namespace-namespace.yaml
- ./manifests/setup/prometheus-operator-0alertmanagerCustomResourceDefinition.yaml
- ./manifests/setup/prometheus-operator-0podmonitorCustomResourceDefinition.yaml
- ./manifests/setup/prometheus-operator-0prometheusCustomResourceDefinition.yaml
- ./manifests/setup/prometheus-operator-0prometheusruleCustomResourceDefinition.yaml
- ./manifests/setup/prometheus-operator-0servicemonitorCustomResourceDefinition.yaml
- ./manifests/setup/prometheus-operator-clusterRole.yaml
- ./manifests/setup/prometheus-operator-clusterRoleBinding.yaml
- ./manifests/setup/prometheus-operator-deployment.yaml
- ./manifests/setup/prometheus-operator-service.yaml
- ./manifests/setup/prometheus-operator-serviceAccount.yaml

View File

@@ -1,6 +1,6 @@
apiVersion: v1
data:
alertmanager.yaml: Imdsb2JhbCI6CiAgInJlc29sdmVfdGltZW91dCI6ICI1bSIKInJlY2VpdmVycyI6Ci0gIm5hbWUiOiAibnVsbCIKInJvdXRlIjoKICAiZ3JvdXBfYnkiOgogIC0gImpvYiIKICAiZ3JvdXBfaW50ZXJ2YWwiOiAiNW0iCiAgImdyb3VwX3dhaXQiOiAiMzBzIgogICJyZWNlaXZlciI6ICJudWxsIgogICJyZXBlYXRfaW50ZXJ2YWwiOiAiMTJoIgogICJyb3V0ZXMiOgogIC0gIm1hdGNoIjoKICAgICAgImFsZXJ0bmFtZSI6ICJXYXRjaGRvZyIKICAgICJyZWNlaXZlciI6ICJudWxsIg==
alertmanager.yaml: Imdsb2JhbCI6CiAgInJlc29sdmVfdGltZW91dCI6ICI1bSIKInJlY2VpdmVycyI6Ci0gIm5hbWUiOiAibnVsbCIKInJvdXRlIjoKICAiZ3JvdXBfYnkiOgogIC0gIm5hbWVzcGFjZSIKICAiZ3JvdXBfaW50ZXJ2YWwiOiAiNW0iCiAgImdyb3VwX3dhaXQiOiAiMzBzIgogICJyZWNlaXZlciI6ICJudWxsIgogICJyZXBlYXRfaW50ZXJ2YWwiOiAiMTJoIgogICJyb3V0ZXMiOgogIC0gIm1hdGNoIjoKICAgICAgImFsZXJ0bmFtZSI6ICJXYXRjaGRvZyIKICAgICJyZWNlaXZlciI6ICJudWxsIg==
kind: Secret
metadata:
name: alertmanager-main

File diff suppressed because it is too large

View File

@@ -1,4 +1,4 @@
apiVersion: apps/v1beta2
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
@@ -16,7 +16,7 @@ spec:
app: grafana
spec:
containers:
- image: grafana/grafana:6.2.2
- image: grafana/grafana:6.4.3
name: grafana
ports:
- containerPort: 3000
@@ -45,6 +45,9 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/apiserver
name: grafana-dashboard-apiserver
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/cluster-total
name: grafana-dashboard-cluster-total
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/controller-manager
name: grafana-dashboard-controller-manager
readOnly: false
@@ -54,6 +57,9 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-namespace
name: grafana-dashboard-k8s-resources-namespace
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-node
name: grafana-dashboard-k8s-resources-node
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-pod
name: grafana-dashboard-k8s-resources-pod
readOnly: false
@@ -66,6 +72,12 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/kubelet
name: grafana-dashboard-kubelet
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/namespace-by-pod
name: grafana-dashboard-namespace-by-pod
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/namespace-by-workload
name: grafana-dashboard-namespace-by-workload
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/node-cluster-rsrc-use
name: grafana-dashboard-node-cluster-rsrc-use
readOnly: false
@@ -78,6 +90,9 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/persistentvolumesusage
name: grafana-dashboard-persistentvolumesusage
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/pod-total
name: grafana-dashboard-pod-total
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/pods
name: grafana-dashboard-pods
readOnly: false
@@ -96,6 +111,9 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/statefulset
name: grafana-dashboard-statefulset
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/workload-total
name: grafana-dashboard-workload-total
readOnly: false
nodeSelector:
beta.kubernetes.io/os: linux
securityContext:
@@ -114,6 +132,9 @@ spec:
- configMap:
name: grafana-dashboard-apiserver
name: grafana-dashboard-apiserver
- configMap:
name: grafana-dashboard-cluster-total
name: grafana-dashboard-cluster-total
- configMap:
name: grafana-dashboard-controller-manager
name: grafana-dashboard-controller-manager
@@ -123,6 +144,9 @@ spec:
- configMap:
name: grafana-dashboard-k8s-resources-namespace
name: grafana-dashboard-k8s-resources-namespace
- configMap:
name: grafana-dashboard-k8s-resources-node
name: grafana-dashboard-k8s-resources-node
- configMap:
name: grafana-dashboard-k8s-resources-pod
name: grafana-dashboard-k8s-resources-pod
@@ -135,6 +159,12 @@ spec:
- configMap:
name: grafana-dashboard-kubelet
name: grafana-dashboard-kubelet
- configMap:
name: grafana-dashboard-namespace-by-pod
name: grafana-dashboard-namespace-by-pod
- configMap:
name: grafana-dashboard-namespace-by-workload
name: grafana-dashboard-namespace-by-workload
- configMap:
name: grafana-dashboard-node-cluster-rsrc-use
name: grafana-dashboard-node-cluster-rsrc-use
@@ -147,6 +177,9 @@ spec:
- configMap:
name: grafana-dashboard-persistentvolumesusage
name: grafana-dashboard-persistentvolumesusage
- configMap:
name: grafana-dashboard-pod-total
name: grafana-dashboard-pod-total
- configMap:
name: grafana-dashboard-pods
name: grafana-dashboard-pods
@@ -165,3 +198,6 @@ spec:
- configMap:
name: grafana-dashboard-statefulset
name: grafana-dashboard-statefulset
- configMap:
name: grafana-dashboard-workload-total
name: grafana-dashboard-workload-total

View File

@@ -86,6 +86,22 @@ rules:
- storage.k8s.io
resources:
- storageclasses
- volumeattachments
verbs:
- list
- watch
- apiGroups:
- admissionregistration.k8s.io
resources:
- validatingwebhookconfigurations
- mutatingwebhookconfigurations
verbs:
- list
- watch
- apiGroups:
- networking.k8s.io
resources:
- networkpolicies
verbs:
- list
- watch

View File

@@ -55,7 +55,7 @@ spec:
- --port=8081
- --telemetry-host=127.0.0.1
- --telemetry-port=8082
image: quay.io/coreos/kube-state-metrics:v1.7.2
image: quay.io/coreos/kube-state-metrics:v1.9.2
name: kube-state-metrics
resources:
limits:
@@ -64,35 +64,6 @@ spec:
requests:
cpu: 100m
memory: 150Mi
- command:
- /pod_nanny
- --container=kube-state-metrics
- --cpu=100m
- --extra-cpu=2m
- --memory=150Mi
- --extra-memory=30Mi
- --threshold=5
- --deployment=kube-state-metrics
env:
- name: MY_POD_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.name
- name: MY_POD_NAMESPACE
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
image: k8s.gcr.io/addon-resizer:1.8.4
name: addon-resizer
resources:
limits:
cpu: 50m
memory: 30Mi
requests:
cpu: 10m
memory: 30Mi
nodeSelector:
kubernetes.io/os: linux
securityContext:

View File

@@ -11,6 +11,9 @@ spec:
honorLabels: true
interval: 30s
port: https-main
relabelings:
- action: labeldrop
regex: (pod|service|endpoint|namespace)
scheme: https
scrapeTimeout: 30s
tlsConfig:

View File

@@ -20,6 +20,8 @@ spec:
- --path.procfs=/host/proc
- --path.sysfs=/host/sys
- --path.rootfs=/host/root
- --no-collector.wifi
- --no-collector.hwmon
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/)
- --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$
image: quay.io/prometheus/node-exporter:v0.18.1
@@ -44,7 +46,7 @@ spec:
readOnly: true
- args:
- --logtostderr
- --secure-listen-address=$(IP):9100
- --secure-listen-address=[$(IP)]:9100
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
- --upstream=http://127.0.0.1:9100/
env:
@@ -61,7 +63,7 @@ spec:
resources:
limits:
cpu: 20m
memory: 60Mi
memory: 40Mi
requests:
cpu: 10m
memory: 20Mi

View File

@@ -8,12 +8,12 @@ metadata:
spec:
endpoints:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
interval: 30s
interval: 15s
port: https
relabelings:
- action: replace
regex: (.*)
replacment: $1
replacement: $1
sourceLabels:
- __meta_kubernetes_pod_node_name
targetLabel: instance

View File

@@ -11,6 +11,7 @@ rules:
- metrics.k8s.io
resources:
- pods
- nodes
verbs:
- get
- list

View File

@@ -3,19 +3,19 @@ data:
config.yaml: |
resourceRules:
cpu:
containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container_name!="POD",container_name!="",pod_name!=""}[1m])) by (<<.GroupBy>>)
nodeQuery: sum(1 - rate(node_cpu_seconds_total{mode="idle"}[1m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)
containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}[5m])) by (<<.GroupBy>>)
nodeQuery: sum(1 - rate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)
resources:
overrides:
node:
resource: node
namespace:
resource: namespace
pod_name:
pod:
resource: pod
containerLabel: container_name
containerLabel: container
memory:
containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container_name!="POD",container_name!="",pod_name!=""}) by (<<.GroupBy>>)
containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>)
nodeQuery: sum(node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}) by (<<.GroupBy>>)
resources:
overrides:
@@ -23,10 +23,10 @@ data:
resource: node
namespace:
resource: namespace
pod_name:
pod:
resource: pod
containerLabel: container_name
window: 1m
containerLabel: container
window: 5m
kind: ConfigMap
metadata:
name: adapter-config
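
For illustration only, assuming the adapter replaces <<.LabelMatchers>> with the selector of the requested object (for example namespace="default",pod="example-pod") and <<.GroupBy>> with pod, the cpu containerQuery above would expand to roughly:

sum(rate(container_cpu_usage_seconds_total{namespace="default",pod="example-pod",container!="POD",container!="",pod!=""}[5m])) by (pod)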

View File

@@ -25,7 +25,7 @@ spec:
- --metrics-relist-interval=1m
- --prometheus-url=http://prometheus-k8s.monitoring.svc:9090/
- --secure-port=6443
image: quay.io/coreos/k8s-prometheus-adapter-amd64:v0.4.1
image: quay.io/coreos/k8s-prometheus-adapter-amd64:v0.5.0
name: prometheus-adapter
ports:
- containerPort: 6443


@@ -4,7 +4,7 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.33.0
app.kubernetes.io/version: v0.34.0
name: prometheus-operator
namespace: monitoring
spec:
@@ -15,4 +15,4 @@ spec:
matchLabels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.33.0
app.kubernetes.io/version: v0.34.0


@@ -14,6 +14,7 @@ spec:
baseImage: quay.io/prometheus/prometheus
nodeSelector:
kubernetes.io/os: linux
podMonitorNamespaceSelector: {}
podMonitorSelector: {}
replicas: 2
resources:


@@ -37,12 +37,8 @@ spec:
)
record: instance:node_memory_utilisation:ratio
- expr: |
(
rate(node_vmstat_pgpgin{job="node-exporter"}[1m])
+
rate(node_vmstat_pgpgout{job="node-exporter"}[1m])
)
record: instance:node_memory_swap_io_pages:rate1m
rate(node_vmstat_pgmajfault{job="node-exporter"}[1m])
record: instance:node_vmstat_pgmajfault:rate1m
- expr: |
rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])
record: instance_device:node_disk_io_time_seconds:rate1m
@@ -69,31 +65,77 @@ spec:
rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[1m])
)
record: instance:node_network_transmit_drop_excluding_lo:rate1m
- name: kube-apiserver.rules
rules:
- expr: |
sum(rate(apiserver_request_duration_seconds_sum{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|PROXY|CONNECT"}[5m])) without(instance, pod)
/
sum(rate(apiserver_request_duration_seconds_count{subresource!="log",verb!~"LIST|WATCH|WATCHLIST|PROXY|CONNECT"}[5m])) without(instance, pod)
record: cluster:apiserver_request_duration_seconds:mean5m
- expr: |
histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|PROXY|CONNECT"}[5m])) without(instance, pod))
labels:
quantile: "0.99"
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- expr: |
histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|PROXY|CONNECT"}[5m])) without(instance, pod))
labels:
quantile: "0.9"
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- expr: |
histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",subresource!="log",verb!~"LIST|WATCH|WATCHLIST|PROXY|CONNECT"}[5m])) without(instance, pod))
labels:
quantile: "0.5"
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- name: k8s.rules
rules:
- expr: |
sum(rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container!="POD"}[5m])) by (namespace)
sum(rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])) by (namespace)
record: namespace:container_cpu_usage_seconds_total:sum_rate
- expr: |
sum by (namespace, pod, container) (
rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container!="POD"}[5m])
)
record: namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
sum by (cluster, namespace, pod, container) (
rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])
) * on (cluster, namespace, pod) group_left(node) max by(cluster, namespace, pod, node) (kube_pod_info)
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
- expr: |
sum(container_memory_usage_bytes{job="kubelet", image!="", container!="POD"}) by (namespace)
container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
record: node_namespace_pod_container:container_memory_working_set_bytes
- expr: |
container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
record: node_namespace_pod_container:container_memory_rss
- expr: |
container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
record: node_namespace_pod_container:container_memory_cache
- expr: |
container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
record: node_namespace_pod_container:container_memory_swap
- expr: |
sum(container_memory_usage_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}) by (namespace)
record: namespace:container_memory_usage_bytes:sum
- expr: |
sum by (namespace, label_name) (
sum(kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"} * on (endpoint, instance, job, namespace, pod, service) group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)) by (namespace, pod)
* on (namespace, pod)
group_left(label_name) kube_pod_labels{job="kube-state-metrics"}
sum by (namespace) (
sum by (namespace, pod) (
max by (namespace, pod, container) (
kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"}
) * on(namespace, pod) group_left() max by (namespace, pod) (
kube_pod_status_phase{phase=~"Pending|Running"} == 1
)
)
)
record: namespace:kube_pod_container_resource_requests_memory_bytes:sum
- expr: |
sum by (namespace, label_name) (
sum(kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"} * on (endpoint, instance, job, namespace, pod, service) group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)) by (namespace, pod)
* on (namespace, pod)
group_left(label_name) kube_pod_labels{job="kube-state-metrics"}
sum by (namespace) (
sum by (namespace, pod) (
max by (namespace, pod, container) (
kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"}
) * on(namespace, pod) group_left() max by (namespace, pod) (
kube_pod_status_phase{phase=~"Pending|Running"} == 1
)
)
)
record: namespace:kube_pod_container_resource_requests_cpu_cores:sum
- expr: |
@@ -105,7 +147,7 @@ spec:
) * on(replicaset, namespace) group_left(owner_name) kube_replicaset_owner{job="kube-state-metrics"},
"workload", "$1", "owner_name", "(.*)"
)
) by (namespace, workload, pod)
) by (cluster, namespace, workload, pod)
labels:
workload_type: deployment
record: mixin_pod_workload
@@ -115,7 +157,7 @@ spec:
kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"},
"workload", "$1", "owner_name", "(.*)"
)
) by (namespace, workload, pod)
) by (cluster, namespace, workload, pod)
labels:
workload_type: daemonset
record: mixin_pod_workload
@@ -125,7 +167,7 @@ spec:
kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"},
"workload", "$1", "owner_name", "(.*)"
)
) by (namespace, workload, pod)
) by (cluster, namespace, workload, pod)
labels:
workload_type: statefulset
record: mixin_pod_workload
@@ -176,40 +218,32 @@ spec:
labels:
quantile: "0.5"
record: cluster_quantile:scheduler_binding_duration_seconds:histogram_quantile
- name: kube-apiserver.rules
rules:
- expr: |
histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver"}[5m])) without(instance, pod))
labels:
quantile: "0.99"
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- expr: |
histogram_quantile(0.9, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver"}[5m])) without(instance, pod))
labels:
quantile: "0.9"
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- expr: |
histogram_quantile(0.5, sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver"}[5m])) without(instance, pod))
labels:
quantile: "0.5"
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- name: node.rules
rules:
- expr: sum(min(kube_pod_info) by (node))
- expr: |
sum(min(kube_pod_info) by (cluster, node))
record: ':kube_pod_info_node_count:'
- expr: |
max(label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")) by (node, namespace, pod)
record: 'node_namespace_pod:kube_pod_info:'
- expr: |
count by (node) (sum by (node, cpu) (
count by (cluster, node) (sum by (node, cpu) (
node_cpu_seconds_total{job="node-exporter"}
* on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:
))
record: node:node_num_cpu:sum
- expr: |
sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
record: :node_memory_MemFreeCachedBuffers_bytes:sum
sum(
node_memory_MemAvailable_bytes{job="node-exporter"} or
(
node_memory_Buffers_bytes{job="node-exporter"} +
node_memory_Cached_bytes{job="node-exporter"} +
node_memory_MemFree_bytes{job="node-exporter"} +
node_memory_Slab_bytes{job="node-exporter"}
)
) by (cluster)
record: :node_memory_MemAvailable_bytes:sum
- name: kube-prometheus-node-recording.rules
rules:
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY
@@ -231,6 +265,12 @@ spec:
- expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total)
BY (instance, cpu))
record: cluster:node_cpu:ratio
- name: kube-prometheus-general.rules
rules:
- expr: count without(instance, pod, node) (up == 1)
record: count:up1
- expr: count without(instance, pod, node) (up == 0)
record: count:up0
- name: node-exporter
rules:
- alert: NodeFilesystemSpaceFillingUp
@@ -242,7 +282,7 @@ spec:
summary: Filesystem is predicted to run out of space within the next 24 hours.
expr: |
(
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} < 0.4
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 40
and
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0
and
@@ -260,7 +300,7 @@ spec:
summary: Filesystem is predicted to run out of space within the next 4 hours.
expr: |
(
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} < 0.2
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 20
and
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0
and
@@ -308,7 +348,7 @@ spec:
summary: Filesystem is predicted to run out of inodes within the next 24 hours.
expr: |
(
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} < 0.4
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 40
and
predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0
and
@@ -326,7 +366,7 @@ spec:
summary: Filesystem is predicted to run out of inodes within the next 4 hours.
expr: |
(
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} < 0.2
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 20
and
predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0
and
@@ -387,98 +427,6 @@ spec:
for: 1h
labels:
severity: warning
- name: kubernetes-absent
rules:
- alert: AlertmanagerDown
annotations:
message: Alertmanager has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-alertmanagerdown
expr: |
absent(up{job="alertmanager-main",namespace="monitoring"} == 1)
for: 15m
labels:
severity: critical
- alert: CoreDNSDown
annotations:
message: CoreDNS has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-corednsdown
expr: |
absent(up{job="kube-dns"} == 1)
for: 15m
labels:
severity: critical
- alert: KubeAPIDown
annotations:
message: KubeAPI has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown
expr: |
absent(up{job="apiserver"} == 1)
for: 15m
labels:
severity: critical
- alert: KubeControllerManagerDown
annotations:
message: KubeControllerManager has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown
expr: |
absent(up{job="kube-controller-manager"} == 1)
for: 15m
labels:
severity: critical
- alert: KubeSchedulerDown
annotations:
message: KubeScheduler has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown
expr: |
absent(up{job="kube-scheduler"} == 1)
for: 15m
labels:
severity: critical
- alert: KubeStateMetricsDown
annotations:
message: KubeStateMetrics has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricsdown
expr: |
absent(up{job="kube-state-metrics"} == 1)
for: 15m
labels:
severity: critical
- alert: KubeletDown
annotations:
message: Kubelet has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletdown
expr: |
absent(up{job="kubelet"} == 1)
for: 15m
labels:
severity: critical
- alert: NodeExporterDown
annotations:
message: NodeExporter has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeexporterdown
expr: |
absent(up{job="node-exporter"} == 1)
for: 15m
labels:
severity: critical
- alert: PrometheusDown
annotations:
message: Prometheus has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusdown
expr: |
absent(up{job="prometheus-k8s",namespace="monitoring"} == 1)
for: 15m
labels:
severity: critical
- alert: PrometheusOperatorDown
annotations:
message: PrometheusOperator has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatordown
expr: |
absent(up{job="prometheus-operator",namespace="monitoring"} == 1)
for: 15m
labels:
severity: critical
- name: kubernetes-apps
rules:
- alert: KubePodCrashLooping
@@ -497,7 +445,7 @@ spec:
state for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
expr: |
sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase=~"Failed|Pending|Unknown"}) > 0
sum by (namespace, pod) (max by(namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"}) * on(namespace, pod) group_left(owner_kind) max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!="Job"})) > 0
for: 15m
labels:
severity: critical
@@ -573,16 +521,26 @@ spec:
severity: critical
- alert: KubeDaemonSetRolloutStuck
annotations:
message: Only {{ $value }}% of the desired Pods of DaemonSet {{ $labels.namespace
}}/{{ $labels.daemonset }} are scheduled and ready.
message: Only {{ $value | humanizePercentage }} of the desired Pods of DaemonSet
{{ $labels.namespace }}/{{ $labels.daemonset }} are scheduled and ready.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck
expr: |
kube_daemonset_status_number_ready{job="kube-state-metrics"}
/
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} * 100 < 100
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} < 1.00
for: 15m
labels:
severity: critical
- alert: KubeContainerWaiting
annotations:
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}}
has been in waiting state for longer than 1 hour.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting
expr: |
sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
for: 1h
labels:
severity: warning
- alert: KubeDaemonSetNotScheduled
annotations:
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
@@ -630,7 +588,33 @@ spec:
message: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed
expr: |
kube_job_status_failed{job="kube-state-metrics"} > 0
kube_job_failed{job="kube-state-metrics"} > 0
for: 15m
labels:
severity: warning
- alert: KubeHpaReplicasMismatch
annotations:
message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched the
desired number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpareplicasmismatch
expr: |
(kube_hpa_status_desired_replicas{job="kube-state-metrics"}
!=
kube_hpa_status_current_replicas{job="kube-state-metrics"})
and
changes(kube_hpa_status_current_replicas[15m]) == 0
for: 15m
labels:
severity: warning
- alert: KubeHpaMaxedOut
annotations:
message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running at
max replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpamaxedout
expr: |
kube_hpa_status_current_replicas{job="kube-state-metrics"}
==
kube_hpa_spec_max_replicas{job="kube-state-metrics"}
for: 15m
labels:
severity: warning
@@ -692,25 +676,28 @@ spec:
severity: warning
- alert: KubeQuotaExceeded
annotations:
message: Namespace {{ $labels.namespace }} is using {{ printf "%0.0f" $value
}}% of its {{ $labels.resource }} quota.
message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
}} of its {{ $labels.resource }} quota.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded
expr: |
100 * kube_resourcequota{job="kube-state-metrics", type="used"}
kube_resourcequota{job="kube-state-metrics", type="used"}
/ ignoring(instance, job, type)
(kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
> 90
> 0.90
for: 15m
labels:
severity: warning
- alert: CPUThrottlingHigh
annotations:
message: '{{ printf "%0.0f" $value }}% throttling of CPU in namespace {{ $labels.namespace
}} for container {{ $labels.container }} in pod {{ $labels.pod }}.'
message: '{{ $value | humanizePercentage }} throttling of CPU in namespace
{{ $labels.namespace }} for container {{ $labels.container }} in pod {{
$labels.pod }}.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
expr: "100 * sum(increase(container_cpu_cfs_throttled_periods_total{container!=\"\",
}[5m])) by (container, pod, namespace)\n /\nsum(increase(container_cpu_cfs_periods_total{}[5m]))
by (container, pod, namespace)\n > 25 \n"
expr: |
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container, pod, namespace)
/
sum(increase(container_cpu_cfs_periods_total{}[5m])) by (container, pod, namespace)
> ( 25 / 100 )
for: 15m
labels:
severity: warning
@@ -719,14 +706,14 @@ spec:
- alert: KubePersistentVolumeUsageCritical
annotations:
message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} is only {{ printf "%0.2f" $value
}}% free.
}} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage
}} free.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeusagecritical
expr: |
100 * kubelet_volume_stats_available_bytes{job="kubelet"}
kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}
/
kubelet_volume_stats_capacity_bytes{job="kubelet"}
< 3
kubelet_volume_stats_capacity_bytes{job="kubelet", metrics_path="/metrics"}
< 0.03
for: 1m
labels:
severity: critical
@@ -734,17 +721,17 @@ spec:
annotations:
message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} is expected to fill up within four
days. Currently {{ printf "%0.2f" $value }}% is available.
days. Currently {{ $value | humanizePercentage }} is available.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefullinfourdays
expr: |
100 * (
kubelet_volume_stats_available_bytes{job="kubelet"}
(
kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}
/
kubelet_volume_stats_capacity_bytes{job="kubelet"}
) < 15
kubelet_volume_stats_capacity_bytes{job="kubelet", metrics_path="/metrics"}
) < 0.15
and
predict_linear(kubelet_volume_stats_available_bytes{job="kubelet"}[6h], 4 * 24 * 3600) < 0
for: 5m
predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
for: 1h
labels:
severity: critical
- alert: KubePersistentVolumeErrors
@@ -759,15 +746,6 @@ spec:
severity: critical
- name: kubernetes-system
rules:
- alert: KubeNodeNotReady
annotations:
message: '{{ $labels.node }} has been unready for more than an hour.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready
expr: |
kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
for: 15m
labels:
severity: warning
- alert: KubeVersionMismatch
annotations:
message: There are {{ $value }} different semantic versions of Kubernetes
@@ -781,34 +759,190 @@ spec:
- alert: KubeClientErrors
annotations:
message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance
}}' is experiencing {{ printf "%0.0f" $value }}% errors.'
}}' is experiencing {{ $value | humanizePercentage }} errors.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
expr: |
(sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job)
/
sum(rate(rest_client_requests_total[5m])) by (instance, job))
* 100 > 1
> 0.01
for: 15m
labels:
severity: warning
- alert: KubeClientErrors
- name: kube-apiserver-error
rules:
- alert: ErrorBudgetBurn
annotations:
message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance
}}' is experiencing {{ printf "%0.0f" $value }} errors / second.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-errorbudgetburn
expr: |
sum(rate(ksm_scrape_error_total{job="kube-state-metrics"}[5m])) by (instance, job) > 0.1
for: 15m
(
status_class_5xx:apiserver_request_total:ratio_rate1h{job="apiserver"} > (14.4*0.010000)
and
status_class_5xx:apiserver_request_total:ratio_rate5m{job="apiserver"} > (14.4*0.010000)
)
or
(
status_class_5xx:apiserver_request_total:ratio_rate6h{job="apiserver"} > (6*0.010000)
and
status_class_5xx:apiserver_request_total:ratio_rate30m{job="apiserver"} > (6*0.010000)
)
labels:
severity: warning
- alert: KubeletTooManyPods
job: apiserver
severity: critical
- alert: ErrorBudgetBurn
annotations:
message: Kubelet {{ $labels.instance }} is running {{ $value }} Pods, close
to the limit of 110.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-errorbudgetburn
expr: |
kubelet_running_pod_count{job="kubelet"} > 110 * 0.9
for: 15m
(
status_class_5xx:apiserver_request_total:ratio_rate1d{job="apiserver"} > (3*0.010000)
and
status_class_5xx:apiserver_request_total:ratio_rate2h{job="apiserver"} > (3*0.010000)
)
or
(
status_class_5xx:apiserver_request_total:ratio_rate3d{job="apiserver"} > (0.010000)
and
status_class_5xx:apiserver_request_total:ratio_rate6h{job="apiserver"} > (0.010000)
)
labels:
job: apiserver
severity: warning
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[5m]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate5m
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[30m]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate30m
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[1h]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate1h
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[2h]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate2h
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[6h]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate6h
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[1d]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate1d
- expr: |
sum by (status_class) (
label_replace(
rate(apiserver_request_total{job="apiserver"}[3d]
), "status_class", "${1}xx", "code", "([0-9])..")
)
labels:
job: apiserver
record: status_class:apiserver_request_total:rate3d
- expr: |
sum(status_class:apiserver_request_total:rate5m{job="apiserver",status_class="5xx"})
/
sum(status_class:apiserver_request_total:rate5m{job="apiserver"})
labels:
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate5m
- expr: |
sum(status_class:apiserver_request_total:rate30m{job="apiserver",status_class="5xx"})
/
sum(status_class:apiserver_request_total:rate30m{job="apiserver"})
labels:
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate30m
- expr: |
sum(status_class:apiserver_request_total:rate1h{job="apiserver",status_class="5xx"})
/
sum(status_class:apiserver_request_total:rate1h{job="apiserver"})
labels:
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate1h
- expr: |
sum(status_class:apiserver_request_total:rate2h{job="apiserver",status_class="5xx"})
/
sum(status_class:apiserver_request_total:rate2h{job="apiserver"})
labels:
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate2h
- expr: |
sum(status_class:apiserver_request_total:rate6h{job="apiserver",status_class="5xx"})
/
sum(status_class:apiserver_request_total:rate6h{job="apiserver"})
labels:
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate6h
- expr: |
sum(status_class:apiserver_request_total:rate1d{job="apiserver",status_class="5xx"})
/
sum(status_class:apiserver_request_total:rate1d{job="apiserver"})
labels:
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate1d
- expr: |
sum(status_class:apiserver_request_total:rate3d{job="apiserver",status_class="5xx"})
/
sum(status_class:apiserver_request_total:rate3d{job="apiserver"})
labels:
job: apiserver
record: status_class_5xx:apiserver_request_total:ratio_rate3d
- name: kubernetes-system-apiserver
rules:
- alert: KubeAPILatencyHigh
annotations:
message: The API server has an abnormal latency of {{ $value }} seconds for
{{ $labels.verb }} {{ $labels.resource }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
expr: |
(
cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"}
>
on (verb) group_left()
(
avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
+
2*stddev by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
)
) > on (verb) group_left()
1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
and on (verb,resource)
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"}
>
1
for: 5m
labels:
severity: warning
- alert: KubeAPILatencyHigh
@@ -817,63 +951,57 @@ spec:
for {{ $labels.verb }} {{ $labels.resource }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
expr: |
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 1
for: 10m
labels:
severity: warning
- alert: KubeAPILatencyHigh
annotations:
message: The API server has a 99th percentile latency of {{ $value }} seconds
for {{ $labels.verb }} {{ $labels.resource }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
expr: |
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99",subresource!="log",verb!~"^(?:LIST|WATCH|WATCHLIST|PROXY|CONNECT)$"} > 4
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"} > 4
for: 10m
labels:
severity: critical
- alert: KubeAPIErrorsHigh
annotations:
message: API server is returning errors for {{ $value }}% of requests.
message: API server is returning errors for {{ $value | humanizePercentage
}} of requests.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
expr: |
sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m]))
sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m]))
/
sum(rate(apiserver_request_total{job="apiserver"}[5m])) * 100 > 3
sum(rate(apiserver_request_total{job="apiserver"}[5m])) > 0.03
for: 10m
labels:
severity: critical
- alert: KubeAPIErrorsHigh
annotations:
message: API server is returning errors for {{ $value }}% of requests.
message: API server is returning errors for {{ $value | humanizePercentage
}} of requests.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
expr: |
sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m]))
sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m]))
/
sum(rate(apiserver_request_total{job="apiserver"}[5m])) * 100 > 1
sum(rate(apiserver_request_total{job="apiserver"}[5m])) > 0.01
for: 10m
labels:
severity: warning
- alert: KubeAPIErrorsHigh
annotations:
message: API server is returning errors for {{ $value }}% of requests for
{{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}.
message: API server is returning errors for {{ $value | humanizePercentage
}} of requests for {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource
}}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
expr: |
sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m])) by (resource,subresource,verb)
/
sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 10
sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) > 0.10
for: 10m
labels:
severity: critical
- alert: KubeAPIErrorsHigh
annotations:
message: API server is returning errors for {{ $value }}% of requests for
{{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}.
message: API server is returning errors for {{ $value | humanizePercentage
}} of requests for {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource
}}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
expr: |
sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m])) by (resource,subresource,verb)
/
sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 5
sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) > 0.05
for: 10m
labels:
severity: warning
@@ -895,6 +1023,75 @@ spec:
apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
labels:
severity: critical
- alert: KubeAPIDown
annotations:
message: KubeAPI has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapidown
expr: |
absent(up{job="apiserver"} == 1)
for: 15m
labels:
severity: critical
- name: kubernetes-system-kubelet
rules:
- alert: KubeNodeNotReady
annotations:
message: '{{ $labels.node }} has been unready for more than 15 minutes.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready
expr: |
kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
for: 15m
labels:
severity: warning
- alert: KubeNodeUnreachable
annotations:
message: '{{ $labels.node }} is unreachable and some workloads may be rescheduled.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodeunreachable
expr: |
kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} == 1
labels:
severity: warning
- alert: KubeletTooManyPods
annotations:
message: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage
}} of its Pod capacity.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
expr: |
max(max(kubelet_running_pod_count{job="kubelet", metrics_path="/metrics"}) by(instance) * on(instance) group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}) by(node) / max(kube_node_status_capacity_pods{job="kube-state-metrics"}) by(node) > 0.95
for: 15m
labels:
severity: warning
- alert: KubeletDown
annotations:
message: Kubelet has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletdown
expr: |
absent(up{job="kubelet", metrics_path="/metrics"} == 1)
for: 15m
labels:
severity: critical
- name: kubernetes-system-scheduler
rules:
- alert: KubeSchedulerDown
annotations:
message: KubeScheduler has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeschedulerdown
expr: |
absent(up{job="kube-scheduler"} == 1)
for: 15m
labels:
severity: critical
- name: kubernetes-system-controller-manager
rules:
- alert: KubeControllerManagerDown
annotations:
message: KubeControllerManager has disappeared from Prometheus target discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown
expr: |
absent(up{job="kube-controller-manager"} == 1)
for: 15m
labels:
severity: critical
- name: prometheus
rules:
- alert: PrometheusBadConfig
@@ -991,17 +1188,6 @@ spec:
for: 4h
labels:
severity: warning
- alert: PrometheusTSDBWALCorruptions
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected
{{$value | humanize}} corruptions of the write-ahead log (WAL) over the
last 3h.
summary: Prometheus is detecting WAL corruptions.
expr: |
increase(tsdb_wal_corruptions_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
for: 4h
labels:
severity: warning
- alert: PrometheusNotIngestingSamples
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting
@@ -1015,7 +1201,8 @@ spec:
- alert: PrometheusDuplicateTimestamps
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping
{{$value | humanize}} samples/s with different values but duplicated timestamp.
{{ printf "%.4g" $value }} samples/s with different values but duplicated
timestamp.
summary: Prometheus is dropping samples with duplicate timestamps.
expr: |
rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
@@ -1025,7 +1212,7 @@ spec:
- alert: PrometheusOutOfOrderTimestamps
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping
{{$value | humanize}} samples/s with timestamps arriving out of order.
{{ printf "%.4g" $value }} samples/s with timestamps arriving out of order.
summary: Prometheus drops samples with out-of-order timestamps.
expr: |
rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
@@ -1069,6 +1256,25 @@ spec:
for: 15m
labels:
severity: critical
- alert: PrometheusRemoteWriteDesiredShards
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
desired shards calculation wants to run {{ $value }} shards, which is more
than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}`
$labels.instance | query | first | value }}.
summary: Prometheus remote write desired shards calculation wants to run more
than configured max shards.
expr: |
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
(
max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-k8s",namespace="monitoring"}[5m])
>
max_over_time(prometheus_remote_storage_shards_max{job="prometheus-k8s",namespace="monitoring"}[5m])
)
for: 15m
labels:
severity: warning
- alert: PrometheusRuleFailures
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to
@@ -1123,7 +1329,8 @@ spec:
rules:
- alert: TargetDown
annotations:
message: '{{ $value }}% of the {{ $labels.job }} targets are down.'
message: '{{ printf "%.4g" $value }}% of the {{ $labels.job }} targets in
{{ $labels.namespace }} namespace are down.'
expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job,
namespace, service)) > 10
for: 10m
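A recurring pattern in the rules diff above is the switch from percentage-valued expressions (ending in * 100 > N) to ratio-valued ones (> N/100), paired with the humanizePercentage template function in the alert messages. A hedged illustration of the two equivalent forms, with invented metric names and an arbitrary threshold:
# old style: expression yields a percentage, message interpolates {{ $value }}%
expr: sum(rate(errors_total[5m])) / sum(rate(requests_total[5m])) * 100 > 3
# new style: expression yields a ratio in [0,1], message uses {{ $value | humanizePercentage }}
expr: sum(rate(errors_total[5m])) / sum(rate(requests_total[5m])) > 0.03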


@@ -10,6 +10,38 @@ spec:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
interval: 30s
metricRelabelings:
- action: drop
regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
sourceLabels:
- __name__
- action: drop
regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
sourceLabels:
- __name__
- action: drop
regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
sourceLabels:
- __name__
- action: drop
regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
sourceLabels:
- __name__
- action: drop
regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
sourceLabels:
- __name__
- action: drop
regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
sourceLabels:
- __name__
- action: drop
regex: transformation_(transformation_latencies_microseconds|failures_total)
sourceLabels:
- __name__
- action: drop
regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|AvailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rul
es_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
sourceLabels:
- __name__
- action: drop
regex: etcd_(debugging|disk|request|server).*
sourceLabels:
@@ -22,6 +54,11 @@ spec:
regex: apiserver_admission_step_admission_latencies_seconds_.*
sourceLabels:
- __name__
- action: drop
regex: apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)
sourceLabels:
- __name__
- le
port: https
scheme: https
tlsConfig:


@@ -9,6 +9,38 @@ spec:
endpoints:
- interval: 30s
metricRelabelings:
- action: drop
regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
sourceLabels:
- __name__
- action: drop
regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
sourceLabels:
- __name__
- action: drop
regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
sourceLabels:
- __name__
- action: drop
regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
sourceLabels:
- __name__
- action: drop
regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
sourceLabels:
- __name__
- action: drop
regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
sourceLabels:
- __name__
- action: drop
regex: transformation_(transformation_latencies_microseconds|failures_total)
sourceLabels:
- __name__
- action: drop
regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|AvailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rul
es_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
sourceLabels:
- __name__
- action: drop
regex: etcd_(debugging|disk|request|server).*
sourceLabels:


@@ -10,7 +10,44 @@ spec:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
honorLabels: true
interval: 30s
metricRelabelings:
- action: drop
regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
sourceLabels:
- __name__
- action: drop
regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
sourceLabels:
- __name__
- action: drop
regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
sourceLabels:
- __name__
- action: drop
regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
sourceLabels:
- __name__
- action: drop
regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
sourceLabels:
- __name__
- action: drop
regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
sourceLabels:
- __name__
- action: drop
regex: transformation_(transformation_latencies_microseconds|failures_total)
sourceLabels:
- __name__
- action: drop
regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|AvailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rul
es_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
sourceLabels:
- __name__
port: https-metrics
relabelings:
- sourceLabels:
- __metrics_path__
targetLabel: metrics_path
scheme: https
tlsConfig:
insecureSkipVerify: true
@@ -24,6 +61,10 @@ spec:
- __name__
path: /metrics/cadvisor
port: https-metrics
relabelings:
- sourceLabels:
- __metrics_path__
targetLabel: metrics_path
scheme: https
tlsConfig:
insecureSkipVerify: true
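
The hunks above drop a long list of unused workqueue and apiserver metrics by name and copy the scrape path into a metrics_path label. As a rough sketch of the pattern (not the generated manifest itself; the name, namespace, selector and the abbreviated regex are illustrative assumptions), a ServiceMonitor endpoint combining a drop rule with the metrics_path relabeling looks roughly like this:

apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: kubelet-example          # illustrative name
  namespace: monitoring          # assumed namespace
spec:
  endpoints:
  - port: https-metrics
    scheme: https
    tlsConfig:
      insecureSkipVerify: true
    metricRelabelings:
    - action: drop                       # discard matching series at scrape time
      sourceLabels:
      - __name__
      regex: (autoregister_adds|autoregister_retries)   # abbreviated; the real rule lists many more metrics
    relabelings:
    - sourceLabels:
      - __metrics_path__                 # copy the scrape path into a regular label
      targetLabel: metrics_path
  selector:
    matchLabels:
      k8s-app: kubelet                   # assumed selector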


@@ -15,16 +15,16 @@ spec:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
spec:
description: 'AlertmanagerSpec is a specification of the desired behavior
of the Alertmanager cluster. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status'
of the Alertmanager cluster. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#spec-and-status'
properties:
additionalPeers:
description: AdditionalPeers allows injecting a set of additional Alertmanagers
@@ -626,6 +626,12 @@ spec:
items:
type: string
type: array
configSecret:
description: ConfigSecret is the name of a Kubernetes Secret in the
same namespace as the Alertmanager object, which contains configuration
for this Alertmanager instance. Defaults to 'alertmanager-<alertmanager-name>'
The secret is mounted into /etc/alertmanager/config.
type: string
containers:
description: Containers allows injecting additional containers. This
is meant to allow adding an authentication proxy to an Alertmanager
@@ -1051,7 +1057,8 @@ spec:
successThreshold:
description: Minimum consecutive successes for the probe to
be considered successful after having failed. Defaults to
1. Must be 1 for liveness. Minimum value is 1.
1. Must be 1 for liveness and startup. Minimum value is
1.
format: int32
type: integer
tcpSocket:
@@ -1203,7 +1210,8 @@ spec:
successThreshold:
description: Minimum consecutive successes for the probe to
be considered successful after having failed. Defaults to
1. Must be 1 for liveness. Minimum value is 1.
1. Must be 1 for liveness and startup. Minimum value is
1.
format: int32
type: integer
tcpSocket:
@@ -1352,8 +1360,125 @@ spec:
and is only honored by servers that enable the WindowsGMSA
feature flag.
type: string
runAsUserName:
description: The UserName in Windows to run the entrypoint
of the container process. Defaults to the user specified
in image metadata if unspecified. May also be set in
PodSecurityContext. If set in both SecurityContext and
PodSecurityContext, the value specified in SecurityContext
takes precedence. This field is alpha-level and it is
only honored by servers that enable the WindowsRunAsUserName
feature flag.
type: string
type: object
type: object
startupProbe:
description: Probe describes a health check to be performed against
a container to determine whether it is alive or ready to receive
traffic.
properties:
exec:
description: ExecAction describes a "run in container" action.
properties:
command:
description: Command is the command line to execute inside
the container, the working directory for the command is
root ('/') in the container's filesystem. The command
is simply exec'd, it is not run inside a shell, so traditional
shell instructions ('|', etc) won't work. To use a shell,
you need to explicitly call out to that shell. Exit
status of 0 is treated as live/healthy and non-zero
is unhealthy.
items:
type: string
type: array
type: object
failureThreshold:
description: Minimum consecutive failures for the probe to
be considered failed after having succeeded. Defaults to
3. Minimum value is 1.
format: int32
type: integer
httpGet:
description: HTTPGetAction describes an action based on HTTP
Get requests.
properties:
host:
description: Host name to connect to, defaults to the
pod IP. You probably want to set "Host" in httpHeaders
instead.
type: string
httpHeaders:
description: Custom headers to set in the request. HTTP
allows repeated headers.
items:
description: HTTPHeader describes a custom header to
be used in HTTP probes
properties:
name:
description: The header field name
type: string
value:
description: The header field value
type: string
required:
- name
- value
type: object
type: array
path:
description: Path to access on the HTTP server.
type: string
port:
anyOf:
- type: string
- type: integer
scheme:
description: Scheme to use for connecting to the host.
Defaults to HTTP.
type: string
required:
- port
type: object
initialDelaySeconds:
description: 'Number of seconds after the container has started
before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
format: int32
type: integer
periodSeconds:
description: How often (in seconds) to perform the probe.
Default to 10 seconds. Minimum value is 1.
format: int32
type: integer
successThreshold:
description: Minimum consecutive successes for the probe to
be considered successful after having failed. Defaults to
1. Must be 1 for liveness and startup. Minimum value is
1.
format: int32
type: integer
tcpSocket:
description: TCPSocketAction describes an action based on
opening a socket
properties:
host:
description: 'Optional: Host name to connect to, defaults
to the pod IP.'
type: string
port:
anyOf:
- type: string
- type: integer
required:
- port
type: object
timeoutSeconds:
description: 'Number of seconds after which the probe times
out. Defaults to 1 second. Minimum value is 1. More info:
https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
format: int32
type: integer
type: object
stdin:
description: Whether this container should allocate a buffer for
stdin in the container runtime. If this is not set, reads from
@@ -1918,7 +2043,8 @@ spec:
successThreshold:
description: Minimum consecutive successes for the probe to
be considered successful after having failed. Defaults to
1. Must be 1 for liveness. Minimum value is 1.
1. Must be 1 for liveness and startup. Minimum value is
1.
format: int32
type: integer
tcpSocket:
@@ -2070,7 +2196,8 @@ spec:
successThreshold:
description: Minimum consecutive successes for the probe to
be considered successful after having failed. Defaults to
1. Must be 1 for liveness. Minimum value is 1.
1. Must be 1 for liveness and startup. Minimum value is
1.
format: int32
type: integer
tcpSocket:
@@ -2219,8 +2346,125 @@ spec:
and is only honored by servers that enable the WindowsGMSA
feature flag.
type: string
runAsUserName:
description: The UserName in Windows to run the entrypoint
of the container process. Defaults to the user specified
in image metadata if unspecified. May also be set in
PodSecurityContext. If set in both SecurityContext and
PodSecurityContext, the value specified in SecurityContext
takes precedence. This field is alpha-level and it is
only honored by servers that enable the WindowsRunAsUserName
feature flag.
type: string
type: object
type: object
startupProbe:
description: Probe describes a health check to be performed against
a container to determine whether it is alive or ready to receive
traffic.
properties:
exec:
description: ExecAction describes a "run in container" action.
properties:
command:
description: Command is the command line to execute inside
the container, the working directory for the command is
root ('/') in the container's filesystem. The command
is simply exec'd, it is not run inside a shell, so traditional
shell instructions ('|', etc) won't work. To use a shell,
you need to explicitly call out to that shell. Exit
status of 0 is treated as live/healthy and non-zero
is unhealthy.
items:
type: string
type: array
type: object
failureThreshold:
description: Minimum consecutive failures for the probe to
be considered failed after having succeeded. Defaults to
3. Minimum value is 1.
format: int32
type: integer
httpGet:
description: HTTPGetAction describes an action based on HTTP
Get requests.
properties:
host:
description: Host name to connect to, defaults to the
pod IP. You probably want to set "Host" in httpHeaders
instead.
type: string
httpHeaders:
description: Custom headers to set in the request. HTTP
allows repeated headers.
items:
description: HTTPHeader describes a custom header to
be used in HTTP probes
properties:
name:
description: The header field name
type: string
value:
description: The header field value
type: string
required:
- name
- value
type: object
type: array
path:
description: Path to access on the HTTP server.
type: string
port:
anyOf:
- type: string
- type: integer
scheme:
description: Scheme to use for connecting to the host.
Defaults to HTTP.
type: string
required:
- port
type: object
initialDelaySeconds:
description: 'Number of seconds after the container has started
before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
format: int32
type: integer
periodSeconds:
description: How often (in seconds) to perform the probe.
Default to 10 seconds. Minimum value is 1.
format: int32
type: integer
successThreshold:
description: Minimum consecutive successes for the probe to
be considered successful after having failed. Defaults to
1. Must be 1 for liveness and startup. Minimum value is
1.
format: int32
type: integer
tcpSocket:
description: TCPSocketAction describes an action based on
opening a socket
properties:
host:
description: 'Optional: Host name to connect to, defaults
to the pod IP.'
type: string
port:
anyOf:
- type: string
- type: integer
required:
- port
type: object
timeoutSeconds:
description: 'Number of seconds after which the probe times
out. Defaults to 1 second. Minimum value is 1. More info:
https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes'
format: int32
type: integer
type: object
stdin:
description: Whether this container should allocate a buffer for
stdin in the container runtime. If this is not set, reads from
@@ -2397,179 +2641,13 @@ spec:
If this field is specified and the generated name exists, the server will NOT return a 409 - instead, it will either return 201 Created or 500 with Reason ServerTimeout indicating a unique name could not be found in the time allotted, and the client should retry (optionally after the time indicated in the Retry-After header).
Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#idempotency
Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#idempotency
type: string
generation:
description: A sequence number representing a specific generation
of the desired state. Populated by the system. Read-only.
format: int64
type: integer
initializers:
description: Initializers tracks the progress of initialization.
properties:
pending:
description: Pending is a list of initializers that must execute
in order before this object is visible. When the last pending
initializer is removed, and no failing result is set, the
initializers struct will be set to nil and the object is considered
as initialized and visible to all clients.
items:
description: Initializer is information about an initializer
that has not yet completed.
properties:
name:
description: name of the process that is responsible for
initializing this object.
type: string
required:
- name
type: object
type: array
result:
description: Status is a return value for calls that don't return
other objects.
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of
this representation of an object. Servers should convert
recognized schemas to the latest internal value, and may
reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
type: string
code:
description: Suggested HTTP return code for this status,
0 if not set.
format: int32
type: integer
details:
description: StatusDetails is a set of additional properties
that MAY be set by the server to provide additional information
about a response. The Reason field of a Status object
defines what attributes will be set. Clients must ignore
fields that do not match the defined type of each attribute,
and should assume that any attribute may be empty, invalid,
or under defined.
properties:
causes:
description: The Causes array includes more details
associated with the StatusReason failure. Not all
StatusReasons may provide detailed causes.
items:
description: StatusCause provides more information
about an api.Status failure, including cases when
multiple errors are encountered.
properties:
field:
description: |-
The field of the resource that has caused this error, as named by its JSON serialization. May include dot and postfix notation for nested attributes. Arrays are zero-indexed. Fields may appear more than once in an array of causes due to fields having multiple errors. Optional.
Examples:
"name" - the field "name" on the current resource
"items[0].name" - the field "name" on the first array entry in "items"
type: string
message:
description: A human-readable description of the
cause of the error. This field may be presented
as-is to a reader.
type: string
reason:
description: A machine-readable description of
the cause of the error. If this value is empty
there is no information available.
type: string
type: object
type: array
group:
description: The group attribute of the resource associated
with the status StatusReason.
type: string
kind:
description: 'The kind attribute of the resource associated
with the status StatusReason. On some operations may
differ from the requested resource Kind. More info:
https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
type: string
name:
description: The name attribute of the resource associated
with the status StatusReason (when there is a single
name which can be described).
type: string
retryAfterSeconds:
description: If specified, the time in seconds before
the operation should be retried. Some errors may indicate
the client must take an alternate action - for those
errors this field may indicate how long to wait before
taking the alternate action.
format: int32
type: integer
uid:
description: 'UID of the resource. (when there is a
single resource which can be described). More info:
http://kubernetes.io/docs/user-guide/identifiers#uids'
type: string
type: object
kind:
description: 'Kind is a string value representing the REST
resource this object represents. Servers may infer this
from the endpoint the client submits requests to. Cannot
be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
type: string
message:
description: A human-readable description of the status
of this operation.
type: string
metadata:
description: ListMeta describes metadata that synthetic
resources must have, including lists and various status
objects. A resource may have only one of {ObjectMeta,
ListMeta}.
properties:
continue:
description: continue may be set if the user set a limit
on the number of items returned, and indicates that
the server has more data available. The value is opaque
and may be used to issue another request to the endpoint
that served this list to retrieve the next set of
available objects. Continuing a consistent list may
not be possible if the server configuration has changed
or more than a few minutes have passed. The resourceVersion
field returned when using this continue value will
be identical to the value in the first response, unless
you have received this token from an error message.
type: string
remainingItemCount:
description: |-
remainingItemCount is the number of subsequent items in the list which are not included in this list response. If the list request contained label or field selectors, then the number of remaining items is unknown and the field will be left unset and omitted during serialization. If the list is complete (either because it is not chunking or because this is the last chunk), then there are no more remaining items and this field will be left unset and omitted during serialization. Servers older than v1.15 do not set this field. The intended use of the remainingItemCount is *estimating* the size of a collection. Clients should not rely on the remainingItemCount to be set or to be exact.
This field is alpha and can be changed or removed without notice.
format: int64
type: integer
resourceVersion:
description: 'String that identifies the server''s internal
version of this object that can be used by clients
to determine when objects have changed. Value must
be treated as opaque by clients and passed unmodified
back to the server. Populated by the system. Read-only.
More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency'
type: string
selfLink:
description: selfLink is a URL representing this object.
Populated by the system. Read-only.
type: string
type: object
reason:
description: A machine-readable description of why this
operation is in the "Failure" status. If this value is
empty there is no information available. A Reason clarifies
an HTTP status code but does not override it.
type: string
status:
description: 'Status of the operation. One of: "Success"
or "Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status'
type: string
type: object
required:
- pending
type: object
labels:
description: 'Map of string keys and values that can be used to
organize and categorize (scope and select) objects. May match
@@ -2577,10 +2655,13 @@ spec:
http://kubernetes.io/docs/user-guide/labels'
type: object
managedFields:
description: |-
ManagedFields maps workflow-id and version to the set of fields that are managed by that workflow. This is mostly for internal housekeeping, and users typically shouldn't need to set or understand this field. A workflow can be the user's name, a controller's name, or the name of a specific apply path like "ci-cd". The set of fields is always in the version that the workflow used when modifying the object.
This field is alpha and can be changed or removed without notice.
description: ManagedFields maps workflow-id and version to the set
of fields that are managed by that workflow. This is mostly for
internal housekeeping, and users typically shouldn't need to set
or understand this field. A workflow can be the user's name, a
controller's name, or the name of a specific apply path like "ci-cd".
The set of fields is always in the version that the workflow used
when modifying the object.
items:
description: ManagedFieldsEntry is a workflow-id, a FieldSet and
the group version of the resource that the fieldset applies
@@ -2593,9 +2674,18 @@ spec:
to track the version of a field set because it cannot be
automatically converted.
type: string
fields:
description: 'Fields stores a set of fields in a data structure
like a Trie. To understand how this is used, see: https://github.com/kubernetes-sigs/structured-merge-diff'
fieldsType:
description: 'FieldsType is the discriminator for the different
fields format and version. There is currently only one possible
value: "FieldsV1"'
type: string
fieldsV1:
description: |-
FieldsV1 stores a set of fields in a data structure like a Trie, in JSON format.
Each key is either a '.' representing the field itself, and will always map to an empty set, or a string representing a sub-field or item. The string will follow one of these four formats: 'f:<name>', where <name> is the name of a field in a struct, or key in a map 'v:<value>', where <value> is the exact json formatted value of a list item 'i:<index>', where <index> is position of a item in a list 'k:<keys>', where <keys> is a map of a list item's key fields to their unique values If a key maps to an empty Fields value, the field that key represents is part of the set.
The exact format is defined in sigs.k8s.io/structured-merge-diff
type: object
manager:
description: Manager is an identifier of the workflow managing
@@ -2654,7 +2744,7 @@ spec:
controller.
type: boolean
kind:
description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
name:
description: 'Name of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#names'
@@ -2673,11 +2763,13 @@ spec:
description: |-
An opaque value that represents the internal version of this object that can be used by clients to determine when objects have changed. May be used for optimistic concurrency, change detection, and the watch operation on a resource or set of resources. Clients must treat these values as opaque and passed unmodified back to the server. They may only be valid for a particular resource or set of resources.
Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency
Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
type: string
selfLink:
description: SelfLink is a URL representing this object. Populated
by the system. Read-only.
description: |-
SelfLink is a URL representing this object. Populated by the system. Read-only.
DEPRECATED Kubernetes will stop propagating this field in 1.20 release and the field is planned to be removed in 1.21 release.
type: string
uid:
description: |-
@@ -2835,6 +2927,15 @@ spec:
credential spec to use. This field is alpha-level and is only
honored by servers that enable the WindowsGMSA feature flag.
type: string
runAsUserName:
description: The UserName in Windows to run the entrypoint of
the container process. Defaults to the user specified in image
metadata if unspecified. May also be set in PodSecurityContext.
If set in both SecurityContext and PodSecurityContext, the
value specified in SecurityContext takes precedence. This
field is alpha-level and it is only honored by servers that
enable the WindowsRunAsUserName feature flag.
type: string
type: object
type: object
serviceAccountName:
@@ -2872,13 +2973,13 @@ spec:
description: 'APIVersion defines the versioned schema of this
representation of an object. Servers should convert recognized
schemas to the latest internal value, and may reject unrecognized
values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource
this object represents. Servers may infer this from the endpoint
the client submits requests to. Cannot be updated. In CamelCase.
More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
description: ObjectMeta is metadata that all persisted resources
@@ -2934,190 +3035,13 @@ spec:
If this field is specified and the generated name exists, the server will NOT return a 409 - instead, it will either return 201 Created or 500 with Reason ServerTimeout indicating a unique name could not be found in the time allotted, and the client should retry (optionally after the time indicated in the Retry-After header).
Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#idempotency
Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#idempotency
type: string
generation:
description: A sequence number representing a specific generation
of the desired state. Populated by the system. Read-only.
format: int64
type: integer
initializers:
description: Initializers tracks the progress of initialization.
properties:
pending:
description: Pending is a list of initializers that
must execute in order before this object is visible.
When the last pending initializer is removed, and
no failing result is set, the initializers struct
will be set to nil and the object is considered as
initialized and visible to all clients.
items:
description: Initializer is information about an initializer
that has not yet completed.
properties:
name:
description: name of the process that is responsible
for initializing this object.
type: string
required:
- name
type: object
type: array
result:
description: Status is a return value for calls that
don't return other objects.
properties:
apiVersion:
description: 'APIVersion defines the versioned schema
of this representation of an object. Servers should
convert recognized schemas to the latest internal
value, and may reject unrecognized values. More
info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
type: string
code:
description: Suggested HTTP return code for this
status, 0 if not set.
format: int32
type: integer
details:
description: StatusDetails is a set of additional
properties that MAY be set by the server to provide
additional information about a response. The Reason
field of a Status object defines what attributes
will be set. Clients must ignore fields that do
not match the defined type of each attribute,
and should assume that any attribute may be empty,
invalid, or under defined.
properties:
causes:
description: The Causes array includes more
details associated with the StatusReason failure.
Not all StatusReasons may provide detailed
causes.
items:
description: StatusCause provides more information
about an api.Status failure, including cases
when multiple errors are encountered.
properties:
field:
description: |-
The field of the resource that has caused this error, as named by its JSON serialization. May include dot and postfix notation for nested attributes. Arrays are zero-indexed. Fields may appear more than once in an array of causes due to fields having multiple errors. Optional.
Examples:
"name" - the field "name" on the current resource
"items[0].name" - the field "name" on the first array entry in "items"
type: string
message:
description: A human-readable description
of the cause of the error. This field
may be presented as-is to a reader.
type: string
reason:
description: A machine-readable description
of the cause of the error. If this value
is empty there is no information available.
type: string
type: object
type: array
group:
description: The group attribute of the resource
associated with the status StatusReason.
type: string
kind:
description: 'The kind attribute of the resource
associated with the status StatusReason. On
some operations may differ from the requested
resource Kind. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
type: string
name:
description: The name attribute of the resource
associated with the status StatusReason (when
there is a single name which can be described).
type: string
retryAfterSeconds:
description: If specified, the time in seconds
before the operation should be retried. Some
errors may indicate the client must take an
alternate action - for those errors this field
may indicate how long to wait before taking
the alternate action.
format: int32
type: integer
uid:
description: 'UID of the resource. (when there
is a single resource which can be described).
More info: http://kubernetes.io/docs/user-guide/identifiers#uids'
type: string
type: object
kind:
description: 'Kind is a string value representing
the REST resource this object represents. Servers
may infer this from the endpoint the client submits
requests to. Cannot be updated. In CamelCase.
More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
type: string
message:
description: A human-readable description of the
status of this operation.
type: string
metadata:
description: ListMeta describes metadata that synthetic
resources must have, including lists and various
status objects. A resource may have only one of
{ObjectMeta, ListMeta}.
properties:
continue:
description: continue may be set if the user
set a limit on the number of items returned,
and indicates that the server has more data
available. The value is opaque and may be
used to issue another request to the endpoint
that served this list to retrieve the next
set of available objects. Continuing a consistent
list may not be possible if the server configuration
has changed or more than a few minutes have
passed. The resourceVersion field returned
when using this continue value will be identical
to the value in the first response, unless
you have received this token from an error
message.
type: string
remainingItemCount:
description: |-
remainingItemCount is the number of subsequent items in the list which are not included in this list response. If the list request contained label or field selectors, then the number of remaining items is unknown and the field will be left unset and omitted during serialization. If the list is complete (either because it is not chunking or because this is the last chunk), then there are no more remaining items and this field will be left unset and omitted during serialization. Servers older than v1.15 do not set this field. The intended use of the remainingItemCount is *estimating* the size of a collection. Clients should not rely on the remainingItemCount to be set or to be exact.
This field is alpha and can be changed or removed without notice.
format: int64
type: integer
resourceVersion:
description: 'String that identifies the server''s
internal version of this object that can be
used by clients to determine when objects
have changed. Value must be treated as opaque
by clients and passed unmodified back to the
server. Populated by the system. Read-only.
More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency'
type: string
selfLink:
description: selfLink is a URL representing
this object. Populated by the system. Read-only.
type: string
type: object
reason:
description: A machine-readable description of why
this operation is in the "Failure" status. If
this value is empty there is no information available.
A Reason clarifies an HTTP status code but does
not override it.
type: string
status:
description: 'Status of the operation. One of: "Success"
or "Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status'
type: string
type: object
required:
- pending
type: object
labels:
description: 'Map of string keys and values that can be
used to organize and categorize (scope and select) objects.
@@ -3125,10 +3049,14 @@ spec:
More info: http://kubernetes.io/docs/user-guide/labels'
type: object
managedFields:
description: |-
ManagedFields maps workflow-id and version to the set of fields that are managed by that workflow. This is mostly for internal housekeeping, and users typically shouldn't need to set or understand this field. A workflow can be the user's name, a controller's name, or the name of a specific apply path like "ci-cd". The set of fields is always in the version that the workflow used when modifying the object.
This field is alpha and can be changed or removed without notice.
description: ManagedFields maps workflow-id and version
to the set of fields that are managed by that workflow.
This is mostly for internal housekeeping, and users typically
shouldn't need to set or understand this field. A workflow
can be the user's name, a controller's name, or the name
of a specific apply path like "ci-cd". The set of fields
is always in the version that the workflow used when modifying
the object.
items:
description: ManagedFieldsEntry is a workflow-id, a FieldSet
and the group version of the resource that the fieldset
@@ -3141,10 +3069,18 @@ spec:
field. It is necessary to track the version of a
field set because it cannot be automatically converted.
type: string
fields:
description: 'Fields stores a set of fields in a data
structure like a Trie. To understand how this is
used, see: https://github.com/kubernetes-sigs/structured-merge-diff'
fieldsType:
description: 'FieldsType is the discriminator for
the different fields format and version. There is
currently only one possible value: "FieldsV1"'
type: string
fieldsV1:
description: |-
FieldsV1 stores a set of fields in a data structure like a Trie, in JSON format.
Each key is either a '.' representing the field itself, and will always map to an empty set, or a string representing a sub-field or item. The string will follow one of these four formats: 'f:<name>', where <name> is the name of a field in a struct, or key in a map 'v:<value>', where <value> is the exact json formatted value of a list item 'i:<index>', where <index> is position of a item in a list 'k:<keys>', where <keys> is a map of a list item's key fields to their unique values If a key maps to an empty Fields value, the field that key represents is part of the set.
The exact format is defined in sigs.k8s.io/structured-merge-diff
type: object
manager:
description: Manager is an identifier of the workflow
@@ -3208,7 +3144,7 @@ spec:
managing controller.
type: boolean
kind:
description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
name:
description: 'Name of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#names'
@@ -3227,11 +3163,13 @@ spec:
description: |-
An opaque value that represents the internal version of this object that can be used by clients to determine when objects have changed. May be used for optimistic concurrency, change detection, and the watch operation on a resource or set of resources. Clients must treat these values as opaque and passed unmodified back to the server. They may only be valid for a particular resource or set of resources.
Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency
Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
type: string
selfLink:
description: SelfLink is a URL representing this object.
Populated by the system. Read-only.
description: |-
SelfLink is a URL representing this object. Populated by the system. Read-only.
DEPRECATED Kubernetes will stop propagating this field in 1.20 release and the field is planned to be removed in 1.21 release.
type: string
uid:
description: |-
@@ -3597,7 +3535,7 @@ spec:
properties:
monitors:
description: 'Required: Monitors is a collection of Ceph monitors
More info: https://releases.k8s.io/HEAD/examples/volumes/cephfs/README.md#how-to-use-it'
More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it'
items:
type: string
type: array
@@ -3608,11 +3546,11 @@ spec:
readOnly:
description: 'Optional: Defaults to false (read/write). ReadOnly
here will force the ReadOnly setting in VolumeMounts. More
info: https://releases.k8s.io/HEAD/examples/volumes/cephfs/README.md#how-to-use-it'
info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it'
type: boolean
secretFile:
description: 'Optional: SecretFile is the path to key ring
for User, default is /etc/ceph/user.secret More info: https://releases.k8s.io/HEAD/examples/volumes/cephfs/README.md#how-to-use-it'
for User, default is /etc/ceph/user.secret More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it'
type: string
secretRef:
description: LocalObjectReference contains enough information
@@ -3625,7 +3563,7 @@ spec:
type: object
user:
description: 'Optional: User is the rados user name, default
is admin More info: https://releases.k8s.io/HEAD/examples/volumes/cephfs/README.md#how-to-use-it'
is admin More info: https://examples.k8s.io/volumes/cephfs/README.md#how-to-use-it'
type: string
required:
- monitors
@@ -3640,12 +3578,12 @@ spec:
description: 'Filesystem type to mount. Must be a filesystem
type supported by the host operating system. Examples: "ext4",
"xfs", "ntfs". Implicitly inferred to be "ext4" if unspecified.
More info: https://releases.k8s.io/HEAD/examples/mysql-cinder-pd/README.md'
More info: https://examples.k8s.io/mysql-cinder-pd/README.md'
type: string
readOnly:
description: 'Optional: Defaults to false (read/write). ReadOnly
here will force the ReadOnly setting in VolumeMounts. More
info: https://releases.k8s.io/HEAD/examples/mysql-cinder-pd/README.md'
info: https://examples.k8s.io/mysql-cinder-pd/README.md'
type: boolean
secretRef:
description: LocalObjectReference contains enough information
@@ -3657,8 +3595,8 @@ spec:
type: string
type: object
volumeID:
description: 'volume id used to identify the volume in cinder
More info: https://releases.k8s.io/HEAD/examples/mysql-cinder-pd/README.md'
description: 'volume id used to identify the volume in cinder.
More info: https://examples.k8s.io/mysql-cinder-pd/README.md'
type: string
required:
- volumeID
@@ -3981,16 +3919,16 @@ spec:
properties:
endpoints:
description: 'EndpointsName is the endpoint name that details
Glusterfs topology. More info: https://releases.k8s.io/HEAD/examples/volumes/glusterfs/README.md#create-a-pod'
Glusterfs topology. More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod'
type: string
path:
description: 'Path is the Glusterfs volume path. More info:
https://releases.k8s.io/HEAD/examples/volumes/glusterfs/README.md#create-a-pod'
https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod'
type: string
readOnly:
description: 'ReadOnly here will force the Glusterfs volume
to be mounted with read-only permissions. Defaults to false.
More info: https://releases.k8s.io/HEAD/examples/volumes/glusterfs/README.md#create-a-pod'
More info: https://examples.k8s.io/volumes/glusterfs/README.md#create-a-pod'
type: boolean
required:
- endpoints
@@ -4437,24 +4375,24 @@ spec:
https://kubernetes.io/docs/concepts/storage/volumes#rbd'
type: string
image:
description: 'The rados image name. More info: https://releases.k8s.io/HEAD/examples/volumes/rbd/README.md#how-to-use-it'
description: 'The rados image name. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it'
type: string
keyring:
description: 'Keyring is the path to key ring for RBDUser.
Default is /etc/ceph/keyring. More info: https://releases.k8s.io/HEAD/examples/volumes/rbd/README.md#how-to-use-it'
Default is /etc/ceph/keyring. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it'
type: string
monitors:
description: 'A collection of Ceph monitors. More info: https://releases.k8s.io/HEAD/examples/volumes/rbd/README.md#how-to-use-it'
description: 'A collection of Ceph monitors. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it'
items:
type: string
type: array
pool:
description: 'The rados pool name. Default is rbd. More info:
https://releases.k8s.io/HEAD/examples/volumes/rbd/README.md#how-to-use-it'
https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it'
type: string
readOnly:
description: 'ReadOnly here will force the ReadOnly setting
in VolumeMounts. Defaults to false. More info: https://releases.k8s.io/HEAD/examples/volumes/rbd/README.md#how-to-use-it'
in VolumeMounts. Defaults to false. More info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it'
type: boolean
secretRef:
description: LocalObjectReference contains enough information
@@ -4467,7 +4405,7 @@ spec:
type: object
user:
description: 'The rados user name. Default is admin. More
info: https://releases.k8s.io/HEAD/examples/volumes/rbd/README.md#how-to-use-it'
info: https://examples.k8s.io/volumes/rbd/README.md#how-to-use-it'
type: string
required:
- monitors
@@ -4654,7 +4592,7 @@ spec:
status:
description: 'AlertmanagerStatus is the most recent observed status of the
Alertmanager cluster. Read-only. Not included when requesting from the
apiserver, only from the Prometheus Operator API itself. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/api-conventions.md#spec-and-status'
apiserver, only from the Prometheus Operator API itself. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#spec-and-status'
properties:
availableReplicas:
description: Total number of available pods (ready for at least minReadySeconds)
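
Two notable additions in this Alertmanager CRD are the configSecret field on the spec and the startupProbe field on injected containers. A minimal sketch (names, namespace, image and probe values are made-up assumptions) of an Alertmanager resource exercising both could look like this:

apiVersion: monitoring.coreos.com/v1
kind: Alertmanager
metadata:
  name: example
  namespace: monitoring
spec:
  replicas: 3
  configSecret: my-alertmanager-config   # Secret in the same namespace, mounted into /etc/alertmanager/config
  containers:                            # e.g. an injected authentication-proxy sidecar
  - name: auth-proxy
    image: example.org/auth-proxy:1.0    # placeholder image
    startupProbe:                        # new in this schema: holds off other probes until startup succeeds
      httpGet:
        path: /healthz
        port: 8080
      failureThreshold: 30               # defaults to 3; minimum value is 1
      periodSeconds: 10                  # defaults to 10 seconds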


@@ -15,12 +15,12 @@ spec:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
spec:
description: PodMonitorSpec contains specification parameters for a PodMonitor.
@@ -52,6 +52,10 @@ spec:
description: HonorLabels chooses the metric's labels on collisions
with target labels.
type: boolean
honorTimestamps:
description: HonorTimestamps controls whether Prometheus respects
the timestamps present in scraped data.
type: boolean
interval:
description: Interval at which metrics should be scraped
type: string
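
The PodMonitor schema gains an honorTimestamps flag alongside honorLabels. A minimal sketch (name, namespace and selector are assumptions, and the endpoints list is the podMetricsEndpoints field of the PodMonitor spec) of a PodMonitor that ignores scraped timestamps:

apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: example-pods
  namespace: monitoring
spec:
  podMetricsEndpoints:
  - port: metrics            # container port name to scrape
    interval: 30s
    honorLabels: false       # prefer target labels on label collisions
    honorTimestamps: false   # ignore timestamps in scraped data; Prometheus assigns its own
  selector:
    matchLabels:
      app: example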


@@ -15,12 +15,12 @@ spec:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
description: ObjectMeta is metadata that all persisted resources must have,
@@ -70,186 +70,26 @@ spec:
If this field is specified and the generated name exists, the server will NOT return a 409 - instead, it will either return 201 Created or 500 with Reason ServerTimeout indicating a unique name could not be found in the time allotted, and the client should retry (optionally after the time indicated in the Retry-After header).
Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#idempotency
Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#idempotency
type: string
generation:
description: A sequence number representing a specific generation of
the desired state. Populated by the system. Read-only.
format: int64
type: integer
initializers:
description: Initializers tracks the progress of initialization.
properties:
pending:
description: Pending is a list of initializers that must execute
in order before this object is visible. When the last pending
initializer is removed, and no failing result is set, the initializers
struct will be set to nil and the object is considered as initialized
and visible to all clients.
items:
description: Initializer is information about an initializer that
has not yet completed.
properties:
name:
description: name of the process that is responsible for initializing
this object.
type: string
required:
- name
type: object
type: array
result:
description: Status is a return value for calls that don't return
other objects.
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this
representation of an object. Servers should convert recognized
schemas to the latest internal value, and may reject unrecognized
values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
type: string
code:
description: Suggested HTTP return code for this status, 0 if
not set.
format: int32
type: integer
details:
description: StatusDetails is a set of additional properties
that MAY be set by the server to provide additional information
about a response. The Reason field of a Status object defines
what attributes will be set. Clients must ignore fields that
do not match the defined type of each attribute, and should
assume that any attribute may be empty, invalid, or under
defined.
properties:
causes:
description: The Causes array includes more details associated
with the StatusReason failure. Not all StatusReasons may
provide detailed causes.
items:
description: StatusCause provides more information about
an api.Status failure, including cases when multiple
errors are encountered.
properties:
field:
description: |-
The field of the resource that has caused this error, as named by its JSON serialization. May include dot and postfix notation for nested attributes. Arrays are zero-indexed. Fields may appear more than once in an array of causes due to fields having multiple errors. Optional.
Examples:
"name" - the field "name" on the current resource
"items[0].name" - the field "name" on the first array entry in "items"
type: string
message:
description: A human-readable description of the cause
of the error. This field may be presented as-is
to a reader.
type: string
reason:
description: A machine-readable description of the
cause of the error. If this value is empty there
is no information available.
type: string
type: object
type: array
group:
description: The group attribute of the resource associated
with the status StatusReason.
type: string
kind:
description: 'The kind attribute of the resource associated
with the status StatusReason. On some operations may differ
from the requested resource Kind. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
type: string
name:
description: The name attribute of the resource associated
with the status StatusReason (when there is a single name
which can be described).
type: string
retryAfterSeconds:
description: If specified, the time in seconds before the
operation should be retried. Some errors may indicate
the client must take an alternate action - for those errors
this field may indicate how long to wait before taking
the alternate action.
format: int32
type: integer
uid:
description: 'UID of the resource. (when there is a single
resource which can be described). More info: http://kubernetes.io/docs/user-guide/identifiers#uids'
type: string
type: object
kind:
description: 'Kind is a string value representing the REST resource
this object represents. Servers may infer this from the endpoint
the client submits requests to. Cannot be updated. In CamelCase.
More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
type: string
message:
description: A human-readable description of the status of this
operation.
type: string
metadata:
description: ListMeta describes metadata that synthetic resources
must have, including lists and various status objects. A resource
may have only one of {ObjectMeta, ListMeta}.
properties:
continue:
description: continue may be set if the user set a limit
on the number of items returned, and indicates that the
server has more data available. The value is opaque and
may be used to issue another request to the endpoint that
served this list to retrieve the next set of available
objects. Continuing a consistent list may not be possible
if the server configuration has changed or more than a
few minutes have passed. The resourceVersion field returned
when using this continue value will be identical to the
value in the first response, unless you have received
this token from an error message.
type: string
remainingItemCount:
description: |-
remainingItemCount is the number of subsequent items in the list which are not included in this list response. If the list request contained label or field selectors, then the number of remaining items is unknown and the field will be left unset and omitted during serialization. If the list is complete (either because it is not chunking or because this is the last chunk), then there are no more remaining items and this field will be left unset and omitted during serialization. Servers older than v1.15 do not set this field. The intended use of the remainingItemCount is *estimating* the size of a collection. Clients should not rely on the remainingItemCount to be set or to be exact.
This field is alpha and can be changed or removed without notice.
format: int64
type: integer
resourceVersion:
description: 'String that identifies the server''s internal
version of this object that can be used by clients to
determine when objects have changed. Value must be treated
as opaque by clients and passed unmodified back to the
server. Populated by the system. Read-only. More info:
https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency'
type: string
selfLink:
description: selfLink is a URL representing this object.
Populated by the system. Read-only.
type: string
type: object
reason:
description: A machine-readable description of why this operation
is in the "Failure" status. If this value is empty there is
no information available. A Reason clarifies an HTTP status
code but does not override it.
type: string
status:
description: 'Status of the operation. One of: "Success" or
"Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status'
type: string
type: object
required:
- pending
type: object
labels:
description: 'Map of string keys and values that can be used to organize
and categorize (scope and select) objects. May match selectors of
replication controllers and services. More info: http://kubernetes.io/docs/user-guide/labels'
type: object
managedFields:
description: |-
ManagedFields maps workflow-id and version to the set of fields that are managed by that workflow. This is mostly for internal housekeeping, and users typically shouldn't need to set or understand this field. A workflow can be the user's name, a controller's name, or the name of a specific apply path like "ci-cd". The set of fields is always in the version that the workflow used when modifying the object.
This field is alpha and can be changed or removed without notice.
description: ManagedFields maps workflow-id and version to the set of
fields that are managed by that workflow. This is mostly for internal
housekeeping, and users typically shouldn't need to set or understand
this field. A workflow can be the user's name, a controller's name,
or the name of a specific apply path like "ci-cd". The set of fields
is always in the version that the workflow used when modifying the
object.
items:
description: ManagedFieldsEntry is a workflow-id, a FieldSet and the
group version of the resource that the fieldset applies to.
@@ -261,9 +101,18 @@ spec:
the version of a field set because it cannot be automatically
converted.
type: string
fields:
description: 'Fields stores a set of fields in a data structure
like a Trie. To understand how this is used, see: https://github.com/kubernetes-sigs/structured-merge-diff'
fieldsType:
description: 'FieldsType is the discriminator for the different
fields format and version. There is currently only one possible
value: "FieldsV1"'
type: string
fieldsV1:
description: |-
FieldsV1 stores a set of fields in a data structure like a Trie, in JSON format.
Each key is either a '.' representing the field itself, and will always map to an empty set, or a string representing a sub-field or item. The string will follow one of these four formats: 'f:<name>', where <name> is the name of a field in a struct, or key in a map 'v:<value>', where <value> is the exact json formatted value of a list item 'i:<index>', where <index> is position of a item in a list 'k:<keys>', where <keys> is a map of a list item's key fields to their unique values If a key maps to an empty Fields value, the field that key represents is part of the set.
The exact format is defined in sigs.k8s.io/structured-merge-diff
type: object
manager:
description: Manager is an identifier of the workflow managing
@@ -321,7 +170,7 @@ spec:
description: If true, this reference points to the managing controller.
type: boolean
kind:
description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
name:
description: 'Name of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#names'
@@ -340,11 +189,13 @@ spec:
description: |-
An opaque value that represents the internal version of this object that can be used by clients to determine when objects have changed. May be used for optimistic concurrency, change detection, and the watch operation on a resource or set of resources. Clients must treat these values as opaque and passed unmodified back to the server. They may only be valid for a particular resource or set of resources.
Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency
Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
type: string
selfLink:
description: SelfLink is a URL representing this object. Populated by
the system. Read-only.
description: |-
SelfLink is a URL representing this object. Populated by the system. Read-only.
DEPRECATED Kubernetes will stop propagating this field in 1.20 release and the field is planned to be removed in 1.21 release.
type: string
uid:
description: |-
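
The managedFields change above replaces the old fields field with fieldsType and fieldsV1, and the fieldsV1 description enumerates the 'f:', 'v:', 'i:' and 'k:' key formats. A purely illustrative, made-up metadata fragment showing what such an entry can look like:

metadata:
  managedFields:
  - manager: kubectl                      # identifier of the workflow managing these fields
    operation: Apply
    apiVersion: monitoring.coreos.com/v1
    fieldsType: FieldsV1                  # currently the only possible value
    fieldsV1:
      f:spec:                             # 'f:<name>' selects a field in a struct
        f:replicas: {}                    # empty set: the field itself is owned by this workflow
        f:containers:
          k:{"name":"alertmanager"}:      # 'k:<keys>' selects a list item by its key fields
            f:image: {}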


@@ -15,12 +15,12 @@ spec:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
spec:
description: ServiceMonitorSpec contains specification parameters for a
@@ -74,10 +74,31 @@ spec:
bearerTokenFile:
description: File to read bearer token for scraping targets.
type: string
bearerTokenSecret:
description: SecretKeySelector selects a key of a Secret.
properties:
key:
description: The key of the secret to select from. Must be
a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
type: string
optional:
description: Specify whether the Secret or its key must be
defined
type: boolean
required:
- key
type: object
honorLabels:
description: HonorLabels chooses the metric's labels on collisions
with target labels.
type: boolean
honorTimestamps:
description: HonorTimestamps controls whether Prometheus respects
the timestamps present in scraped data.
type: boolean
interval:
description: Interval at which metrics should be scraped
type: string
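The bearerTokenSecret and honorTimestamps endpoint fields added by this CRD update can be set from a ServiceMonitor. Below is a minimal illustrative sketch, not part of the diff; the app name, namespace, Secret name, and key are made-up placeholders:

# --- illustrative ServiceMonitor sketch (not part of this diff) ---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: example-app            # placeholder name
  namespace: monitoring
spec:
  selector:
    matchLabels:
      app: example-app
  endpoints:
  - port: web
    interval: 30s
    honorTimestamps: true      # new field: keep the timestamps exposed by the target
    bearerTokenSecret:         # new field: SecretKeySelector for the scrape bearer token
      name: example-app-token  # placeholder Secret name
      key: token               # placeholder key inside the Secret
# --- end of sketch ---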
@@ -199,18 +220,40 @@ spec:
tlsConfig:
description: TLSConfig specifies TLS configuration parameters.
properties:
ca: {}
caFile:
- description: The CA cert to use for the targets.
+ description: Path to the CA cert in the Prometheus container
+ to use for the targets.
type: string
cert: {}
certFile:
- description: The client cert file for the targets.
+ description: Path to the client cert file in the Prometheus
+ container for the targets.
type: string
insecureSkipVerify:
description: Disable target certificate validation.
type: boolean
keyFile:
- description: The client key file for the targets.
+ description: Path to the client key file in the Prometheus
+ container for the targets.
type: string
keySecret:
description: SecretKeySelector selects a key of a Secret.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
serverName:
description: Used to verify the hostname for the targets.
type: string
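The reworked tlsConfig schema above separates file paths inside the Prometheus container (caFile, certFile, keyFile) from the new keySecret selector. A hedged sketch of an HTTPS endpoint continuing the ServiceMonitor example above; the Secret name, CA path, and server name are placeholders:

# --- illustrative tlsConfig sketch (not part of this diff) ---
  endpoints:
  - port: https
    scheme: https
    tlsConfig:
      caFile: /etc/prometheus/secrets/example-ca/ca.crt  # placeholder path inside the Prometheus container
      serverName: example-app.monitoring.svc             # hostname to verify on the target certificate
      insecureSkipVerify: false
      keySecret:               # new field: client TLS key read from a Secret
        name: example-app-tls  # placeholder Secret name
        key: tls.key
# --- end of sketch ---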


@@ -4,7 +4,7 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
- app.kubernetes.io/version: v0.33.0
+ app.kubernetes.io/version: v0.34.0
name: prometheus-operator
rules:
- apiGroups:


@@ -4,7 +4,7 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
- app.kubernetes.io/version: v0.33.0
+ app.kubernetes.io/version: v0.34.0
name: prometheus-operator
roleRef:
apiGroup: rbac.authorization.k8s.io


@@ -4,7 +4,7 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
- app.kubernetes.io/version: v0.33.0
+ app.kubernetes.io/version: v0.34.0
name: prometheus-operator
namespace: monitoring
spec:
@@ -18,15 +18,15 @@ spec:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
- app.kubernetes.io/version: v0.33.0
+ app.kubernetes.io/version: v0.34.0
spec:
containers:
- args:
- --kubelet-service=kube-system/kubelet
- --logtostderr=true
- --config-reloader-image=quay.io/coreos/configmap-reload:v0.0.1
- - --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.33.0
- image: quay.io/coreos/prometheus-operator:v0.33.0
+ - --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.34.0
+ image: quay.io/coreos/prometheus-operator:v0.34.0
name: prometheus-operator
ports:
- containerPort: 8080


@@ -4,7 +4,7 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
- app.kubernetes.io/version: v0.33.0
+ app.kubernetes.io/version: v0.34.0
name: prometheus-operator
namespace: monitoring
spec:


@@ -4,6 +4,6 @@ metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
- app.kubernetes.io/version: v0.33.0
+ app.kubernetes.io/version: v0.34.0
name: prometheus-operator
namespace: monitoring

scripts/minikube-start-kvm.sh Executable file

@@ -0,0 +1,12 @@
#!/bin/bash
minikube delete
minikube addons disable metrics-server
minikube start \
--vm-driver=kvm2 \
--kubernetes-version=v1.16.0 \
--memory=6g \
--bootstrapper=kubeadm \
--extra-config=kubelet.authentication-token-webhook=true \
--extra-config=kubelet.authorization-mode=Webhook \
--extra-config=scheduler.address=0.0.0.0 \
--extra-config=controller-manager.address=0.0.0.0

scripts/minikube-start.sh Executable file

@@ -0,0 +1,11 @@
#!/bin/bash
minikube delete
minikube addons disable metrics-server
minikube start \
--kubernetes-version=v1.16.0 \
--memory=6g \
--bootstrapper=kubeadm \
--extra-config=kubelet.authentication-token-webhook=true \
--extra-config=kubelet.authorization-mode=Webhook \
--extra-config=scheduler.address=0.0.0.0 \
--extra-config=controller-manager.address=0.0.0.0

scripts/monitoring-deploy.sh Executable file

@@ -0,0 +1,11 @@
#!/bin/bash
# create namespace and CRDs
kubectl create -f manifests/setup
# wait for CRD creation to complete
until kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done
# create monitoring components
kubectl create -f manifests/


@@ -17,6 +17,7 @@ package e2e
import (
"log"
"os"
"strings"
"testing"
"time"
@@ -57,23 +58,22 @@ func testMain(m *testing.M) int {
}
func TestQueryPrometheus(t *testing.T) {
t.Parallel()
queries := []struct {
query string
expectN int
}{
{
// query: `up{job="node-exporter"} == 1`,
// expectN: 1,
// }, {
query: `up{job="node-exporter"} == 1`,
expectN: 1,
}, {
// query: `up{job="kubelet"} == 1`,
// expectN: 1,
// }, {
query: `up{job="apiserver"} == 1`,
expectN: 1,
// }, {
// query: `up{job="kube-state-metrics"} == 1`,
// expectN: 1,
}, {
query: `up{job="kube-state-metrics"} == 1`,
expectN: 1,
}, {
query: `up{job="prometheus-k8s"} == 1`,
expectN: 1,
@@ -87,7 +87,7 @@ func TestQueryPrometheus(t *testing.T) {
}
// Wait for pod to respond at queries at all. Then start verifying their results.
- err := wait.Poll(5*time.Second, 1*time.Minute, func() (bool, error) {
+ err := wait.Poll(5*time.Second, 2*time.Minute, func() (bool, error) {
_, err := promClient.query("up")
return err == nil, nil
})
@@ -116,3 +116,25 @@ func TestQueryPrometheus(t *testing.T) {
t.Fatal(err)
}
}
func TestDroppedMetrics(t *testing.T) {
// query metadata for all metrics and their metadata
md, err := promClient.metadata("{job=~\".+\"}")
if err != nil {
log.Fatal(err)
}
for _, k := range md.Data {
// check if the metric's help text contains Deprecated
if strings.Contains(k.Help, "Deprecated") {
// query prometheus for the Deprecated metric
n, err := promClient.query(k.Metric)
if err != nil {
log.Fatal(err)
}
if n > 0 {
t.Fatalf("deprecated metric with name: %s and help text: %s exists.", k.Metric, k.Help)
}
}
}
}


@@ -15,6 +15,10 @@
package e2e
import (
"bytes"
"encoding/json"
"fmt"
"k8s.io/client-go/kubernetes"
"github.com/Jeffail/gabs"
@@ -50,3 +54,41 @@ func (c *prometheusClient) query(query string) (int, error) {
n, err := res.ArrayCountP("data.result")
return n, err
}
type Metadata struct {
Status string `json:"status,omitempty"`
Data []Data `json:"data,omitempty"`
}
type Data struct {
Metric string `json:"metric,omitempty"`
Help string `json:"help,omitempty"`
}
// metadata makes a request against the Prometheus /api/v1/targets/metadata endpoint.
// It returns all the metrics and their metadata.
func (c *prometheusClient) metadata(query string) (Metadata, error) {
req := c.kubeClient.CoreV1().RESTClient().Get().
Namespace("monitoring").
Resource("pods").
SubResource("proxy").
Name("prometheus-k8s-0:9090").
Suffix("/api/v1/targets/metadata").Param("match_target", query)
var data Metadata
b, err := req.DoRaw()
if err != nil {
return data, err
}
r := bytes.NewReader(b)
decoder := json.NewDecoder(r)
err = decoder.Decode(&data)
if err != nil {
return data, err
}
if data.Status != "success" {
return data, fmt.Errorf("status of returned response was not successful; status: %s", data.Status)
}
return data, err
}


@@ -10,19 +10,33 @@ set -x
curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl
chmod +x kubectl
- curl -Lo kind https://github.com/kubernetes-sigs/kind/releases/download/v0.4.0/kind-linux-amd64
+ curl -Lo kind https://github.com/kubernetes-sigs/kind/releases/download/v0.6.1/kind-linux-amd64
chmod +x kind
- ./kind create cluster
- export KUBECONFIG="$(./kind get kubeconfig-path)"
+ run_e2e_tests() {
+ cluster_version=$1
- ./kubectl apply -f manifests/0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml
- ./kubectl apply -f manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml
- ./kubectl apply -f manifests/0prometheus-operator-0prometheusruleCustomResourceDefinition.yaml
- ./kubectl apply -f manifests/0prometheus-operator-0servicemonitorCustomResourceDefinition.yaml
+ ./kind create cluster --image=kindest/node:$cluster_version
+ export KUBECONFIG="$(./kind get kubeconfig-path)"
- # Wait for CRDs to be successfully registered
- sleep 10
+ # create namespace, permissions, and CRDs
+ ./kubectl create -f manifests/setup
+ # wait for CRD creation to complete
+ until ./kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done
+ # create monitoring components
+ ./kubectl create -f manifests/
+ make test-e2e
+ ./kind delete cluster
+ }
+ cluster_compatible_versions=("v1.16.1" "v1.17.0")
+ for cluster_version in "${cluster_compatible_versions[@]}"
+ do
+ run_e2e_tests $cluster_version
+ done
- ./kubectl apply -f manifests
- make test-e2e