Compare commits


345 Commits

Author SHA1 Message Date
Lili Cosic
ee8077db04 Merge pull request #476 from dgrisonnet/ci-test-compat-04
ci: update release-0.4 e2e tests according to compat matrix
2020-04-01 19:02:30 +02:00
Damien Grisonnet
d3bee7fa1a test: increase pod polling time
The original polling time was a bit short for all pods to be up, which made e2e
tests fail half of the time.

Signed-off-by: Damien Grisonnet <dgrisonn@redhat.com>
2020-03-31 18:26:56 +02:00
Damien Grisonnet
106132ac18 Makefile: pin jsonnet-ci to 0.36
Signed-off-by: Damien Grisonnet <dgrisonn@redhat.com>
2020-03-31 16:21:06 +02:00
Damien Grisonnet
8961be9639 ci: update e2e tests according to compat matrix
Signed-off-by: Damien Grisonnet <dgrisonn@redhat.com>
2020-03-31 14:38:20 +02:00
Frederic Branczyk
ae589e91ce Merge pull request #401 from s-urbaniak/up-down-0.4
[backport] jsonnet: add general rules for up/down targets
2020-01-30 16:05:49 +01:00
Sergiusz Urbaniak
8367575768 manifests: regenerate 2020-01-30 14:34:58 +01:00
Sergiusz Urbaniak
6b5033d65e jsonnet: add general rules for up/down targets 2020-01-30 14:29:40 +01:00
Paweł Krupa
68d6e611c6 Fast forward release-0.4 to master (#389)
Fast forward release-0.4 to master
2020-01-23 15:36:04 +01:00
Frederic Branczyk
f2b4528b63 Merge pull request #387 from brancz/reduce-histogram-buckets
*: Throw away unused high cardinality apiserver duration buckets
2020-01-23 15:32:18 +01:00
Krasi Georgiev
be8eb39024 re-added most collectors
Signed-off-by: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com>
2020-01-23 15:18:59 +01:00
Krasi Georgiev
629e86e53a remove some unused collectors
Signed-off-by: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com>
2020-01-23 15:18:59 +01:00
Frederic Branczyk
a7628e0223 Merge pull request #381 from krasi-georgiev/remove-collectors
remove some unused collectors
2020-01-23 14:50:47 +01:00
Krasi Georgiev
8984606f5d re-added most collectors
Signed-off-by: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com>
2020-01-23 15:17:56 +02:00
Frederic Branczyk
48d95f0b9f *: Throw away unused high cardinality apiserver duration buckets 2020-01-23 13:24:42 +01:00
Frederic Branczyk
e410043b6b Merge pull request #386 from paulfantom/bump_kube-mix
Bump kubernetes-mixins
2020-01-23 12:22:40 +01:00
paulfantom
894069f24d manifests: regenerate 2020-01-23 12:01:21 +01:00
paulfantom
d074ea1427 bump kubernetes-mixins dependency 2020-01-23 12:01:10 +01:00
Frederic Branczyk
269aef6e37 Merge pull request #384 from s-urbaniak/agg
prometheus-adapter: add nodes resource to aggregated-metrics-reader
2020-01-22 09:45:38 +01:00
Sergiusz Urbaniak
90e5982de4 manifests: regenerate 2020-01-21 20:43:47 +01:00
Sergiusz Urbaniak
7165938b39 prometheus-adapter: add nodes resource to aggregated-metrics-reader 2020-01-21 18:36:52 +01:00
Frederic Branczyk
9ebe632d5d Merge pull request #380 from omerlh/prom-all-namespaces
added patch to allow prom to watch all namespaces
2020-01-20 14:16:29 +01:00
Lili Cosic
72ae778bfc Merge pull request #382 from tlereste/update_kube_state_metrics
bump kube-state-metrics to version 1.9.2
2020-01-17 11:17:57 +01:00
Thibault Le Reste
0608c96bf6 bump kube-state-metrics to version 1.9.2 2020-01-15 13:12:35 +01:00
Krasi Georgiev
44f3c61010 remove some unused collectors
Signed-off-by: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com>
2020-01-15 12:03:04 +02:00
omerlh
f517b35a42 added patch to allow prom to watch all namespaces 2020-01-14 17:55:27 +02:00
Frederic Branczyk
54c0fda307 Merge pull request #378 from LiliC/drop-less
jsonnet,manifests: Do not drop all metrics
2020-01-14 14:55:54 +01:00
Lili Cosic
6a3d667d3e manifests: Regenerate files 2020-01-14 10:34:46 +01:00
Lili Cosic
d9d3139dc8 jsonnet: Drop exact metrics 2020-01-14 10:26:42 +01:00
Frederic Branczyk
67ed0f63c2 Merge pull request #371 from tlereste/update_kube_state_metrics_version
update kube-state-metrics version to 1.9.1
2020-01-10 14:47:42 +01:00
Thibault Le Reste
7788d0d327 update kube-state-metrics version to 1.9.1 2020-01-10 14:23:52 +01:00
Lili Cosic
fca505f2a2 Merge pull request #368 from jfassad/master
jsonnet/kube-prometheus/kube-state-metrics: Add missing clusterRole permissions
2020-01-10 11:47:45 +01:00
João Assad
d40548d3a0 manifests: Regenerate manifests 2020-01-09 15:24:50 -03:00
João Assad
dba42d3477 jsonnet/kube-prometheus/kube-state-metrics: add missing clusterRole permissions 2020-01-09 15:12:59 -03:00
Lili Cosic
ee37661c34 Merge pull request #367 from LiliC/bump-k8s
tests/e2e/travis-e2e.sh: Switch to 1.17 k8s cluster
2020-01-09 13:13:39 +01:00
Lili Cosic
8b36950f0e tests/e2e/travis-e2e.sh: Switch to 1.17 k8s cluster 2020-01-09 13:03:01 +01:00
Frederic Branczyk
932745172d Merge pull request #365 from LiliC/drop-kubelet
Drop correct deprecated metrics and add e2e test to ensure that
2020-01-08 17:39:26 +01:00
Lili Cosic
1af59f3130 tests/e2e: Add e2e test to make sure all deprecated metrics are being
dropped
2020-01-08 12:35:21 +01:00
Lili Cosic
6562b02da8 manifests/*: Regenerate manifests 2020-01-08 12:35:21 +01:00
Lili Cosic
23999e44df jsonnet/kube-prometheus/prometheus: Drop correct deprecated metrics 2020-01-08 12:35:21 +01:00
Frederic Branczyk
69d3357892 Merge pull request #362 from pgier/lock-version-of-prometheus-operator-jsonnet-dependency
lock prometheus-operator jsonnet dependencies to v0.34.0
2020-01-07 08:06:46 +01:00
Frederic Branczyk
3465b0fa0d Merge pull request #346 from omerlh/patch-1
fix coredns monitoring on EKS
2020-01-06 16:19:16 +01:00
Paul Gier
1d1ce4967f lock prometheus-operator jsonnet dependencies to release-0.34 branch
This prevents mismatch between prometheus-operator binary and related
CRD yaml files.
2020-01-06 09:16:42 -06:00
Frederic Branczyk
3a0e6ba91f Merge pull request #360 from omerlh/patch-2
added metric_path to kubelet/cadvisor selector
2020-01-06 13:24:23 +01:00
omerlh
81e2d19398 run make 2020-01-06 13:49:57 +02:00
Omer Levi Hevroni
92d4cbae08 added metric_path to kubelet/cadvisor selector 2020-01-06 11:52:48 +02:00
Omer Levi Hevroni
2e72a8a832 fix coredns monitoring on EKS 2019-12-23 12:39:21 +02:00
Lili Cosic
9493a1a5f7 Merge pull request #342 from tlereste/update_kube_state_metrics
update kube-state-metrics version to 1.9.0
2019-12-20 16:57:17 +01:00
Thibault LE RESTE
0a48577bb7 update kube-state-metrics version to 1.9.0 2019-12-20 16:21:52 +01:00
Frederic Branczyk
9211c42df0 Merge pull request #336 from LiliC/change-dropped-metrics
jsonnet/kube-prometheus: Adjust dropped deprecated metrics names
2019-12-19 13:05:37 +01:00
Lili Cosic
5cddfd8da7 manifests: Regenerate manifests 2019-12-19 10:10:46 +01:00
Lili Cosic
bd69007c8c jsonnet/kube-prometheus: Adjust dropped deprecated metrics names
The names were not complete in the kubernetes CHANGELOG.
2019-12-19 10:09:34 +01:00
Frederic Branczyk
4f2b9c1ec8 Merge pull request #332 from LiliC/remove-pin-release
jsonnet/kube-prometheus/jsonnetfile.json: Pin prometheus-operator version to master instead
2019-12-18 13:16:03 +01:00
Lili Cosic
0be63d47fc manifests: Regenerate manifests 2019-12-18 11:18:21 +01:00
Lili Cosic
5fe60f37a2 jsonnetfile.lock.json: Update 2019-12-18 11:18:21 +01:00
Lili Cosic
200fee8d7c jsonnet/kube-prometheus/jsonnetfile.json: Pin prometheus-operator
version to master instead
2019-12-18 11:18:21 +01:00
Frederic Branczyk
1b9be6d00b Merge pull request #330 from LiliC/remove-depr-metrics
jsonnet,manifests: Drop all metrics which are deprecated in kubernetes
2019-12-17 16:51:40 +01:00
Lili Cosic
ce68c4b392 manifests/*: Regenerate manifest 2019-12-17 15:13:04 +01:00
Lili Cosic
5e9b883528 jsonnet/kube-prometheus*: Drop deprecated kubernetes metrics
These metrics were deprecated in kubernetes from 1.14 and 1.15 onwards.
2019-12-17 15:13:04 +01:00
Paweł Krupa
69b0ba03f1 Merge pull request #329 from paulfantom/e2e
tests/e2e: reenable checking targets availability
2019-12-16 14:40:43 +01:00
paulfantom
3279f222a0 tests/e2e: reenable checking targets availability 2019-12-16 14:23:43 +01:00
Paweł Krupa
543ccec970 Fix typo in node-exporter DaemonSet (#328)
Fix typo in node-exporter DaemonSet
2019-12-16 12:56:49 +01:00
paulfantom
f17ddfd293 assets: regenerate 2019-12-16 12:53:49 +01:00
paulfantom
3b8530d742 jsonnet/kube-prometheus/node-exporter: fix typo 2019-12-16 12:53:39 +01:00
Frederic Branczyk
44fe363211 Merge pull request #327 from paulfantom/deps
Update dependencies
2019-12-16 12:14:26 +01:00
paulfantom
326453cf47 manifests: regenerate 2019-12-16 11:24:04 +01:00
paulfantom
159a14ef47 update jsonnet dependencies 2019-12-16 11:20:37 +01:00
Frederic Branczyk
d03d57e6bb Merge pull request #326 from paulfantom/ipv6
IPv6 compatibility
2019-12-16 10:34:51 +01:00
Frederic Branczyk
31cb71fcd9 Merge pull request #317 from josqu4red/podmonitor-default-ns
Enable discovery of Podmonitors across namespaces
2019-12-12 16:54:39 +01:00
paulfantom
4474b24a32 manifests: regenerate 2019-12-12 16:26:58 +01:00
paulfantom
339ade5a81 jsonnet/kube-prometheus/node-exporter: wrap pod ip address in square brackets for ipv6 compatibility reasons 2019-12-12 16:14:08 +01:00
Frederic Branczyk
ce7c5fa3b4 Merge pull request #325 from sereinity-forks/master
Make limits/requests resources of kube-state-metrics removable
2019-12-12 16:06:58 +01:00
Sereinity
3f388b797d Make limits/requests resources of kube-state-metrics removable, unify tunning 2019-12-12 15:50:34 +01:00
Frederic Branczyk
20abdf3b72 Merge pull request #323 from simonpasquier/bump-kubernetes-mixin
Bump kubernetes mixin
2019-12-10 17:05:35 +01:00
Simon Pasquier
cd0f3c641e regenerate
Signed-off-by: Simon Pasquier <spasquie@redhat.com>
2019-12-10 16:48:51 +01:00
Simon Pasquier
408fde189b Bump kubernetes-mixin
Signed-off-by: Simon Pasquier <spasquie@redhat.com>
2019-12-10 16:48:28 +01:00
Jonathan Amiez
90cf0ae21c Update generated manifests 2019-12-05 15:12:18 +01:00
Jonathan Amiez
3ba4b5602a Enable PodMonitors discovery across namespaces 2019-12-05 15:09:40 +01:00
Frederic Branczyk
cb0e6e2c89 Merge pull request #309 from benjaminhuo/master
Group alert by namespace instead of job
2019-12-04 08:38:04 +01:00
Benjamin
03f7adcf92 regenerate
Signed-off-by: Benjamin <benjamin@yunify.com>
2019-12-04 10:14:42 +08:00
Benjamin
fd267aebeb Merge remote-tracking branch 'upstream/master' 2019-12-04 10:09:14 +08:00
Benjamin
420425d88e regenerate
Signed-off-by: Benjamin <benjamin@yunify.com>
2019-12-03 23:46:08 +08:00
Benjamin
965bec0ad7 Change Alertmanager group by condition
Signed-off-by: Benjamin <benjamin@yunify.com>
2019-12-03 20:02:47 +08:00
Frederic Branczyk
d22bad8293 Merge pull request #313 from yeya24/update-apiverison
Update apiversion
2019-12-03 11:22:47 +01:00
Frederic Branczyk
8c255e9e6c Merge pull request #310 from paulfantom/node-exporter-scrape-interval
Change node-exporter scrape interval to follow best practices
2019-12-03 10:15:52 +01:00
yeya24
56027ac757 update apiversion
Signed-off-by: yeya24 <yb532204897@gmail.com>
2019-12-01 09:33:11 -05:00
paulfantom
50b06b0d33 manifests: regenerate 2019-11-27 15:11:06 +01:00
paulfantom
6f6fd65a48 jsonnet/kube-prometheus/node-exporter: follow node-exporter best practices and scrape data every 15s 2019-11-27 15:09:04 +01:00
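For illustration, a minimal sketch of how such a scrape-interval override could look in a downstream jsonnet config; the `nodeExporter.serviceMonitor` object path follows the usual kube-prometheus layout and is an assumption, not quoted from this commit:

```jsonnet
// Hypothetical override sketch: scrape node-exporter every 15s,
// assuming the serviceMonitor object exposed by kube-prometheus.
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  nodeExporter+:: {
    serviceMonitor+: {
      spec+: {
        // rewrite every endpoint with the shorter interval
        endpoints: [e { interval: '15s' } for e in super.endpoints],
      },
    },
  },
};

{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) }
```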
Frederic Branczyk
f48fe057dc Merge pull request #307 from EricHorst/patch-1
Update README.md with apply clarification.
2019-11-21 17:41:53 -08:00
Eric Horst
8487871388 Update README.md with apply clarification.
Update the kubectl apply commands in the customizing section to match those in the quickstart section. The customizing section did not account for the recently introduced setup/ subdirectory.
2019-11-17 21:10:32 -08:00
Sergiusz Urbaniak
ce5fe790ee Merge pull request #299 from coreos/fix-jb
Fix jb
2019-11-06 08:33:48 +01:00
Frederic Branczyk
3b82c11944 manifests: Re-generate with latest deps 2019-11-06 07:09:00 +01:00
Frederic Branczyk
65e57d8ec7 Adapt to new jb version 2019-11-06 07:06:18 +01:00
Frederic Branczyk
223c163915 Merge pull request #298 from dctrwatson/disable-alerts-managed
Disable controller and scheduler alerts in managed clusters
2019-11-06 06:50:11 +01:00
John Watson
235761f915 Disable controller and scheduler alerts in managed clusters 2019-11-05 21:17:24 -08:00
Frederic Branczyk
6a6a43e227 Merge pull request #272 from karancode/aws_eks_cni
AWS EKS CNI Monitoring Support
2019-11-05 15:53:46 +01:00
karancode
8ee17e6735 with jsonnet-ci:latest image 2019-11-05 21:10:40 +09:00
karancode
528f338477 revert jsonnetfile json 2019-11-05 20:30:50 +09:00
karancode
78edcc0276 make clean generate 2019-11-05 20:25:55 +09:00
karancode
f05e73881a update dependency 2019-11-05 18:03:23 +09:00
karancode
60bd13b34b remove example 2019-11-05 18:02:46 +09:00
karancode
9072e3530a fix: remove garbage character 2019-11-05 17:15:45 +09:00
karancode
737720c119 test 2019-11-05 16:57:39 +09:00
karancode
4bd3cb586a add prometheus rule to patch 2019-11-05 16:39:45 +09:00
karancode
01f944aa30 sync master with current fork 2019-11-05 16:36:56 +09:00
Sergiusz Urbaniak
b8f513e4d4 Merge pull request #293 from s-urbaniak/update
jsonnet/kube-prometheus: bump prometheus-operator
2019-11-01 15:44:44 +01:00
Sergiusz Urbaniak
c8f0471279 jsonnet: regenerate 2019-11-01 15:27:14 +01:00
Sergiusz Urbaniak
5e75f27ae2 jsonnet: pin node-mixin due to upstream bug 2019-11-01 15:26:56 +01:00
Sergiusz Urbaniak
02369dd1da jsonnet/kube-prometheus: bump prometheus-operator 2019-11-01 14:20:12 +01:00
Sergiusz Urbaniak
a3b1121562 Merge pull request #290 from LiliC/imrove-targets-down
jsonnet/kube-prometheus/alerts: Add namespace to TargetDown msg
2019-10-29 15:44:02 +01:00
Lili Cosic
78b9183837 manifests/prometheus-rules.yaml: Regenerate file 2019-10-29 14:59:13 +01:00
Lili Cosic
01d30382aa jsonnet/kube-prometheus/alerts: Add namespace to TargetDown msg 2019-10-29 14:36:14 +01:00
Frederic Branczyk
cc389a718c Merge pull request #289 from BenoitKnecht/fix-memory-saturation-dashboard
Fix memory saturation dashboard
2019-10-29 10:38:31 +01:00
Benoît Knecht
20eb5b312a manifests: Regenerate with new node-mixin dependency
Generate manifests for fd5b77c.
2019-10-28 16:35:12 +01:00
Benoît Knecht
fd5b77cadf jsonnetfile.lock.json: Update node-mixin dependency
Upgrade to prometheus/node_exporter@d574b4b, which includes a better
metric for memory saturation.

See prometheus/node_exporter#1524.
2019-10-28 16:30:11 +01:00
Frederic Branczyk
7f2e669d46 Merge pull request #263 from Deepak1100/patch-1
Docs for rawGrafanaDashboards field
2019-10-28 14:11:56 +01:00
Deepak Jain
d99aefe276 CLOUD-3031| Deepak Jain| adding example file 2019-10-26 22:55:35 +05:30
Deepak Jain
059e74d156 Docs for rawGrafanaDashboards field
Proposed in https://github.com/brancz/kubernetes-grafana/pull/78
2019-10-26 22:55:35 +05:30
Sergiusz Urbaniak
03b36af546 Merge pull request #282 from BenoitKnecht/prometheus-config
prometheus: Let name and replicas be set in _config
2019-10-25 09:45:11 +02:00
Frederic Branczyk
2f54bcb4c6 Merge pull request #286 from pgier/update-kube-state-metrics
Update kube-state-metrics to 1.8.0
2019-10-25 09:15:25 +02:00
Frederic Branczyk
1129dd7fb7 Merge pull request #287 from smarterclayton/bump_prometheus
jsonnet: Update to latest kubernetes-mixin
2019-10-25 09:12:45 +02:00
Clayton Coleman
6e0ca7565f jsonnet: Update to latest kubernetes-mixin
Pick up new alerts for unreachable nodes.
2019-10-25 00:23:55 -04:00
Paul Gier
ebb960ee5e update generated files for kube-state-metrics 1.7.2 -> 1.8.0 2019-10-24 15:02:29 -05:00
Paul Gier
f72d49ca26 update kube-state-metrics 1.7.2 -> 1.8.0 2019-10-24 15:01:46 -05:00
Benoît Knecht
5686d7b439 Makefile: Fix manifests target dependency (#285)
Since 1664600, manifests are built using `examples/kustomize.jsonnet`
instead of `example.jsonnet`.

This commit updates the dependencies in the `manifests` target to
reflect that change.
2019-10-24 13:37:11 +02:00
Benoît Knecht
dc77f255de prometheus: Let name and replicas be set in _config
Before #260, the Prometheus name and number of replicas could be
configured in `_config.prometheus.name` and
`_config.prometheus.replicas` respectively.

It isn't the case anymore, which means that configurations that did set
a custom name for Prometheus will get a second Prometheus instance
called `k8s` when they upgrade kube-prometheus.

This commit adds back the ability to configure both of these parameters
in `_config`.
2019-10-24 09:39:04 +02:00
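As a hedged sketch (the `_config.prometheus.name` and `_config.prometheus.replicas` field names come from the commit message above; the surrounding structure is assumed for illustration), restoring a custom name and replica count would look roughly like:

```jsonnet
// Sketch based on the _config fields named in this commit;
// the values and output mapping are illustrative assumptions.
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: {
    namespace: 'monitoring',
    prometheus+:: {
      name: 'main',  // default is 'k8s'; keeping a custom name avoids a second instance
      replicas: 3,
    },
  },
};

{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) }
```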
Jake Utley
4e5b454ba8 Add metrics_path label to kubelet servicemonitor endpoints (#277)
* Add metrics_path label to kubelet servicemonitor endpoints

* Set kubelet metric_path label in jsonnet

* Add generated kubelet servicemonitor
2019-10-24 09:16:57 +02:00
karancode
a3ab6bd49b add available_ip rule 2019-10-24 04:12:07 +09:00
Benoît Knecht
a7884a6c18 node-exporter: Use configured resources for kube-rbac-proxy (#279)
Since #132, `kube-rbac-proxy` resources can be configured in
`config.resources['kube-rbac-proxy']`, but the node-exporter daemonset
was still using hard-coded values.

This commit sets the request and limit resources to the configured
values for the `kube-rbac-proxy` container in the node-exporter pods.
2019-10-23 10:58:24 +02:00
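A hedged sketch of the `_config.resources['kube-rbac-proxy']` override this commit refers to; the request/limit values and exact object shape are assumptions for illustration:

```jsonnet
// Hypothetical example: configure kube-rbac-proxy resources once in _config,
// which (per this commit) the node-exporter daemonset now honors.
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: {
    resources+:: {
      'kube-rbac-proxy': {
        requests: { cpu: '10m', memory: '20Mi' },  // illustrative values
        limits: { cpu: '20m', memory: '40Mi' },
      },
    },
  },
};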
Paul Gier
c8273cf9e9 Scripts and readme (#258)
* Avoid race condition when deploying quickstart example

The namespace and CRD creation must happen before any dependent objects
are created.  So we can put these in a separate directory (manifest/setup)
so they can be created before the other objects.

Some minor updates to the README and added a couple of scripts
for the quickstarts

Update travis script to avoid race condition

Signed-off-by: Paul Gier <pgier@redhat.com>

* simplify the example quickstart script and improve readme

Signed-off-by: Paul Gier <pgier@redhat.com>

* increase minikube memory to 6g for quickstart example
2019-10-23 08:38:31 +02:00
karancode
d4ba158f9b bugfix final 2019-10-23 01:26:35 +09:00
karancode
3c4dbc52d9 bugfix examples/eks-cni-example 2019-10-23 01:10:12 +09:00
karancode
79c670bcd0 revert examples/kustomize.jsonnet 2019-10-23 00:37:05 +09:00
karancode
b03ff4f593 embedmd for doc 2019-10-23 00:24:00 +09:00
karancode
8228ebd2ba fix example 2019-10-23 00:22:59 +09:00
karancode
edb327531d update example 2019-10-23 00:15:25 +09:00
karancode
13c114a72f catch all eks 2019-10-23 00:15:05 +09:00
karancode
9249256b4a revert examples to original 2019-10-23 00:00:16 +09:00
karancode
e2b7e7f17d remove yamls from general manifests 2019-10-22 20:58:05 +09:00
karancode
d32e859a11 remove example 2019-10-22 20:52:09 +09:00
karancode
1cbc994344 fix namespace 2019-10-22 20:44:28 +09:00
karancode
cbbfa0cad5 move eks serviceMonitor to patch files 2019-10-22 20:33:03 +09:00
karancode
6ef4b3d330 remove local version 2019-10-22 03:26:02 +09:00
karancode
648db9d544 add readme 2019-10-22 03:24:31 +09:00
karancode
5cc6daab4a add aws eks cni service yaml 2019-10-22 02:57:40 +09:00
karancode
c156f21d50 bugfix service name 2019-10-22 02:44:57 +09:00
karancode
55db3208da fix names for service 2019-10-22 02:33:55 +09:00
karancode
3b8e685082 add aws-eks-cni service 2019-10-22 02:24:07 +09:00
karancode
3640448229 fix name 2019-10-22 02:03:58 +09:00
karancode
19624d9def add aws_eks_cni serviceMonitor 2019-10-22 01:45:55 +09:00
albertdb
24aebaf985 Bump prometheus-adapter and Grafana versions (#270)
* Bumping prometheus-adapter and Grafana versions

* Bumping prometheus-adapter version in libsonnet file

* Regenerating lockfile

* Bumping Grafana version in libsonnet file

* Updating kustomization.yaml
2019-10-21 12:42:19 +02:00
Frederic Branczyk
3d5fc3e38d Merge pull request #252 from benjaminhuo/ksm-autoscaler
Add cluster-proportional-vertical-autoscaler as a separate deployment for ksm
2019-10-16 15:38:29 +02:00
Frederic Branczyk
86cd3bc703 Merge pull request #262 from liuxu623/master
Add k8s-resources-node dashboard
2019-10-16 13:12:48 +02:00
liuxu
ace8b4d6d3 add k8s-resources-node dashboard 2019-10-16 16:49:33 +08:00
Benjamin
a16d5b69ab Add separate autoscaler for ksm
Signed-off-by: Benjamin <benjamin@yunify.com>
2019-10-16 16:02:44 +08:00
Frederic Branczyk
4e846a146f Merge pull request #265 from kpucynski/grafana-dashboards-update
Grafana dashboards update
2019-10-14 16:24:35 +02:00
Karol Pucynski
e7c3ca314d Grafana dashboards update 2019-10-14 16:00:06 +02:00
Frederic Branczyk
1ebce4955a Merge pull request #264 from paulfantom/ci_fixes
Do not download tooling when it is already available
2019-10-14 14:13:01 +02:00
paulfantom
7a2befe7fa *: Assume jb and embedmd are already available 2019-10-14 13:49:33 +02:00
paulfantom
bbd991a3b2 *: add names to CI jobs 2019-10-14 12:32:37 +02:00
Frederic Branczyk
8405360a46 Merge pull request #261 from s-urbaniak/prometheus-anti-affinity
jsonnet/kube-prometheus/kube-prometheus-anti-affinity: fix construction
2019-10-08 17:52:06 +02:00
Sergiusz Urbaniak
bd8d597f8d jsonnet/kube-prometheus/kube-prometheus-anti-affinity: fix construction
Currently, anti-affinity sources its configuration from the global configuration,
not respecting local prometheus settings.

This fixes it.
2019-10-08 17:30:12 +02:00
Sergiusz Urbaniak
bcadf3ae05 Merge pull request #260 from s-urbaniak/prometheus-constructor
kube-prometheus/prometheus: Add local configuration
2019-10-08 17:16:01 +02:00
Sergiusz Urbaniak
40a5dc2b71 kube-prometheus/prometheus: Add local configuration
This adds constructor'esque configuration options for prometheus assets.
They still reference global _config default values for backwards compatibility
but allow overriding values for new instances of prometheus assets.
2019-10-08 11:37:26 +02:00
Paweł Krupa
52685175f2 Merge pull request #259 from gitfool/fix-nodequery
Fix nodeQuery rate for window
2019-10-08 10:06:58 +02:00
Sean Fausett
6ec81661fa manifests: regenerate 2019-10-08 08:51:14 +13:00
Sean Fausett
5155e57141 jsonnet/kube-prometheus/prometheus-adapter: fix nodeQuery rate for window 2019-10-08 08:48:55 +13:00
Frederic Branczyk
73395e6d78 Merge pull request #257 from paulfantom/fix_window
fix incorrect window in containerQuery
2019-10-07 10:47:54 +02:00
paulfantom
a9f7b03f27 manifests: regenerate 2019-10-07 10:34:24 +02:00
paulfantom
d2dd84bc0f jsonnet/kube-prometheus/prometheus-adapter: fix incorrect window in containerQuery 2019-10-06 18:57:14 +02:00
Paweł Krupa
21ace9b55e increase time period for rate over cadvisor metrics (#254)
increase time period for rate over cadvisor metrics
2019-10-02 17:18:30 +02:00
paulfantom
dfb626837f manifests: regenerate 2019-10-02 16:38:20 +02:00
paulfantom
c72ae7b63c increase time period for rate over cadvisor metrics 2019-10-02 16:31:55 +02:00
Sergiusz Urbaniak
f458e85e5d Merge pull request #248 from s-urbaniak/fix_116
jsonnet/prometheus-adapter: Fix query for k8s 1.16
2019-09-27 11:20:29 +02:00
Sergiusz Urbaniak
ee7d0d367f jsonnet/prometheus-adapter: Fix query for k8s 1.16 2019-09-27 11:02:32 +02:00
Lili Cosic
139df678f0 Merge pull request #247 from LiliC/bump-mixins
Bump dependencies
2019-09-26 15:14:25 +02:00
Lili Cosic
0f5400e5fe manifests: Regenerate files 2019-09-26 14:53:40 +02:00
Lili Cosic
3924379e84 jsonnetfile.lock.json: Bump all deps 2019-09-26 14:46:11 +02:00
Lili Cosic
be47e4a7c2 Merge pull request #245 from LiliC/add-ksm-job
jsonnet/kube-prometheus/kube-state-metrics: Do not drop job label
2019-09-26 14:40:40 +02:00
Lili Cosic
5839b8c1cb manifests/kube-state-metrics-serviceMonitor.yaml: Regenerate 2019-09-26 14:22:13 +02:00
Lili Cosic
48eefc51d9 jsonnet/kube-prometheus/kube-state-metrics: Do not drop job label
Currently a lot of alerts rely on the job='kube-state-metrics' label.
2019-09-26 14:14:08 +02:00
Matthias Loibl
9486ec2bc1 Merge pull request #244 from brancz/fix-additional-rules
examples: Fix additional rules snippets
2019-09-26 14:07:52 +02:00
Frederic Branczyk
2e996fce91 examples: Fix additional rules snippets 2019-09-26 11:27:31 +02:00
Frederic Branczyk
e304d2a60f Merge pull request #240 from simonpasquier/support-jsonnetfmt
Update Makefile to support jsonnet >= 0.13
2019-09-25 10:20:10 +02:00
Frederic Branczyk
4da422095b Merge pull request #239 from simonpasquier/improve-target-down-message
jsonnet/kube-prometheus/alerts: improve TargetDown message
2019-09-25 09:56:53 +02:00
Simon Pasquier
e0c232df8b Update Makefile to support jsonnet >= 0.13 2019-09-25 09:16:23 +02:00
Frederic Branczyk
2dcc928425 Merge pull request #238 from LiliC/fix-replacment
manifests/node-exporter-serviceMonitor.yaml: Fix typo
2019-09-25 09:12:27 +02:00
Simon Pasquier
b9504efef7 jsonnet/kube-prometheus/alerts: improve TargetDown message
Signed-off-by: Simon Pasquier <spasquie@redhat.com>
2019-09-25 09:11:03 +02:00
Lili Cosic
8d7db8862d manifests/node-exporter-serviceMonitor.yaml: Regenerate manifest 2019-09-25 08:41:14 +02:00
Lili Cosic
b6c6d39fb9 jsonnet/kube-prometheus/node-exporter: Fix typo 2019-09-24 17:02:03 +02:00
Frederic Branczyk
dc20838d65 Merge pull request #235 from guusvw/remove-addon-resizer-from-ksm
removing addonResizer from ksm
2019-09-24 14:59:32 +02:00
Guus van Weelden
87a4567faf update generated manifests
Signed-off-by: Guus van Weelden <guus.vanweelden@moia.io>
2019-09-24 14:36:22 +02:00
Guus van Weelden
b6becc0936 removing addonResizer from ksm
the addon-resizer could lead to problems with kube-state-metrics;
it has also been removed from the ksm-maintained kubernetes manifests:
https://github.com/kubernetes/kube-state-metrics/pull/750

Signed-off-by: Guus van Weelden <guus.vanweelden@moia.io>
2019-09-24 14:19:59 +02:00
Matthias Loibl
0fc41a075a Merge pull request #233 from LiliC/remove-labels
jsonnet/kube-prometheus/kube-state-metrics: Drop ksm own labels
2019-09-24 09:56:36 +02:00
Lili Cosic
cb227144e2 manifests/kube-state-metrics-serviceMonitor.yaml: Regenerate manifest 2019-09-23 18:37:29 +02:00
Lili Cosic
974d3a70be jsonnet/kube-prometheus/kube-state-metrics: Drop ksm own labels
These labels are confusing and misleading, as they describe
kube-state-metrics itself, not the target.
2019-09-23 18:28:14 +02:00
Frederic Branczyk
0739c11ebb Merge pull request #230 from dparkar/dev/dparkar/aks/issue213
adding note for k8s before v1.14.0
2019-09-20 08:30:03 +02:00
Dhawal Parkar
c0b4e45bb4 adding note for k8s before v1.14.0 2019-09-19 15:18:50 -07:00
Matthias Loibl
3f3236d050 Merge pull request #221 from LiliC/bump-node-mixin
Bump node-mixin
2019-09-13 12:00:35 +02:00
Lili Cosic
6a11f9c3bc manifests: Regenerate files 2019-09-13 11:44:20 +02:00
Lili Cosic
7e33e90fb1 jsonnetfile.lock.json: Bump node-mixin 2019-09-13 11:30:55 +02:00
Lili Cosic
6458803cee Merge pull request #220 from LiliC/bump-prom-op-33
Bump prometheus-operator to 0.33 release
2019-09-12 18:15:07 +02:00
Lili Cosic
d04ddf5a98 manifests: Regenerate files 2019-09-12 17:54:45 +02:00
Lili Cosic
26750eadf5 Bump prometheus-operator to 0.33 release 2019-09-12 17:49:29 +02:00
Frederic Branczyk
0038e25165 Merge pull request #216 from aveyrenc/kubespray-coredns
Kubespray deploys CoreDNS with label k8s-app=kube-dns
2019-09-09 14:28:17 +02:00
Alexandre Veyrenc
81038c7f3c Kubespray deploys CoreDNS with label k8s-app=kube-dns 2019-09-09 11:39:00 +02:00
Lili Cosic
fe11d190d6 Merge pull request #209 from yeya24/fix/quote
fix: add the missing quote
2019-09-04 18:32:16 +02:00
yeya24
53ebff6465 add the missing quote
Signed-off-by: yeya24 <yb532204897@gmail.com>
2019-09-04 10:33:02 -04:00
Matthias Loibl
6710ef8739 Merge pull request #208 from yeya24/bump-thanos
bump thanos image version
2019-09-03 21:02:30 +02:00
yeya24
223a3be924 bump thanos image version
Signed-off-by: yeya24 <yb532204897@gmail.com>
2019-09-03 10:24:48 -04:00
Frederic Branczyk
506a591d0e Merge pull request #204 from LiliC/bump-prom-operaotr
Bump prometheus-operator to release 0.32
2019-08-30 18:38:26 +02:00
Lili Cosic
c34bbb21e5 manifests: Regenerate files 2019-08-30 18:17:27 +02:00
Lili Cosic
b4c941cfd0 jsonnetfile.lock.json: Regenerate lock file 2019-08-30 18:12:07 +02:00
Lili Cosic
907d6071e6 jsonnet/kube-prometheus/jsonnetfile.json: Bump prometheus-operator
release
2019-08-30 18:09:07 +02:00
Sergiusz Urbaniak
f6774fb7e6 Merge pull request #202 from s-urbaniak/fix-instance
jsonnet/prometheus-adapter: fix resource override for raw node query
2019-08-29 18:21:43 +02:00
Sergiusz Urbaniak
e2ba988f8f manifests: regenerate 2019-08-29 17:38:07 +02:00
Sergiusz Urbaniak
5bea571be1 jsonnet/prometheus-adapter: fix resource override for raw node query 2019-08-29 17:33:26 +02:00
Frederic Branczyk
5310aef112 Merge pull request #200 from lanmarti/patch-1
#199 Clarify externalUrl difference for Grafana
2019-08-29 16:48:02 +02:00
Matthias Loibl
ea6c1195a6 Merge pull request #201 from s-urbaniak/fix-node-query
jsonnet/kube-prometheus-prometheus-adapter: fix node query
2019-08-29 13:47:53 +02:00
Sergiusz Urbaniak
ea5790f965 manifests: regenerate 2019-08-29 13:26:47 +02:00
Sergiusz Urbaniak
f383664170 jsonnet/kube-prometheus-prometheus-adapter: fix node query
Currently, we use the node:node_memory_bytes_total:sum and node:node_memory_bytes_available:sum
recording rules for the memory node query.
These recording rules have been removed in https://github.com/coreos/kube-prometheus/pull/191.

This fixes it by using raw queries.
2019-08-29 13:19:33 +02:00
Laurens
3eda46c36c #199 Clarify externalUrl difference for Grafana 2019-08-29 10:14:25 +02:00
Frederic Branczyk
c5f22b61d4 Merge pull request #198 from kylebyerly-hp/add-proxy
Add proxy for those that are behind one
2019-08-29 08:17:55 +02:00
Frederic Branczyk
fa6d6833cd Merge pull request #189 from kylebyerly-hp/patch-2
Add labels for namespace and service to TargetDown
2019-08-29 08:12:06 +02:00
Kyle Byerly
3d40f41381 Add proxy for those that are behind one 2019-08-28 15:35:22 -06:00
Kyle Byerly
fda5811975 Add labels for namespace and service to TargetDown 2019-08-28 15:29:26 -06:00
Frederic Branczyk
e9341231bc Merge pull request #195 from prune998/patch-1
use real alertmanager name
2019-08-28 10:06:11 +02:00
Prune Sebastien THOMAS
2511a6c529 use real alertmanager name
`alertmanager-main` is hardcoded where it should be derived from the real alertmanager name as defined by the user
2019-08-27 11:53:12 -04:00
Matthias Loibl
9c82f86770 Merge pull request #194 from metalmatze/swap-typo
Update kubernetes-mixin to fix typo in resource dashboard
2019-08-27 16:06:34 +02:00
Matthias Loibl
31513f5b69 Update kubernetes-mixin to fix typo in resource dashboard 2019-08-27 14:53:55 +02:00
Sergiusz Urbaniak
4ee8f93c49 Merge pull request #193 from s-urbaniak/bump-kubernetes-mixin
jsonnet: bump kubernetes mixin
2019-08-27 14:28:25 +02:00
Sergiusz Urbaniak
621a9054d6 jsonnet: bump kubernetes mixin 2019-08-27 13:30:50 +02:00
Paweł Krupa
a8927ac86d Merge pull request #191 from paulfantom/node-mixins
Add node_exporter mixin
2019-08-26 13:46:01 +02:00
paulfantom
ec8920043d manifests: regenerate 2019-08-26 10:56:47 +02:00
paulfantom
2396fa9483 jsonnet/kube-prometheus: add node-mixin 2019-08-26 10:56:30 +02:00
paulfantom
bcbf54805a jsonnet/kube-prometheus/alerts: remove alerts provided by node_exporter mixin 2019-08-26 10:56:05 +02:00
Matthias Loibl
da959c6436 Merge pull request #185 from bgagnon/fix-72
Fix wrong config used for node-exporter container limits
2019-08-19 11:11:22 +02:00
Frederic Branczyk
c4b89c9414 Merge pull request #186 from LiliC/fix-many2many
*: Fix AlertmanagerConfigInconsistent many-to-many matching errors
2019-08-16 16:25:27 +02:00
Frederic Branczyk
a2875bc637 Merge pull request #187 from RiRa12621/patch-1
ignore swp files
2019-08-16 16:18:51 +02:00
Lili Cosic
c6e6f2e74f manifests/prometheus-rules.yaml: Regenerate files 2019-08-16 16:13:43 +02:00
Lili Cosic
909f51b3bd jsonnet/kube-prometheus: Prevent many-to-many matching
If there is more than one prometheus-operator pod, which happens briefly
when the prometheus-operator pod is deleted, we can see many-to-many
matching errors. This whitelists the labels used for matching and
excludes the pod label.
2019-08-16 15:55:17 +02:00
Frederic Branczyk
2c7191d11c Merge pull request #184 from metalmatze/local-dependency
Use local dependency for kube-prometheus jsonnet
2019-08-16 15:47:59 +02:00
Matthias Loibl
1635c2cd23 Regenerate manifests 2019-08-16 15:18:46 +02:00
Rick Rackow
f22cb8f797 ignore swp files
there's never a need to commit swap files, so just ignore them
2019-08-16 15:07:50 +02:00
Benoit Gagnon
04309dcdb6 fix wrong config used for node-exporter container limits 2019-08-12 13:23:10 -04:00
Matthias Loibl
41a6549171 Use local dependency for kube-prometheus jsonnet 2019-08-12 14:14:01 +02:00
Frederic Branczyk
636f2b6e81 Merge pull request #132 from bgagnon/fix-72
Add mixin to strip cpu/memory limits (issue #72)
2019-08-12 10:29:31 +02:00
Benoit Gagnon
da644166a3 use proper operator so requests are not stripped 2019-08-10 14:35:06 -04:00
Benoit Gagnon
16c22b7250 move resources requests/limits to _config and simplify mixin 2019-08-10 14:31:28 -04:00
Frederic Branczyk
517aded231 Merge pull request #183 from brancz/re-gen
manifests: Re-generate
2019-08-09 17:10:36 +02:00
Frederic Branczyk
1bba75da80 manifests: Re-generate 2019-08-09 16:56:50 +02:00
Frederic Branczyk
34cdedde43 Merge pull request #182 from guusvw/bump-kube-state-metrics-to-1_7_2
set kubeStateMetrics version to 1.7.2
2019-08-09 16:19:44 +02:00
Guus van Weelden
c793073789 set kubeStateMetrics version to 1.7.2
Signed-off-by: Guus van Weelden <guus.vanweelden@moia.io>
2019-08-09 16:09:03 +02:00
Frederic Branczyk
6d07ef87cd Merge pull request #180 from lord63-forks/fix-io-time-yaxe
Fix io time yaxe
2019-08-09 09:03:33 +02:00
lord63
3c64cc935c Fix io time yaxe 2019-08-09 14:45:32 +08:00
Frederic Branczyk
4adb70b017 Merge pull request #177 from minhdanh/fix-kubemismatchversion-incorrect
Fix incorrect KubeVersionMismatch alert when `{job=coredns}`
2019-08-07 16:01:20 +02:00
Minh Danh
b5ebde0586 Update with new jsonnetfile.lock.json 2019-08-07 17:17:43 +07:00
Minh Danh
7b2138be7d Fix https://github.com/kubernetes-monitoring/kubernetes-mixin/pull/231 2019-08-07 16:11:03 +07:00
Frederic Branczyk
e7d1ada775 Merge pull request #141 from dominikhahn/master
Increase ntp clockskewdetected accuracy to 5ms
2019-08-05 10:31:31 +02:00
Frederic Branczyk
14b893c212 Merge pull request #114 from rafaeltuelho/patch-1
adding a note about ServiceMonitor definition
2019-08-05 09:42:24 +02:00
Sergiusz Urbaniak
31cf273922 Merge pull request #151 from metalmatze/kube-state-metrics-1.7.0-rc.1
Bump kube-state-metrics to v1.7.0
2019-08-01 09:34:36 +02:00
Lili Cosic
8865f275dd Regenerate jsonnetfile.lock.json 2019-07-31 17:01:41 +02:00
Lili Cosic
eb132e923e Regenerate manifests 2019-07-31 16:53:22 +02:00
Lili Cosic
8b570f5fbb jsonnet/kube-state-metrics: Fix storageclass role
The resource is called storageclasses not storageclass.
2019-07-31 16:53:06 +02:00
Lili Cosic
ca6ca5f65d Regenerate manifests 2019-07-31 16:53:06 +02:00
Lili Cosic
62caa7adc9 jsonnet/kube-prometheus/kube-state-metrics: Bump 1.7.1 2019-07-31 16:52:03 +02:00
Matthias Loibl
3c652101f1 Generate manifests with kube-state-metrics 1.7.0 2019-07-31 16:52:03 +02:00
Matthias Loibl
6faecd63b7 Bump kube-state-metrics to 1.7.0-rc.1 and add storageclass rules 2019-07-31 16:51:27 +02:00
Matthias Loibl
7d6183a9ec jsonnet/kube-prometheus/kube-state-metrics: Move rules into array 2019-07-31 16:51:27 +02:00
Rafael T. C. Soares
a821a80a41 improving servicemonitor definition section 2019-07-30 23:28:52 -03:00
Rafael T. C. Soares (A.K.A Tuelho)
ad2b941e77 adding a note about ServiceMonitor definition
In the **Adding additional namespaces to monitor** section I appended a note showing the need for ServiceMonitor when adding additional namespaces... 

see: https://github.com/coreos/prometheus-operator/issues/2557#issuecomment-498996568
2019-07-30 23:28:52 -03:00
Frederic Branczyk
2f1083be7e Merge pull request #168 from karancode/update_minikube_quickstart_documentation
update README, change minikube k8s-version to 1.14.4
2019-07-24 22:38:40 -07:00
karancode
4761cac933 update README, change minikube k8s-version to 1.14.4 2019-07-25 13:20:08 +09:00
Frederic Branczyk
5157534678 Merge pull request #162 from LiliC/relabel-nodename
Include node name in NodeDiskRunningFull alert
2019-07-24 12:11:25 -07:00
Lili Cosic
05f2a18974 jsonnetfile.lock.json: Run jb update 2019-07-24 18:13:16 +02:00
Lili Cosic
aa7df507e9 Regenerate files 2019-07-24 18:11:40 +02:00
Lili Cosic
ccb138374e jsonnet: Include node name in NodeDiskRunningFull
This makes it easier to identify which node the alert is firing for.
Previously only the device, namespace, and pod name were included in
the alert.
2019-07-24 17:52:39 +02:00
Lili Cosic
44dab89344 jsonnet: Relabel instance to node name
This allows us to display the node name in alerts, instead of the IP.
2019-07-24 17:34:54 +02:00
Frederic Branczyk
16a49f00d6 Merge pull request #157 from rphillips/fixes/change_to_ga_label
change to use GA kubelet OS label
2019-07-18 14:34:44 -07:00
Ryan Phillips
4d315c44e6 bump kind to 0.4.0 2019-07-18 16:23:16 -05:00
Ryan Phillips
583fef10f2 bump lock and regenerate 2019-07-18 15:20:24 -05:00
Ryan Phillips
176a187117 change to use GA kubelet OS label 2019-07-18 15:19:07 -05:00
Frederic Branczyk
620e0e4d1d Merge pull request #159 from metalmatze/empty-podmonitor-selector
Add empty podMonitorSelector to Prometheus resource
2019-07-18 11:44:36 -07:00
Matthias Loibl
8b37004c5b Generate manifest with empty podMonitorSelector 2019-07-18 16:40:54 +02:00
Matthias Loibl
a535968c33 Add empty podMonitorSelector to Prometheus resource 2019-07-18 16:35:01 +02:00
Frederic Branczyk
7212340368 Merge pull request #150 from brancz/prom-mixin
Use upstream Prometheus monitoring mixin
2019-07-11 17:21:45 +02:00
Frederic Branczyk
3692390075 manifests: Re-generate 2019-07-11 16:16:39 +02:00
Frederic Branczyk
c8c850ef2b jsonnet: Use upstream prometheus-mixin 2019-07-11 15:35:48 +02:00
Frederic Branczyk
f0afafdb08 Merge pull request #149 from metalmatze/kube-thanos
Remove Thanos components except sidecar
2019-07-10 17:18:18 +02:00
Matthias Loibl
f1cd88fdbb Remove Thanos components from kube-prometheus except sidecar 2019-07-10 16:39:41 +02:00
Frederic Branczyk
50d90fba66 Merge pull request #148 from brancz/update-ksm-role
manifests: Re-generate
2019-07-10 15:05:48 +02:00
Frederic Branczyk
a7a1101269 manifests: Re-generate 2019-07-10 14:45:28 +02:00
Frederic Branczyk
daf2e76bc6 Merge pull request #144 from zachaller/patch-1
Update kube-state-metrics.libsonnet
2019-07-10 09:45:31 +02:00
Frederic Branczyk
5a34b9de81 Merge pull request #146 from rajatvig/master
Bump to newer release for Prometheus and Alertmanager
2019-07-10 09:43:31 +02:00
Rajat Vig
b4a8b7f3c5 Update the lock file and the manifests 2019-07-09 23:09:11 -07:00
Rajat Vig
0bc30832d0 Bump to newer release for Prometheus and Alertmanager 2019-07-09 23:02:26 -07:00
Zach Aller
48651090a6 Update kube-state-metrics.libsonnet
With the bump to kube-state-metrics v1.6, ingress and certificate collectors were added; this updates the RBAC rules so that they work with the new version.
2019-07-09 16:28:17 -05:00
Hahn, Dominik
552b341bf6 Increase ntp clockskewdetected accuracy to 5ms 2019-07-05 11:40:07 +02:00
Frederic Branczyk
a5ba4c5116 Merge pull request #140 from benjaminhuo/thanos
Add thanos v0.5.0 support & remove deprecated thanos gossip
2019-07-05 09:34:57 +02:00
Benjamin
9a681fad7d Add thanos v0.5.0 support & remove deprecated thanos gossip
Signed-off-by: Benjamin <benjamin@yunify.com>
2019-07-05 14:26:14 +08:00
Frederic Branczyk
98b87e2890 Merge pull request #137 from LiliC/bump-ksm
Bump kube-state-metrics version to 1.6.0
2019-07-01 17:16:11 +02:00
Lili Cosic
aa18a46155 *: Regenerate manifests 2019-07-01 15:21:09 +02:00
Lili Cosic
6c34ff2d72 jsonnet: Bump kube-state-metrics version 2019-07-01 15:16:31 +02:00
Frederic Branczyk
026c09e6ad Merge pull request #133 from metalmatze/mixin-master
Use kubernetes-mixin's master in kube-prometheus master
2019-06-25 14:22:31 +02:00
Matthias Loibl
d0efc60a2e Update deps including kubernetes-mixin to latest master 2019-06-25 11:36:48 +02:00
Matthias Loibl
90b8632fb3 Set kubernetes-mixin on master branch to master 2019-06-25 01:21:24 +02:00
Benoit Gagnon
ffc8832ef9 use std.setMember instead of std.count 2019-06-24 11:43:22 -04:00
Benoit Gagnon
8e7d55d795 Add mixin to strip cpu/memory limits (issue #72) 2019-06-24 11:39:47 -04:00
Lucas Servén Marín
291f7c64fa Merge pull request #130 from brancz/bump-po
Bump prometheus operator to v0.31.0
2019-06-20 13:35:48 +02:00
Frederic Branczyk
b28a65534f *: Re-generate 2019-06-20 13:04:46 +02:00
Frederic Branczyk
bdf84bf186 jsonnet: Bump prometheus operator to release-0.31 2019-06-20 11:27:02 +02:00
Frederic Branczyk
c478aa3ade Merge pull request #128 from brancz/ne-fix
Fix missing semicolon
2019-06-18 18:00:52 +02:00
Frederic Branczyk
e4ff0f8746 manifests: Re-generate 2019-06-18 17:49:07 +02:00
Frederic Branczyk
3f6d0c6dd3 node-exporter: Fix missing semicolon 2019-06-18 17:46:12 +02:00
Frederic Branczyk
e4a8f22622 Merge pull request #127 from paulfantom/cp
cherry-picked #126 into release-0.1 branch
2019-06-18 14:59:02 +02:00
paulfantom
4e586fb3c6 make node_exporter pods tolerate every taint 2019-06-18 14:01:33 +02:00
Frederic Branczyk
88338bb762 Merge pull request #126 from paulfantom/node_exporter
make node_exporter pods tolerate every taint
2019-06-18 08:33:55 +02:00
paulfantom
272ff23cb6 make node_exporter pods tolerate every taint 2019-06-17 21:28:35 +02:00
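In Kubernetes, a toleration with `operator: Exists` and no key matches every taint. A hedged sketch of such an override (the daemonset object path is the usual kube-prometheus layout, assumed rather than quoted from the commit):

```jsonnet
// Hypothetical sketch: make node-exporter tolerate every taint
// via an empty-key Exists toleration.
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  nodeExporter+:: {
    daemonset+: {
      spec+: {
        template+: {
          spec+: {
            tolerations: [{ operator: 'Exists' }],
          },
        },
      },
    },
  },
};
```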
Frederic Branczyk
33523d0450 Merge pull request #124 from zot24/fix/typo
fix: minor typo
2019-06-17 08:45:48 +02:00
Israel Sotomayor
6e65c508f8 fix: typo 2019-06-15 23:59:30 +00:00
Matthias Loibl
c526434dd2 Merge pull request #122 from rajatvig/master
Fix Thanos deployment and service for ksonnet.beta.4
2019-06-14 10:13:34 +02:00
Rajat Vig
ef8bb5aac0 Fix Thanos deployment and service for ksonnet.beta.4 2019-06-13 14:46:36 -07:00
Lucas Servén Marín
9c1fda7fbe Merge pull request #121 from brancz/fix-jsonnetfile
Fix jsonnetfile.lock.json
2019-06-13 15:27:47 +02:00
Frederic Branczyk
e4c31bfd92 Fix jsonnetfile.lock.json 2019-06-13 15:13:36 +02:00
Rajat Vig
fa93506941 Update Prometheus, Node Exporter & Grafana (#119)
* Update Prometheus, Node Exporter & Grafana

* Update Prometheus to 2.10.0

* Add generated manifests

* Update lock reference to git sha

* Retrigger Travis
2019-06-13 08:22:01 +02:00
Frederic Branczyk
a86fcce12f Merge pull request #109 from MerelyAPseudonym/master
Incorporate “stale `manifests/`” fix from `release-0.1`
2019-06-11 08:18:05 +02:00
Josh Tilles
f4d7270c53 Update kube-prometheus self-dependency 2019-06-10 14:48:06 -04:00
Josh Tilles
df8a5b51b4 Merge branch 'release-0.1' 2019-06-10 14:47:21 -04:00
Lucas Servén Marín
12da6336af Merge pull request #118 from metalmatze/common-example.jsonnet
Add commented imports for most common patches to example.jsonnet
2019-06-07 15:48:50 +02:00
Matthias Loibl
b1c674100b Add commented imports for most common patches to example.jsonnet 2019-06-07 15:31:16 +02:00
Matthias Loibl
133d9a9540 Merge pull request #117 from paulfantom/jsonnet_0.13_regen
manifests: regenerate manifests with jsonnet 0.13
2019-06-07 11:25:50 +02:00
paulfantom
fa08d12e23 manifests: regenerate manifests with jsonnet 0.13 2019-06-07 11:12:32 +02:00
Frederic Branczyk
50d3e774a5 Merge pull request #87 from metalmatze/ksonnet.beta.4
Use ksonnet.beta.4 with updated Kubernetes 1.14
2019-06-04 15:56:20 +02:00
Frederic Branczyk
e7d077937c Merge pull request #102 from benjaminhuo/master
add tolerations
2019-06-04 15:41:57 +02:00
Matthias Loibl
f7f9bdea91 Generate correct manifests/ 2019-06-04 11:47:05 +02:00
Benjamin
72a9f8727f Add prometheus toleration example
Signed-off-by: Benjamin <benjamin@yunify.com>
2019-05-31 13:15:17 +08:00
Matthias Loibl
9d066577a5 Generate manifests 2019-05-29 18:13:34 +02:00
Matthias Loibl
eea692c190 Update kube-prometheus self-dependency 2019-05-29 17:59:43 +02:00
Matthias Loibl
107028fff3 Remove extra ksonnet4 dependency 2019-05-29 17:58:29 +02:00
Matthias Loibl
2e4b89b091 Generate manifests 2019-05-29 17:58:16 +02:00
Matthias Loibl
bedeadb7f5 Add ksonnet4 with ksonnet.beta.4 and update all imports 2019-05-29 17:57:52 +02:00
Frederic Branczyk
ee8f8f1872 Merge pull request #104 from paulfantom/fix_docs
*: Fix documentation on how to run containerized jsonnet
2019-05-21 19:49:08 +02:00
paulfantom
de2edfc5de *: Fix documentation on how to run containerized jsonnet 2019-05-21 16:19:04 +02:00
Frederic Branczyk
a249b1c434 Merge pull request #103 from zgfh/change_mem_limit
update mem limit of kube-rbac-proxy container in pod of node-exporter to 60M
2019-05-18 05:50:27 +02:00
zzg
f21a134fa4 jsonnetfile.lock.json: Update kube-prometheus and regenerate 2019-05-18 04:51:41 +08:00
zzg
6562f95c14 update mem limit of kube-rbac-proxy container in pod of node-exporter to 60M 2019-05-18 04:13:35 +08:00
95 changed files with 34412 additions and 6820 deletions

.gitignore

@@ -2,3 +2,4 @@ tmp/
 minikube-manifests/
 vendor/
 ./auth
+.swp

.travis.yml

@@ -16,6 +16,9 @@ services:
 jobs:
   include:
-  - script: make --always-make generate-in-docker && git diff --exit-code
-  - script: make --always-make test-in-docker
-  - script: GO111MODULE=on ./tests/e2e/travis-e2e.sh
+  - name: Check generated files
+    script: make --always-make generate-in-docker && git diff --exit-code
+  - name: Run tests
+    script: make --always-make test-in-docker
+  - name: Run e2e tests
+    script: GO111MODULE=on ./tests/e2e/travis-e2e.sh

Makefile

@@ -1,31 +1,44 @@
-JSONNET_FMT := jsonnet fmt -n 2 --max-blank-lines 2 --string-style s --comment-style s
+JSONNET_ARGS := -n 2 --max-blank-lines 2 --string-style s --comment-style s
+ifneq (,$(shell which jsonnetfmt))
+    JSONNET_FMT_CMD := jsonnetfmt
+else
+    JSONNET_FMT_CMD := jsonnet
+    JSONNET_FMT_ARGS := fmt $(JSONNET_ARGS)
+endif
+JSONNET_FMT := $(JSONNET_FMT_CMD) $(JSONNET_FMT_ARGS)
-JB_BINARY:=$(GOPATH)/bin/jb
-EMBEDMD_BINARY:=$(GOPATH)/bin/embedmd
+JB_BINARY := jb
+EMBEDMD_BINARY := embedmd
 CONTAINER_CMD:=docker run --rm \
     -e http_proxy -e https_proxy -e no_proxy \
     -u="$(shell id -u):$(shell id -g)" \
     -v "$(shell go env GOCACHE):/.cache/go-build" \
     -v "$(PWD):/go/src/github.com/coreos/kube-prometheus:Z" \
     -w "/go/src/github.com/coreos/kube-prometheus" \
-    quay.io/coreos/jsonnet-ci
+    quay.io/coreos/jsonnet-ci:release-0.36

 all: generate fmt test

+.PHONY: generate-in-docker
 generate-in-docker:
     @echo ">> Compiling assets and generating Kubernetes manifests"
-    $(CONTAINER_CMD) $(MAKE) $(MFLAGS) generate
+    $(CONTAINER_CMD) make $(MFLAGS) generate

+.PHONY: clean
+clean:
+    # Remove all files and directories ignored by git.
+    git clean -Xfd .
+
 generate: manifests **.md

-**.md: $(EMBEDMD_BINARY) $(shell find examples) build.sh example.jsonnet
+**.md: $(shell find examples) build.sh example.jsonnet
     $(EMBEDMD_BINARY) -w `find . -name "*.md" | grep -v vendor`

-manifests: vendor example.jsonnet build.sh
+manifests: examples/kustomize.jsonnet vendor build.sh
     rm -rf manifests
-    ./build.sh ./examples/kustomize.jsonnet
+    ./build.sh $<

-vendor: $(JB_BINARY) jsonnetfile.json jsonnetfile.lock.json
+vendor: jsonnetfile.json jsonnetfile.lock.json
     rm -rf vendor
     $(JB_BINARY) install

@@ -33,7 +46,7 @@ fmt:
     find . -name 'vendor' -prune -o -name '*.libsonnet' -o -name '*.jsonnet' -print | \
         xargs -n 1 -- $(JSONNET_FMT) -i

-test: $(JB_BINARY)
+test:
     $(JB_BINARY) install
     ./test.sh

@@ -42,12 +55,6 @@ test-e2e:
 test-in-docker:
     @echo ">> Compiling assets and generating Kubernetes manifests"
-    $(CONTAINER_CMD) $(MAKE) $(MFLAGS) test
-
-$(JB_BINARY):
-    go get -u github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb
-
-$(EMBEDMD_BINARY):
-    go get github.com/campoy/embedmd
+    $(CONTAINER_CMD) make $(MFLAGS) test

 .PHONY: generate generate-in-docker test test-in-docker fmt

README.md

@@ -20,30 +20,42 @@ This stack is meant for cluster monitoring, so it is pre-configured to collect m
 ## Table of contents

-* [Prerequisites](#prerequisites)
-  * [minikube](#minikube)
-* [Quickstart](#quickstart)
-* [Customizing Kube-Prometheus](#customizing-kube-prometheus)
-  * [Installing](#installing)
-  * [Compiling](#compiling)
-  * [Containerized Installing and Compiling](#containerized-installing-and-compiling)
-* [Configuration](#configuration)
-* [Customization Examples](#customization-examples)
-  * [Cluster Creation Tools](#cluster-creation-tools)
-  * [Internal Registries](#internal-registries)
-  * [NodePorts](#nodeports)
-  * [Prometheus Object Name](#prometheus-object-name)
-  * [node-exporter DaemonSet namespace](#node-exporter-daemonset-namespace)
-  * [Alertmanager configuration](#alertmanager-configuration)
-  * [Static etcd configuration](#static-etcd-configuration)
-  * [Pod Anti-Affinity](#pod-anti-affinity)
-  * [Customizing Prometheus alerting/recording rules and Grafana dashboards](#customizing-prometheus-alertingrecording-rules-and-grafana-dashboards)
-  * [Exposing Prometheus/Alermanager/Grafana via Ingress](#exposing-prometheusalermanagergrafana-via-ingress)
-* [Minikube Example](#minikube-example)
-* [Troubleshooting](#troubleshooting)
-  * [Error retrieving kubelet metrics](#error-retrieving-kubelet-metrics)
-  * [kube-state-metrics resource usage](#kube-state-metrics-resource-usage)
-* [Contributing](#contributing)
+- [kube-prometheus](#kube-prometheus)
+  - [Table of contents](#table-of-contents)
+  - [Prerequisites](#prerequisites)
+    - [minikube](#minikube)
+  - [Quickstart](#quickstart)
+    - [Access the dashboards](#access-the-dashboards)
+  - [Customizing Kube-Prometheus](#customizing-kube-prometheus)
+    - [Installing](#installing)
+    - [Compiling](#compiling)
+      - [Apply the kube-prometheus stack](#apply-the-kube-prometheus-stack)
+    - [Containerized Installing and Compiling](#containerized-installing-and-compiling)
+  - [Update from upstream project](#update-from-upstream-project)
+    - [Update jb](#update-jb)
+    - [Update kube-prometheus](#update-kube-prometheus)
+    - [Compile the manifests and apply](#compile-the-manifests-and-apply)
+  - [Configuration](#configuration)
+  - [Customization Examples](#customization-examples)
+    - [Cluster Creation Tools](#cluster-creation-tools)
+    - [Internal Registry](#internal-registry)
+    - [NodePorts](#nodeports)
+    - [Prometheus Object Name](#prometheus-object-name)
+    - [node-exporter DaemonSet namespace](#node-exporter-daemonset-namespace)
+    - [Alertmanager configuration](#alertmanager-configuration)
+    - [Adding additional namespaces to monitor](#adding-additional-namespaces-to-monitor)
+      - [Defining the ServiceMonitor for each addional Namespace](#defining-the-servicemonitor-for-each-addional-namespace)
+    - [Static etcd configuration](#static-etcd-configuration)
+    - [Pod Anti-Affinity](#pod-anti-affinity)
+    - [Customizing Prometheus alerting/recording rules and Grafana dashboards](#customizing-prometheus-alertingrecording-rules-and-grafana-dashboards)
+    - [Exposing Prometheus/Alermanager/Grafana via Ingress](#exposing-prometheusalermanagergrafana-via-ingress)
+  - [Minikube Example](#minikube-example)
+  - [Troubleshooting](#troubleshooting)
+    - [Error retrieving kubelet metrics](#error-retrieving-kubelet-metrics)
+      - [Authentication problem](#authentication-problem)
+      - [Authorization problem](#authorization-problem)
+    - [kube-state-metrics resource usage](#kube-state-metrics-resource-usage)
+  - [Contributing](#contributing)

 ## Prerequisites
@@ -59,13 +71,13 @@ This adapter is an Extension API Server and Kubernetes needs to be have this fea
 ### minikube

-In order to just try out this stack, start [minikube](https://github.com/kubernetes/minikube) with the following command:
+To try out this stack, start [minikube](https://github.com/kubernetes/minikube) with the following command:

 ```shell
-$ minikube delete && minikube start --kubernetes-version=v1.13.5 --memory=4096 --bootstrapper=kubeadm --extra-config=kubelet.authentication-token-webhook=true --extra-config=kubelet.authorization-mode=Webhook --extra-config=scheduler.address=0.0.0.0 --extra-config=controller-manager.address=0.0.0.0
+$ minikube delete && minikube start --kubernetes-version=v1.16.0 --memory=6g --bootstrapper=kubeadm --extra-config=kubelet.authentication-token-webhook=true --extra-config=kubelet.authorization-mode=Webhook --extra-config=scheduler.address=0.0.0.0 --extra-config=controller-manager.address=0.0.0.0
 ```

-The kube-prometheus stack includes a resource metrics API server, like the metrics-server does. So ensure the metrics-server plugin is disabled on minikube:
+The kube-prometheus stack includes a resource metrics API server, so the metrics-server addon is not necessary. Ensure the metrics-server addon is disabled on minikube:

 ```shell
 $ minikube addons disable metrics-server
@@ -73,23 +85,28 @@ $ minikube addons disable metrics-server
 ## Quickstart

+>Note: For versions before Kubernetes v1.14.0 use the release-0.1 branch instead of master.
+
 This project is intended to be used as a library (i.e. the intent is not for you to create your own modified copy of this repository).

 Though for a quickstart a compiled version of the Kubernetes [manifests](manifests) generated with this library (specifically with `example.jsonnet`) is checked into this repository in order to try the content out quickly. To try out the stack un-customized run:

-* Simply create the stack:
+* Create the monitoring stack using the config in the `manifests` directory:
+
 ```shell
-$ kubectl create -f manifests/
-
-# It can take a few seconds for the above 'create manifests' command to fully create the following resources, so verify the resources are ready before proceeding.
-$ until kubectl get customresourcedefinitions servicemonitors.monitoring.coreos.com ; do date; sleep 1; echo ""; done
-$ until kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done
-$ kubectl apply -f manifests/ # This command sometimes may need to be done twice (to workaround a race condition).
+# Create the namespace and CRDs, and then wait for them to be availble before creating the remaining resources
+kubectl create -f manifests/setup
+until kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done
+kubectl create -f manifests/
 ```

+We create the namespace and CustomResourceDefinitions first to avoid race conditions when deploying the monitoring components.
+Alternatively, the resources in both folders can be applied with a single command
+`kubectl create -f manifests/setup -f manifests`, but it may be necessary to run the command multiple times for all components to
+be created successfullly.
+
 * And to teardown the stack:
+
 ```shell
-$ kubectl delete -f manifests/
+kubectl delete --ignore-not-found=true -f manifests/ -f manifests/setup
 ```

+### Access the dashboards
@@ -160,14 +177,26 @@ Here's [example.jsonnet](example.jsonnet):
 [embedmd]:# (example.jsonnet)
 ```jsonnet
 local kp =
-  (import 'kube-prometheus/kube-prometheus.libsonnet') + {
+  (import 'kube-prometheus/kube-prometheus.libsonnet') +
+  // Uncomment the following imports to enable its patches
+  // (import 'kube-prometheus/kube-prometheus-anti-affinity.libsonnet') +
+  // (import 'kube-prometheus/kube-prometheus-managed-cluster.libsonnet') +
+  // (import 'kube-prometheus/kube-prometheus-node-ports.libsonnet') +
+  // (import 'kube-prometheus/kube-prometheus-static-etcd.libsonnet') +
+  // (import 'kube-prometheus/kube-prometheus-thanos-sidecar.libsonnet') +
+  {
     _config+:: {
       namespace: 'monitoring',
     },
   };

-{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
-{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
+{ ['setup/0namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
+{
+  ['setup/prometheus-operator-' + name]: kp.prometheusOperator[name]
+  for name in std.filter((function(name) name != 'serviceMonitor'), std.objectFields(kp.prometheusOperator))
+} +
+// serviceMonitor is separated so that it can be created after the CRDs are ready
+{ 'prometheus-operator-serviceMonitor': kp.prometheusOperator.serviceMonitor } +
 { ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
 { ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
 { ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
@@ -191,7 +220,7 @@ set -o pipefail
 # Make sure to start with a clean 'manifests' dir
 rm -rf manifests
-mkdir manifests
+mkdir -p manifests/setup

 # optional, but we would like to generate yaml, not json
 jsonnet -J vendor -m manifests "${1-example.jsonnet}" | xargs -I{} sh -c 'cat {} | gojsontoyaml > {}.yaml; rm -f {}' -- {}
@@ -207,22 +236,22 @@ The previous steps (compilation) has created a bunch of manifest files in the ma
Now simply use `kubectl` to install Prometheus and Grafana as per your configuration:
```shell
# Update the namespace and CRDs, and then wait for them to be availble before creating the remaining resources
$ kubectl apply -f manifests/setup
$ kubectl apply -f manifests/
```
Alternatively, the resources in both folders can be applied with a single command
`kubectl apply -Rf manifests`, but it may be necessary to run the command multiple times for all components to
be created successfullly.
Check the monitoring namespace (or the namespace you have specific in `namespace: `) and make sure the pods are running. Prometheus and Grafana should be up and running soon.
### Containerized Installing and Compiling
If you would rather not install `jb`, `jsonnet`, or `gojsontoyaml` locally, then build the `po-jsonnet` Docker image (you'll need a copy of this repository for this). Do the following from this `kube-prometheus` directory:
If you would rather not install `jb`, `jsonnet`, or `gojsontoyaml` locally, then use the `quay.io/coreos/jsonnet-ci` container image. Do the following from this `kube-prometheus` directory:
```shell
$ make hack/jsonnet-docker-image
```
Then you can do commands such as the following:
```shell
$ docker run --rm -v $(pwd):$(pwd) --workdir $(pwd) po-jsonnet jb update
$ docker run --rm -v $(pwd):$(pwd) --workdir $(pwd) po-jsonnet ./build.sh example.jsonnet
$ docker run --rm -v $(pwd):$(pwd) --workdir $(pwd) quay.io/coreos/jsonnet-ci jb update
$ docker run --rm -v $(pwd):$(pwd) --workdir $(pwd) quay.io/coreos/jsonnet-ci ./build.sh example.jsonnet
```
## Update from upstream project
@@ -256,13 +285,12 @@ These are the available fields with their respective default values:
namespace: "default",
versions+:: {
alertmanager: "v0.16.2",
nodeExporter: "v0.17.0",
alertmanager: "v0.17.0",
nodeExporter: "v0.18.1",
kubeStateMetrics: "v1.5.0",
kubeRbacProxy: "v0.4.1",
addonResizer: "1.8.4",
prometheusOperator: "v0.29.0",
prometheus: "v2.7.2",
prometheusOperator: "v0.30.0",
prometheus: "v2.10.0",
},
imageRepos+:: {
@@ -270,7 +298,6 @@ These are the available fields with their respective default values:
alertmanager: "quay.io/prometheus/alertmanager",
kubeStateMetrics: "quay.io/coreos/kube-state-metrics",
kubeRbacProxy: "quay.io/coreos/kube-rbac-proxy",
addonResizer: "k8s.gcr.io/addon-resizer",
nodeExporter: "quay.io/prometheus/node-exporter",
prometheusOperator: "quay.io/coreos/prometheus-operator",
},
@@ -309,8 +336,6 @@ These are the available fields with their respective default values:
baseCPU: '100m',
baseMemory: '150Mi',
cpuPerNode: '2m',
memoryPerNode: '30Mi',
},
nodeExporter+:: {
@@ -541,6 +566,60 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```
#### Defining the ServiceMonitor for each additional Namespace
In order for Prometheus to be able to discover and scrape services inside the additional namespaces specified in the previous step, you need to define a ServiceMonitor resource.
> Typically it is up to the users of a namespace to provision the ServiceMonitor resource, but in case you want to generate it with the same tooling as the rest of the cluster monitoring infrastructure, this is a guide on how to achieve this.
You can define ServiceMonitor resources in your `jsonnet` spec. See the snippet below:
[embedmd]:# (examples/additional-namespaces-servicemonitor.jsonnet)
```jsonnet
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
_config+:: {
namespace: 'monitoring',
prometheus+:: {
namespaces+: ['my-namespace', 'my-second-namespace'],
},
},
prometheus+:: {
serviceMonitorMyNamespace: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'my-servicemonitor',
namespace: 'my-namespace',
},
spec: {
jobLabel: 'app',
endpoints: [
{
port: 'http-metrics',
},
],
selector: {
matchLabels: {
app: 'myapp',
},
},
},
},
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```
> NOTE: make sure your service resources have the right labels (e.g. `'app': 'myapp'`) applied. Prometheus uses Kubernetes labels to discover resources inside the namespaces.
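For example, to double-check the labels against the hypothetical names used above:
```shell
# The ServiceMonitor above selects on matchLabels app=myapp, so the Service
# in my-namespace must carry that label for targets to appear.
kubectl -n my-namespace get services -l app=myapp --show-labels
```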
### Static etcd configuration
In order to configure a static etcd cluster to scrape there is a simple [kube-prometheus-static-etcd.libsonnet](jsonnet/kube-prometheus/kube-prometheus-static-etcd.libsonnet) mixin prepared - see [etcd.jsonnet](examples/etcd.jsonnet) for an example of how to use that mixin, and [Monitoring external etcd](docs/monitoring-external-etcd.md) for more information.
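Like the other examples, it compiles with the build script shown earlier, which accepts the jsonnet entry point as its first argument:
```shell
# Render the static etcd example instead of the default example.jsonnet;
# the generated manifests land in manifests/ as usual.
./build.sh examples/etcd.jsonnet
```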
@@ -579,6 +658,7 @@ As described in the [Prerequisites](#prerequisites) section, in order to retriev
If you are using Google's GKE product, see [cAdvisor support](docs/GKE-cadvisor-support.md).
If you are using AWS EKS, see [AWS EKS CNI support](docs/EKS-cni-support.md)
#### Authentication problem
The Prometheus `/targets` page will show the kubelet job with the error `403 Unauthorized` when token authentication is not enabled. Ensure that the `--authentication-token-webhook=true` flag is enabled on all kubelet configurations.
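One way to check is to inspect the kubelet's flags or its config file on each node. A sketch, assuming the kubeadm default config path (it may differ on your distribution):
```shell
# Webhook token authentication can be set either as a command-line flag...
ps -ef | grep kubelet | grep -o -- '--authentication-token-webhook=[a-z]*'
# ...or in the KubeletConfiguration file; expect 'enabled: true' under webhook.
grep -A 2 'webhook:' /var/lib/kubelet/config.yaml
```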

View File

@@ -9,7 +9,7 @@ set -o pipefail
# Make sure to start with a clean 'manifests' dir
rm -rf manifests
mkdir manifests
mkdir -p manifests/setup
# optional, but we would like to generate yaml, not json
jsonnet -J vendor -m manifests "${1-example.jsonnet}" | xargs -I{} sh -c 'cat {} | gojsontoyaml > {}.yaml; rm -f {}' -- {}

docs/EKS-cni-support.md
View File

@@ -0,0 +1,42 @@
# CNI monitoring special configuration updates for EKS
AWS EKS uses the [CNI](https://github.com/aws/amazon-vpc-cni-k8s) networking plugin for pod networking in Kubernetes, using Elastic Network Interfaces on AWS.
One fatal issue that can occur is running out of IP addresses in your EKS cluster (this generally happens due to misconfigurations where pods keep getting scheduled).
You can monitor the `awscni` using kube-prometheus with:
[embedmd]:# (../examples/eks-cni-example.jsonnet)
```jsonnet
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-eks.libsonnet') + {
_config+:: {
namespace: 'monitoring',
},
prometheusRules+:: {
groups+: [
{
name: 'example-group',
rules: [
{
record: 'aws_eks_available_ip',
expr: 'sum by(instance) (awscni_total_ip_addresses) - sum by(instance) (awscni_assigned_ip_addresses) < 10',
},
],
},
],
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) }
```
After you have generated the required YAML file, run:
```shell
kubectl apply -f manifests/prometheus-serviceMonitorAwsEksCNI.yaml
```
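Once Prometheus has loaded the rule, a quick spot-check is to port-forward the `prometheus-k8s` Service and query the recording rule (a sketch, assuming the default `monitoring` namespace):
```shell
kubectl --namespace monitoring port-forward svc/prometheus-k8s 9090 &
# Query the aws_eks_available_ip rule defined in the example above.
curl -s 'http://localhost:9090/api/v1/query?query=aws_eks_available_ip'
```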

View File

@@ -11,14 +11,26 @@ As a basis, all examples in this guide are based on the base example of the kube
[embedmd]:# (../example.jsonnet)
```jsonnet
local kp =
(import 'kube-prometheus/kube-prometheus.libsonnet') + {
(import 'kube-prometheus/kube-prometheus.libsonnet') +
// Uncomment the following imports to enable its patches
// (import 'kube-prometheus/kube-prometheus-anti-affinity.libsonnet') +
// (import 'kube-prometheus/kube-prometheus-managed-cluster.libsonnet') +
// (import 'kube-prometheus/kube-prometheus-node-ports.libsonnet') +
// (import 'kube-prometheus/kube-prometheus-static-etcd.libsonnet') +
// (import 'kube-prometheus/kube-prometheus-thanos-sidecar.libsonnet') +
{
_config+:: {
namespace: 'monitoring',
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['setup/0namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{
['setup/prometheus-operator-' + name]: kp.prometheusOperator[name]
for name in std.filter((function(name) name != 'serviceMonitor'), std.objectFields(kp.prometheusOperator))
} +
// serviceMonitor is separated so that it can be created after the CRDs are ready
{ 'prometheus-operator-serviceMonitor': kp.prometheusOperator.serviceMonitor } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
@@ -70,6 +82,7 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```
@@ -106,6 +119,7 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```
@@ -128,7 +142,12 @@ Then import it in jsonnet:
[embedmd]:# (../examples/prometheus-additional-rendered-rule-example.jsonnet)
```jsonnet
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
prometheusAlerts+:: (import 'existingrule.json'),
_config+:: {
namespace: 'monitoring',
},
prometheusAlerts+:: {
groups+: (import 'existingrule.json').groups,
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
@@ -137,6 +156,7 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```
### Changing default rules
@@ -290,3 +310,24 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```
In case you have lots of JSON dashboards exported from the Grafana UI, the above approach is going to take a lot of time. To improve performance, we can use the `rawGrafanaDashboards` field and provide its value as a JSON string by using `importstr`:
[embedmd]:# (../examples/grafana-additional-rendered-dashboard-example-2.jsonnet)
```jsonnet
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
_config+:: {
namespace: 'monitoring',
},
rawGrafanaDashboards+:: {
'my-dashboard.json': (importstr 'example-grafana-dashboard.json'),
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```

View File

@@ -81,7 +81,7 @@ k.core.v1.list.new([
])
```
In order to expose Alertmanager and Grafana, create additional fields containing an ingress object, pointing at the `alertmanager` or `grafana` Service instead of `prometheus-k8s`. Make sure to use the correct port for each: for Alertmanager it is also `web`, for Grafana it is `http`. Be sure to also specify the appropriate external URL.
In order to expose Alertmanager and Grafana, create additional fields containing an ingress object, pointing at the `alertmanager` or `grafana` Service instead of `prometheus-k8s`. Make sure to use the correct port for each: for Alertmanager it is also `web`, for Grafana it is `http`. Be sure to also specify the appropriate external URL. Note that the external URL for Grafana is set differently than the external URL for Prometheus or Alertmanager; see [ingress.jsonnet](../examples/ingress.jsonnet) for how to set the Grafana external URL.
In order to render the ingress objects similarly to the other objects, use them as demonstrated in the [main readme](../README.md#usage):

View File

@@ -1,12 +1,24 @@
local kp =
(import 'kube-prometheus/kube-prometheus.libsonnet') + {
(import 'kube-prometheus/kube-prometheus.libsonnet') +
// Uncomment the following imports to enable its patches
// (import 'kube-prometheus/kube-prometheus-anti-affinity.libsonnet') +
// (import 'kube-prometheus/kube-prometheus-managed-cluster.libsonnet') +
// (import 'kube-prometheus/kube-prometheus-node-ports.libsonnet') +
// (import 'kube-prometheus/kube-prometheus-static-etcd.libsonnet') +
// (import 'kube-prometheus/kube-prometheus-thanos-sidecar.libsonnet') +
{
_config+:: {
namespace: 'monitoring',
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['setup/0namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{
['setup/prometheus-operator-' + name]: kp.prometheusOperator[name]
for name in std.filter((function(name) name != 'serviceMonitor'), std.objectFields(kp.prometheusOperator))
} +
// serviceMonitor is separated so that it can be created after the CRDs are ready
{ 'prometheus-operator-serviceMonitor': kp.prometheusOperator.serviceMonitor } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +

View File

@@ -0,0 +1,40 @@
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
_config+:: {
namespace: 'monitoring',
prometheus+:: {
namespaces+: ['my-namespace', 'my-second-namespace'],
},
},
prometheus+:: {
serviceMonitorMyNamespace: {
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'my-servicemonitor',
namespace: 'my-namespace',
},
spec: {
jobLabel: 'app',
endpoints: [
{
port: 'http-metrics',
},
],
selector: {
matchLabels: {
app: 'myapp',
},
},
},
},
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }

View File

@@ -0,0 +1,26 @@
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-eks.libsonnet') + {
_config+:: {
namespace: 'monitoring',
},
prometheusRules+:: {
groups+: [
{
name: 'example-group',
rules: [
{
record: 'aws_eks_available_ip',
expr: 'sum by(instance) (awscni_total_ip_addresses) - sum by(instance) (awscni_assigned_ip_addresses) < 10',
},
],
},
],
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) }

View File

@@ -14,12 +14,16 @@ spec:
port: 8080
targetPort: web
---
apiVersion: extensions/v1beta1
apiVersion: apps/v1
kind: Deployment
metadata:
name: example-app
namespace: default
spec:
selector:
matchLabels:
app: example-app
version: 1.1.3
replicas: 4
template:
metadata:

View File

@@ -0,0 +1,16 @@
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
_config+:: {
namespace: 'monitoring',
},
rawGrafanaDashboards+:: {
'my-dashboard.json': (importstr 'example-grafana-dashboard.json'),
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }

View File

@@ -6,8 +6,15 @@ local kp =
};
local manifests =
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
// Uncomment line below to enable vertical auto scaling of kube-state-metrics
//{ ['ksm-autoscaler-' + name]: kp.ksmAutoscaler[name] for name in std.objectFields(kp.ksmAutoscaler) } +
{ ['setup/0namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{
['setup/prometheus-operator-' + name]: kp.prometheusOperator[name]
for name in std.filter((function(name) name != 'serviceMonitor'), std.objectFields(kp.prometheusOperator))
} +
// serviceMonitor is separated so that it can be created after the CRDs are ready
{ 'prometheus-operator-serviceMonitor': kp.prometheusOperator.serviceMonitor } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +

View File

@@ -29,4 +29,5 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }

View File

@@ -23,4 +23,5 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }

View File

@@ -1,5 +1,10 @@
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
prometheusAlerts+:: (import 'existingrule.json'),
_config+:: {
namespace: 'monitoring',
},
prometheusAlerts+:: {
groups+: (import 'existingrule.json').groups,
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
@@ -8,4 +13,5 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }

View File

@@ -0,0 +1,38 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local statefulSet = k.apps.v1beta2.statefulSet;
local toleration = statefulSet.mixin.spec.template.spec.tolerationsType;
{
_config+:: {
tolerations+:: [
{
key: 'key1',
operator: 'Equal',
value: 'value1',
effect: 'NoSchedule',
},
{
key: 'key2',
operator: 'Exists',
},
]
},
local withTolerations() = {
tolerations: [
toleration.new() + (
if std.objectHas(t, 'key') then toleration.withKey(t.key) else toleration) + (
if std.objectHas(t, 'operator') then toleration.withOperator(t.operator) else toleration) + (
if std.objectHas(t, 'value') then toleration.withValue(t.value) else toleration) + (
if std.objectHas(t, 'effect') then toleration.withEffect(t.effect) else toleration),
for t in $._config.tolerations
],
},
prometheus+: {
prometheus+: {
spec+:
withTolerations(),
},
},
}

View File

@@ -14,6 +14,14 @@ rules:
- get
- list
- watch
- apiGroups:
- "apps"
resources:
- deployments
verbs:
- get
- list
- watch
- apiGroups:
- "extensions"
resources:

View File

@@ -1,4 +1,4 @@
apiVersion: extensions/v1beta1
apiVersion: apps/v1
kind: Deployment
metadata:
name: metrics-server

go.mod
View File

@@ -4,25 +4,30 @@ go 1.12
require (
github.com/Jeffail/gabs v1.2.0
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d // indirect
github.com/gogo/protobuf v1.1.1 // indirect
github.com/google/gofuzz v0.0.0-20170612174753-24818f796faf // indirect
github.com/googleapis/gnostic v0.0.0-20170729233727-0c5108395e2d // indirect
github.com/imdario/mergo v0.3.7 // indirect
github.com/json-iterator/go v0.0.0-20180701071628-ab8a2e0c74be // indirect
github.com/jsonnet-bundler/jsonnet-bundler v0.1.0 // indirect
github.com/mattn/go-colorable v0.1.4 // indirect
github.com/mattn/go-isatty v0.0.10 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.1 // indirect
github.com/pkg/errors v0.8.1
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/spf13/pflag v1.0.3 // indirect
github.com/stretchr/testify v1.2.2 // indirect
github.com/stretchr/objx v0.2.0 // indirect
golang.org/x/crypto v0.0.0-20190411191339-88737f569e3a // indirect
golang.org/x/net v0.0.0-20190206173232-65e2d4e15006 // indirect
golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a // indirect
golang.org/x/sys v0.0.0-20191023151326-f89234f9a2c2 // indirect
golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db // indirect
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 // indirect
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.2.2 // indirect
gopkg.in/yaml.v2 v2.2.4 // indirect
k8s.io/api v0.0.0-20190313235455-40a48860b5ab // indirect
k8s.io/apimachinery v0.0.0-20190313205120-d7deff9243b1
k8s.io/client-go v11.0.0+incompatible

go.sum
View File

@@ -1,8 +1,18 @@
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
github.com/Jeffail/gabs v1.2.0 h1:uFhoIVTtsX7hV2RxNgWad8gMU+8OJdzFbOathJdhD3o=
github.com/Jeffail/gabs v1.2.0/go.mod h1:6xMvQMK4k33lb7GUUpaAPh6nKMmemQeg5d4gn7/bOXc=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafoB+tBA3gMyHYHrpOtNuDiK/uB5uXxq5wM=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d h1:UQZhZ2O0vMHr2cI+DC1Mbh0TJxzA3RcLoMsFw+aXw7E=
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho=
github.com/campoy/embedmd v1.0.0/go.mod h1:oxyr9RCiSXg0M3VJ3ks0UGfp98BpSSGr0kpiX3MzVl8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fatih/color v1.7.0 h1:DkWD4oS2D8LGGgTQ6IvwJJXSL5Vp2ffcQg58nFV38Ys=
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/gogo/protobuf v1.1.1 h1:72R+M5VuhED/KujmZVcIquuo8mBgX4oVda//DQb3PXo=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM=
@@ -15,18 +25,32 @@ github.com/imdario/mergo v0.3.7 h1:Y+UAYTZ7gDEuOfhxKWy+dvb5dRQ6rJjFSdX2HZY1/gI=
github.com/imdario/mergo v0.3.7/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
github.com/json-iterator/go v0.0.0-20180701071628-ab8a2e0c74be h1:AHimNtVIpiBjPUhEF5KNCkrUyqTSA5zWUl8sQ2bfGBE=
github.com/json-iterator/go v0.0.0-20180701071628-ab8a2e0c74be/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/jsonnet-bundler/jsonnet-bundler v0.1.0 h1:T/HtHFr+mYCRULrH1x/RnoB0prIs0rMkolJhFMXNC9A=
github.com/jsonnet-bundler/jsonnet-bundler v0.1.0/go.mod h1:YKsSFc9VFhhLITkJS3X2PrRqWG9u2Jq99udTdDjQLfM=
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
github.com/mattn/go-colorable v0.1.4 h1:snbPLB8fVfU9iwbbo30TPtbLRzwWu6aJS6Xh4eaaviA=
github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
github.com/mattn/go-isatty v0.0.6/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
github.com/mattn/go-isatty v0.0.10 h1:qxFzApOv4WsAL965uUPIsXzAKCZxN2p9UqdhFS4ZW10=
github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg=
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
golang.org/x/crypto v0.0.0-20190411191339-88737f569e3a h1:Igim7XhdOpBnWPuYJ70XcNpq8q3BCACtVgNfoJxOV7g=
golang.org/x/crypto v0.0.0-20190411191339-88737f569e3a/go.mod h1:WFFai1msRO1wXaEeE5yQxYXgSfI8pQAWXbQop6sCtWE=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -37,8 +61,12 @@ golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a h1:tImsplftrFpALCYumobsd0
golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4 h1:YUO/7uOKsKeq9UokNS62b8FYywz3ker1l1vDZRCRefw=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190310054646-10058d7d4faa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e h1:nFYrTHrdrAOpShe27kaFHjsqYSEQ0KWqdWLu3xuZJts=
golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191023151326-f89234f9a2c2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db h1:6/JqlYfC1CCaLnGceQTI+sDGhC9UBSPAsBqI0Gun6kU=
golang.org/x/text v0.3.1-0.20181227161524-e6919f6577db/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
@@ -47,12 +75,16 @@ golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxb
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
google.golang.org/appengine v1.4.0 h1:/wp5JvzpHIxhs/dumFmF7BXTf3Z+dd4uXta4kVyO508=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
gopkg.in/alecthomas/kingpin.v2 v2.2.6 h1:jMFz6MfLP0/4fUyZle81rXUoxOBFi19VUFKVDOQfozc=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
k8s.io/api v0.0.0-20190313235455-40a48860b5ab h1:DG9A67baNpoeweOy2spF1OWHhnVY5KR7/Ek/+U1lVZc=
k8s.io/api v0.0.0-20190313235455-40a48860b5ab/go.mod h1:iuAfoD4hCxJ8Onx9kaTIt30j7jUFS00AXQi6QMi99vA=
k8s.io/apimachinery v0.0.0-20190313205120-d7deff9243b1 h1:IS7K02iBkQXpCeieSiyJjGoLSdVOv2DbPaWHJ+ZtgKg=

View File

@@ -1,11 +1,11 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
_config+:: {
namespace: 'default',
versions+:: {
alertmanager: 'v0.17.0',
alertmanager: 'v0.18.0',
},
imageRepos+:: {
@@ -13,13 +13,13 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
},
alertmanager+:: {
name: $._config.alertmanager.name,
name: 'main',
config: {
global: {
resolve_timeout: '5m',
},
route: {
group_by: ['job'],
group_by: ['namespace'],
group_wait: '30s',
group_interval: '5m',
repeat_interval: '12h',
@@ -112,7 +112,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
replicas: $._config.alertmanager.replicas,
version: $._config.versions.alertmanager,
baseImage: $._config.imageRepos.alertmanager,
nodeSelector: { 'beta.kubernetes.io/os': 'linux' },
nodeSelector: { 'kubernetes.io/os': 'linux' },
serviceAccountName: 'alertmanager-' + $._config.alertmanager.name,
securityContext: {
runAsUser: 1000,

View File

@@ -1,5 +1,4 @@
(import 'alertmanager.libsonnet') +
(import 'general.libsonnet') +
(import 'node.libsonnet') +
(import 'prometheus.libsonnet') +
(import 'prometheus-operator.libsonnet')

View File

@@ -7,9 +7,9 @@
{
alert: 'TargetDown',
annotations: {
message: '{{ $value }}% of the {{ $labels.job }} targets are down.',
message: '{{ printf "%.4g" $value }}% of the {{ $labels.job }} targets in {{ $labels.namespace }} namespace are down.',
},
expr: '100 * (count(up == 0) BY (job) / count(up) BY (job)) > 10',
expr: '100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10',
'for': '10m',
labels: {
severity: 'warning',

View File

@@ -1,37 +1,6 @@
{
prometheusAlerts+:: {
groups+: [
{
name: 'kube-prometheus-node-alerting.rules',
rules: [
{
alert: 'NodeDiskRunningFull',
annotations: {
message: 'Device {{ $labels.device }} of node-exporter {{ $labels.namespace }}/{{ $labels.pod }} will be full within the next 24 hours.',
},
expr: |||
(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[6h], 3600 * 24) < 0)
||| % $._config,
'for': '30m',
labels: {
severity: 'warning',
},
},
{
alert: 'NodeDiskRunningFull',
annotations: {
message: 'Device {{ $labels.device }} of node-exporter {{ $labels.namespace }}/{{ $labels.pod }} will be full within the next 2 hours.',
},
expr: |||
(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[30m], 3600 * 2) < 0)
||| % $._config,
'for': '10m',
labels: {
severity: 'critical',
},
},
],
},
{
name: 'node-time',
rules: [
@@ -41,7 +10,7 @@
message: 'Clock skew detected on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}. Ensure NTP is configured correctly on this host.',
},
expr: |||
abs(node_timex_offset_seconds{%(nodeExporterSelector)s}) > 0.03
abs(node_timex_offset_seconds{%(nodeExporterSelector)s}) > 0.05
||| % $._config,
'for': '2m',
labels: {
@@ -53,32 +22,6 @@
{
name: 'node-network',
rules: [
{
alert: 'NetworkReceiveErrors',
annotations: {
message: 'Network interface "{{ $labels.device }}" showing receive errors on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}"',
},
expr: |||
rate(node_network_receive_errs_total{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s}[2m]) > 0
||| % $._config,
'for': '2m',
labels: {
severity: 'warning',
},
},
{
alert: 'NetworkTransmitErrors',
annotations: {
message: 'Network interface "{{ $labels.device }}" showing transmit errors on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}"',
},
expr: |||
rate(node_network_transmit_errs_total{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s}[2m]) > 0
||| % $._config,
'for': '2m',
labels: {
severity: 'warning',
},
},
{
alert: 'NodeNetworkInterfaceFlapping',
annotations: {

View File

@@ -1,151 +0,0 @@
{
prometheusAlerts+:: {
groups+: [
{
name: 'prometheus.rules',
rules: [
{
alert: 'PrometheusConfigReloadFailed',
annotations: {
description: "Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}}",
summary: "Reloading Prometheus' configuration failed",
},
expr: |||
prometheus_config_last_reload_successful{%(prometheusSelector)s} == 0
||| % $._config,
'for': '10m',
labels: {
severity: 'warning',
},
},
{
alert: 'PrometheusNotificationQueueRunningFull',
annotations: {
description: "Prometheus' alert notification queue is running full for {{$labels.namespace}}/{{ $labels.pod}}",
summary: "Prometheus' alert notification queue is running full",
},
expr: |||
predict_linear(prometheus_notifications_queue_length{%(prometheusSelector)s}[5m], 60 * 30) > prometheus_notifications_queue_capacity{%(prometheusSelector)s}
||| % $._config,
'for': '10m',
labels: {
severity: 'warning',
},
},
{
alert: 'PrometheusErrorSendingAlerts',
annotations: {
description: 'Errors while sending alerts from Prometheus {{$labels.namespace}}/{{ $labels.pod}} to Alertmanager {{$labels.Alertmanager}}',
summary: 'Errors while sending alert from Prometheus',
},
expr: |||
rate(prometheus_notifications_errors_total{%(prometheusSelector)s}[5m]) / rate(prometheus_notifications_sent_total{%(prometheusSelector)s}[5m]) > 0.01
||| % $._config,
'for': '10m',
labels: {
severity: 'warning',
},
},
{
alert: 'PrometheusErrorSendingAlerts',
annotations: {
description: 'Errors while sending alerts from Prometheus {{$labels.namespace}}/{{ $labels.pod}} to Alertmanager {{$labels.Alertmanager}}',
summary: 'Errors while sending alerts from Prometheus',
},
expr: |||
rate(prometheus_notifications_errors_total{%(prometheusSelector)s}[5m]) / rate(prometheus_notifications_sent_total{%(prometheusSelector)s}[5m]) > 0.03
||| % $._config,
'for': '10m',
labels: {
severity: 'critical',
},
},
{
alert: 'PrometheusNotConnectedToAlertmanagers',
annotations: {
description: 'Prometheus {{ $labels.namespace }}/{{ $labels.pod}} is not connected to any Alertmanagers',
summary: 'Prometheus is not connected to any Alertmanagers',
},
expr: |||
prometheus_notifications_alertmanagers_discovered{%(prometheusSelector)s} < 1
||| % $._config,
'for': '10m',
labels: {
severity: 'warning',
},
},
{
alert: 'PrometheusTSDBReloadsFailing',
annotations: {
description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}} reload failures over the last four hours.',
summary: 'Prometheus has issues reloading data blocks from disk',
},
expr: |||
increase(prometheus_tsdb_reloads_failures_total{%(prometheusSelector)s}[2h]) > 0
||| % $._config,
'for': '12h',
labels: {
severity: 'warning',
},
},
{
alert: 'PrometheusTSDBCompactionsFailing',
annotations: {
description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}} compaction failures over the last four hours.',
summary: 'Prometheus has issues compacting sample blocks',
},
expr: |||
increase(prometheus_tsdb_compactions_failed_total{%(prometheusSelector)s}[2h]) > 0
||| % $._config,
'for': '12h',
labels: {
severity: 'warning',
},
},
{
alert: 'PrometheusTSDBWALCorruptions',
annotations: {
description: '{{$labels.job}} at {{$labels.instance}} has a corrupted write-ahead log (WAL).',
summary: 'Prometheus write-ahead log is corrupted',
},
expr: |||
prometheus_tsdb_wal_corruptions_total{%(prometheusSelector)s} > 0
||| % $._config,
'for': '4h',
labels: {
severity: 'warning',
},
},
{
alert: 'PrometheusNotIngestingSamples',
annotations: {
description: "Prometheus {{ $labels.namespace }}/{{ $labels.pod}} isn't ingesting samples.",
summary: "Prometheus isn't ingesting samples",
},
expr: |||
rate(prometheus_tsdb_head_samples_appended_total{%(prometheusSelector)s}[5m]) <= 0
||| % $._config,
'for': '10m',
labels: {
severity: 'warning',
},
},
{
alert: 'PrometheusTargetScrapesDuplicate',
annotations: {
description: '{{$labels.namespace}}/{{$labels.pod}} has many samples rejected due to duplicate timestamps but different values',
summary: 'Prometheus has many samples rejected',
},
expr: |||
increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{%(prometheusSelector)s}[5m]) > 0
||| % $._config,
'for': '10m',
labels: {
severity: 'warning',
},
},
],
},
],
},
}

View File

@@ -0,0 +1,50 @@
[
// Drop all kubelet metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: 'kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)',
action: 'drop',
},
// Drop all scheduler metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: 'scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)',
action: 'drop',
},
// Drop all apiserver metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: 'apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)',
action: 'drop',
},
// Drop all docker metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: 'kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)',
action: 'drop',
},
// Drop all reflector metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: 'reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)',
action: 'drop',
},
// Drop all etcd metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: 'etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)',
action: 'drop',
},
// Drop all transformation metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: 'transformation_(transformation_latencies_microseconds|failures_total)',
action: 'drop',
},
// Drop all other metrics which are deprecated in kubernetes.
{
sourceLabels: ['__name__'],
regex: '(admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|AvailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)',
action: 'drop',
},
]

View File

@@ -18,7 +18,7 @@
"subdir": ""
}
},
"version": "release-0.1"
"version": "master"
},
{
"name": "grafana",
@@ -38,7 +38,7 @@
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "release-0.30"
"version": "release-0.34"
},
{
"name": "etcd-mixin",
@@ -49,6 +49,26 @@
}
},
"version": "master"
},
{
"name": "prometheus",
"source": {
"git": {
"remote": "https://github.com/prometheus/prometheus",
"subdir": "documentation/prometheus-mixin"
}
},
"version": "master"
},
{
"name": "node-mixin",
"source": {
"git": {
"remote": "https://github.com/prometheus/node_exporter",
"subdir": "docs/node-mixin"
}
},
"version": "master"
}
]
}

View File

@@ -0,0 +1,118 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
_config+:: {
versions+:: {
clusterVerticalAutoscaler: "v0.8.1"
},
imageRepos+:: {
clusterVerticalAutoscaler: 'gcr.io/google_containers/cpvpa-amd64'
},
kubeStateMetrics+:: {
stepCPU: '1m',
stepMemory: '2Mi',
},
},
ksmAutoscaler+:: {
clusterRole:
local clusterRole = k.rbac.v1.clusterRole;
local rulesType = clusterRole.rulesType;
local rules = [
rulesType.new() +
rulesType.withApiGroups(['']) +
rulesType.withResources([
'nodes',
]) +
rulesType.withVerbs(['list', 'watch']),
];
clusterRole.new() +
clusterRole.mixin.metadata.withName('ksm-autoscaler') +
clusterRole.withRules(rules),
clusterRoleBinding:
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
clusterRoleBinding.new() +
clusterRoleBinding.mixin.metadata.withName('ksm-autoscaler') +
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
clusterRoleBinding.mixin.roleRef.withName('ksm-autoscaler') +
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'ksm-autoscaler', namespace: $._config.namespace }]),
roleBinding:
local roleBinding = k.rbac.v1.roleBinding;
roleBinding.new() +
roleBinding.mixin.metadata.withName('ksm-autoscaler') +
roleBinding.mixin.metadata.withNamespace($._config.namespace) +
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
roleBinding.mixin.roleRef.withName('ksm-autoscaler') +
roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'ksm-autoscaler' }]),
role:
local role = k.rbac.v1.role;
local rulesType = role.rulesType;
local extensionsRule = rulesType.new() +
rulesType.withApiGroups(['extensions']) +
rulesType.withResources([
'deployments',
]) +
rulesType.withVerbs(['patch']) +
rulesType.withResourceNames(['kube-state-metrics']);
local appsRule = rulesType.new() +
rulesType.withApiGroups(['apps']) +
rulesType.withResources([
'deployments',
]) +
rulesType.withVerbs(['patch']) +
rulesType.withResourceNames(['kube-state-metrics']);
local rules = [extensionsRule, appsRule];
role.new() +
role.mixin.metadata.withName('ksm-autoscaler') +
role.mixin.metadata.withNamespace($._config.namespace) +
role.withRules(rules),
serviceAccount:
local serviceAccount = k.core.v1.serviceAccount;
serviceAccount.new('ksm-autoscaler') +
serviceAccount.mixin.metadata.withNamespace($._config.namespace),
deployment:
local deployment = k.apps.v1.deployment;
local container = deployment.mixin.spec.template.spec.containersType;
local podSelector = deployment.mixin.spec.template.spec.selectorType;
local podLabels = { app: 'ksm-autoscaler' };
local kubeStateMetricsAutoscaler =
container.new('ksm-autoscaler', $._config.imageRepos.clusterVerticalAutoscaler + ':' + $._config.versions.clusterVerticalAutoscaler) +
container.withArgs([
'/cpvpa',
'--target=deployment/kube-state-metrics',
'--namespace=' + $._config.namespace,
'--logtostderr=true',
'--poll-period-seconds=10',
'--default-config={"kube-state-metrics":{"requests":{"cpu":{"base":"' + $._config.kubeStateMetrics.baseCPU + '","step":"' + $._config.kubeStateMetrics.stepCPU + '","nodesPerStep":1},"memory":{"base":"' + $._config.kubeStateMetrics.baseMemory + '","step":"' + $._config.kubeStateMetrics.stepMemory + '","nodesPerStep":1}},"limits":{"cpu":{"base":"' + $._config.kubeStateMetrics.baseCPU + '","step":"' + $._config.kubeStateMetrics.stepCPU + '","nodesPerStep":1},"memory":{"base":"' + $._config.kubeStateMetrics.baseMemory + '","step":"' + $._config.kubeStateMetrics.stepMemory + '","nodesPerStep":1}}}}'
]) +
container.mixin.resources.withRequests({cpu: '20m', memory: '10Mi'});
local c = [kubeStateMetricsAutoscaler];
deployment.new('ksm-autoscaler', 1, c, podLabels) +
deployment.mixin.metadata.withNamespace($._config.namespace) +
deployment.mixin.metadata.withLabels(podLabels) +
deployment.mixin.spec.selector.withMatchLabels(podLabels) +
deployment.mixin.spec.template.spec.withNodeSelector({ 'kubernetes.io/os': 'linux' }) +
deployment.mixin.spec.template.spec.securityContext.withRunAsNonRoot(true) +
deployment.mixin.spec.template.spec.securityContext.withRunAsUser(65534) +
deployment.mixin.spec.template.spec.withServiceAccountName('ksm-autoscaler'),
},
}

View File

@@ -0,0 +1,20 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
prometheus+:: {
clusterRole+: {
rules+:
local role = k.rbac.v1.role;
local policyRule = role.rulesType;
local rule = policyRule.new() +
policyRule.withApiGroups(['']) +
policyRule.withResources([
'services',
'endpoints',
'pods',
]) +
policyRule.withVerbs(['get', 'list', 'watch']);
[rule]
},
}
}

View File

@@ -1,16 +1,16 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local statefulSet = k.apps.v1beta2.statefulSet;
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local statefulSet = k.apps.v1.statefulSet;
local affinity = statefulSet.mixin.spec.template.spec.affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecutionType;
local matchExpression = affinity.mixin.podAffinityTerm.labelSelector.matchExpressionsType;
{
local antiaffinity(key, values) = {
local antiaffinity(key, values, namespace) = {
affinity: {
podAntiAffinity: {
preferredDuringSchedulingIgnoredDuringExecution: [
affinity.new() +
affinity.withWeight(100) +
affinity.mixin.podAffinityTerm.withNamespaces($._config.namespace) +
affinity.mixin.podAffinityTerm.withNamespaces(namespace) +
affinity.mixin.podAffinityTerm.withTopologyKey('kubernetes.io/hostname') +
affinity.mixin.podAffinityTerm.labelSelector.withMatchExpressions([
matchExpression.new() +
@@ -26,14 +26,16 @@ local matchExpression = affinity.mixin.podAffinityTerm.labelSelector.matchExpres
alertmanager+:: {
alertmanager+: {
spec+:
antiaffinity('alertmanager', [$._config.alertmanager.name]),
antiaffinity('alertmanager', [$._config.alertmanager.name], $._config.namespace),
},
},
prometheus+: {
local p = self,
prometheus+: {
spec+:
antiaffinity('prometheus', [$._config.prometheus.name]),
antiaffinity('prometheus', [p.name], p.namespace),
},
},
}

View File

@@ -1,4 +1,4 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;

View File

@@ -0,0 +1,76 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
prometheus+: {
serviceMonitorCoreDNS+: {
spec+: {
endpoints: [
{
bearerTokenFile: "/var/run/secrets/kubernetes.io/serviceaccount/token",
interval: "15s",
targetPort: 9153
}
]
},
},
AwsEksCniMetricService:
service.new('aws-node', { 'k8s-app' : 'aws-node' } , servicePort.newNamed('cni-metrics-port', 61678, 61678)) +
service.mixin.metadata.withNamespace('kube-system') +
service.mixin.metadata.withLabels({ 'k8s-app': 'aws-node' }) +
service.mixin.spec.withClusterIp('None'),
serviceMonitorAwsEksCNI:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'awsekscni',
namespace: $._config.namespace,
labels: {
'k8s-app': 'eks-cni',
},
},
spec: {
jobLabel: 'k8s-app',
selector: {
matchLabels: {
'k8s-app': 'aws-node',
},
},
namespaceSelector: {
matchNames: [
'kube-system',
],
},
endpoints: [
{
port: 'cni-metrics-port',
interval: '30s',
path: '/metrics',
},
],
},
},
},
prometheusRules+: {
groups+: [
{
name: 'kube-prometheus-eks.rules',
rules: [
{
expr: 'sum by(instance) (awscni_total_ip_addresses) - sum by(instance) (awscni_assigned_ip_addresses) < 10',
labels: {
severity: 'critical',
},
annotations: {
message: 'Instance {{ $labels.instance }} has less than 10 IPs available.'
},
'for': '10m',
alert: 'EksAvailableIPs'
},
],
},
],
},
}

View File

@@ -9,6 +9,12 @@
scheme: 'http',
interval: '30s',
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path'
},
],
},
{
port: 'http-metrics',
@@ -17,6 +23,21 @@
interval: '30s',
honorLabels: true,
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path'
},
],
metricRelabelings: [
// Drop a bunch of metrics which are disabled but still sent, see
// https://github.com/google/cadvisor/issues/1925.
{
sourceLabels: ['__name__'],
regex: 'container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)',
action: 'drop',
},
],
},
],
},

View File

@@ -1,4 +1,4 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;

View File

@@ -1,4 +1,4 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;

View File

@@ -1,4 +1,4 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;

View File

@@ -1,4 +1,4 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;

View File

@@ -1,15 +1,9 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
_config+:: {
jobs+: {
CoreDNS: 'job="coredns"',
},
},
prometheus+: {
kubeControllerManagerPrometheusDiscoveryService:
service.new('kube-controller-manager-prometheus-discovery', { 'component': 'kube-controller-manager' }, servicePort.newNamed('http-metrics', 10252, 10252)) +
@@ -22,16 +16,6 @@ local servicePort = k.core.v1.service.mixin.spec.portsType;
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
service.mixin.spec.withClusterIp('None'),
serviceMonitorCoreDNS+: {
spec+: {
selector: {
matchLabels: {
'k8s-app': 'coredns',
},
},
},
},
serviceMonitorKubeScheduler+: {
spec+: {
selector+: {

View File

@@ -1,6 +1,5 @@
// On managed Kubernetes clusters some of the control plane components are not exposed to customers.
// Disable scrape jobs and service monitors for these components by overwriting 'kube-prometheus.libsonnet' defaults
// Note this doesn't disable generation of associated alerting rules but the rules don't trigger
// Disable scrape jobs, service monitors, and alert groups for these components by overwriting 'kube-prometheus.libsonnet' defaults
{
_config+:: {
@@ -12,6 +11,18 @@
for k in std.objectFields(j)
if !std.setMember(k, ['KubeControllerManager', 'KubeScheduler'])
},
// Skip alerting rules too
prometheus+:: {
rules+:: {
local g = super.groups,
groups: [
h
for h in g
if !std.setMember(h.name, ['kubernetes-system-controller-manager', 'kubernetes-system-scheduler'])
],
},
},
},
// Same as above but for ServiceMonitor's
@@ -21,8 +32,4 @@
for q in std.objectFields(p)
if !std.setMember(q, ['serviceMonitorKubeControllerManager', 'serviceMonitorKubeScheduler'])
},
// TODO: disable generation of alerting rules
// manifests/prometheus-rules.yaml:52: - name: kube-scheduler.rules
}
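
A minimal sketch of consuming this managed-cluster add-on (the file name is assumed from this repository's layout):

(import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-managed-cluster.libsonnet')
// The rendered output now omits the controller-manager and scheduler
// scrape jobs, ServiceMonitors, and alert groups.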

View File

@@ -1,4 +1,4 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;

View File

@@ -1,4 +1,4 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
(import 'etcd-mixin/mixin.libsonnet') + {
_config+:: {

View File

@@ -0,0 +1,35 @@
// Strips spec.containers[].limits for certain containers
// https://github.com/coreos/kube-prometheus/issues/72
{
_config+:: {
resources+:: {
'addon-resizer'+: {
limits: {},
},
'kube-rbac-proxy'+: {
limits: {},
},
'kube-state-metrics'+: {
limits: {},
},
'node-exporter'+: {
limits: {},
},
},
},
prometheusOperator+: {
deployment+: {
spec+: {
template+: {
spec+: {
local addArgs(c) =
if c.name == 'prometheus-operator'
then c + {args+: ['--config-reloader-cpu=0']}
else c,
containers: std.map(addArgs, super.containers),
},
},
},
},
},
}
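
The std.map pattern above generalizes to patching any named container in a workload. A sketch of that generalization (patchNamedContainer and the '50m' value are illustrative, not part of this change; --config-reloader-cpu is the flag used above):

local patchNamedContainer(name, patch) = {
  spec+: { template+: { spec+: {
    local f(c) = if c.name == name then c + patch else c,
    containers: std.map(f, super.containers),
  } } },
};
{
  prometheusOperator+: {
    // e.g. raise the reloader CPU allowance instead of removing it entirely
    deployment+: patchNamedContainer('prometheus-operator', { args+: ['--config-reloader-cpu=50m'] }),
  },
}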

View File

@@ -0,0 +1,39 @@
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
_config+:: {
versions+:: {
thanos: 'v0.7.0',
},
imageRepos+:: {
thanos: 'quay.io/thanos/thanos',
},
thanos+:: {
objectStorageConfig: {
key: 'thanos.yaml', // The name of the file inside the secret
name: 'thanos-objectstorage', // This is the name of your Kubernetes secret with the config
},
},
},
prometheus+:: {
// Add the grpc port to the Prometheus service to be able to query it with the Thanos Querier
service+: {
spec+: {
ports+: [
servicePort.newNamed('grpc', 10901, 10901),
],
},
},
prometheus+: {
spec+: {
thanos+: {
version: $._config.versions.thanos,
baseImage: $._config.imageRepos.thanos,
objectStorageConfig: $._config.thanos.objectStorageConfig,
},
},
},
},
}
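
The objectStorageConfig above only references a Secret; the Secret itself must be supplied by the user. A minimal sketch of one (the type/bucket/endpoint values are placeholders following the Thanos object-store config format):

{
  thanosObjectStorageSecret: {
    apiVersion: 'v1',
    kind: 'Secret',
    metadata: {
      name: 'thanos-objectstorage',  // must match _config.thanos.objectStorageConfig.name
      namespace: 'monitoring',       // placeholder; use the stack's namespace
    },
    stringData: {
      'thanos.yaml': |||
        type: S3
        config:
          bucket: example-bucket
          endpoint: s3.example.com
      |||,
    },
  },
}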

View File

@@ -1,219 +0,0 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
_config+:: {
versions+:: {
thanos: 'v0.3.2',
},
imageRepos+:: {
thanos: 'improbable/thanos',
},
thanos+:: {
objectStorageConfig: {
key: 'thanos.yaml', // The name of the file inside the secret
name: 'thanos-objstore-config', // This is the name of your Kubernetes secret with the config
},
},
},
prometheus+:: {
prometheus+: {
spec+: {
podMetadata+: {
labels+: { 'thanos-peers': 'true' },
},
thanos+: {
peers: 'thanos-peers.' + $._config.namespace + '.svc:10900',
version: $._config.versions.thanos,
baseImage: $._config.imageRepos.thanos,
objectStorageConfig: $._config.thanos.objectStorageConfig,
},
},
},
thanosPeerService:
service.new('thanos-peers', { 'thanos-peers': 'true' }, [
servicePort.newNamed('cluster', 10900, 'cluster'),
servicePort.newNamed('http', 10902, 'http'),
]) +
service.mixin.metadata.withNamespace($._config.namespace) +
service.mixin.metadata.withLabels({ 'thanos-peers': 'true' }) +
service.mixin.spec.withType('ClusterIP') +
service.mixin.spec.withClusterIp('None'),
serviceMonitorThanosPeer:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'thanos-peers',
namespace: $._config.namespace,
labels: {
'k8s-app': 'thanos-peers',
},
},
spec: {
jobLabel: 'k8s-app',
endpoints: [
{
port: 'http',
interval: '30s',
},
],
selector: {
matchLabels: {
'thanos-peers': 'true',
},
},
},
},
thanosQueryDeployment:
local deployment = k.apps.v1beta2.deployment;
local container = k.apps.v1beta2.deployment.mixin.spec.template.spec.containersType;
local containerPort = container.portsType;
local thanosQueryContainer =
container.new('thanos-query', $._config.imageRepos.thanos + ':' + $._config.versions.thanos) +
container.withPorts([
containerPort.newNamed('http', 10902),
containerPort.newNamed('grpc', 10901),
containerPort.newNamed('cluster', 10900),
]) +
container.withArgs([
'query',
'--log.level=debug',
'--query.replica-label=prometheus_replica',
'--query.auto-downsampling',
'--cluster.peers=thanos-peers.' + $._config.namespace + '.svc:10900',
]);
local podLabels = { app: 'thanos-query', 'thanos-peers': 'true' };
deployment.new('thanos-query', 1, thanosQueryContainer, podLabels) +
deployment.mixin.metadata.withNamespace($._config.namespace) +
deployment.mixin.metadata.withLabels(podLabels) +
deployment.mixin.spec.selector.withMatchLabels(podLabels) +
deployment.mixin.spec.template.spec.withServiceAccountName('prometheus-' + $._config.prometheus.name),
thanosQueryService:
local thanosQueryPort = servicePort.newNamed('http-query', 9090, 'http');
service.new('thanos-query', { app: 'thanos-query' }, thanosQueryPort) +
service.mixin.metadata.withNamespace($._config.namespace) +
service.mixin.metadata.withLabels({ app: 'thanos-query' }),
thanosStoreStatefulset:
local statefulSet = k.apps.v1beta2.statefulSet;
local volume = statefulSet.mixin.spec.template.spec.volumesType;
local container = statefulSet.mixin.spec.template.spec.containersType;
local containerEnv = container.envType;
local containerVolumeMount = container.volumeMountsType;
local labels = { app: 'thanos', 'thanos-peers': 'true' };
local c =
container.new('thanos-store', $._config.imageRepos.thanos + ':' + $._config.versions.thanos) +
container.withArgs([
'store',
'--log.level=debug',
'--data-dir=/var/thanos/store',
'--cluster.peers=thanos-peers.' + $._config.namespace + '.svc:10900',
'--objstore.config=$(OBJSTORE_CONFIG)',
]) +
container.withEnv([
containerEnv.fromSecretRef(
'OBJSTORE_CONFIG',
$._config.thanos.objectStorageConfig.name,
$._config.thanos.objectStorageConfig.key,
),
]) +
container.withPorts([
{ name: 'cluster', containerPort: 10900 },
{ name: 'grpc', containerPort: 10901 },
{ name: 'http', containerPort: 10902 },
]) +
container.withVolumeMounts([
containerVolumeMount.new('data', '/var/thanos/store', false),
]);
statefulSet.new('thanos-store', 1, c, [], labels) +
statefulSet.mixin.metadata.withNamespace($._config.namespace) +
statefulSet.mixin.spec.selector.withMatchLabels(labels) +
statefulSet.mixin.spec.withServiceName('thanos-store') +
statefulSet.mixin.spec.template.spec.withVolumes([
volume.fromEmptyDir('data'),
]),
serviceMonitorThanosCompactor:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'ServiceMonitor',
metadata: {
name: 'thanos-compactor',
namespace: $._config.namespace,
labels: {
'k8s-app': 'thanos-compactor',
},
},
spec: {
jobLabel: 'k8s-app',
endpoints: [
{
port: 'http',
interval: '30s',
},
],
selector: {
matchLabels: {
app: 'thanos-compactor',
},
},
},
},
thanosCompactorService:
service.new(
'thanos-compactor',
{ app: 'thanos-compactor' },
servicePort.newNamed('http', 9090, 'http'),
) +
service.mixin.metadata.withNamespace($._config.namespace) +
service.mixin.metadata.withLabels({ app: 'thanos-compactor' }),
thanosCompactorStatefulset:
local statefulSet = k.apps.v1beta2.statefulSet;
local volume = statefulSet.mixin.spec.template.spec.volumesType;
local container = statefulSet.mixin.spec.template.spec.containersType;
local containerEnv = container.envType;
local containerVolumeMount = container.volumeMountsType;
local labels = { app: 'thanos-compactor' };
local c =
container.new('thanos-compactor', $._config.imageRepos.thanos + ':' + $._config.versions.thanos) +
container.withArgs([
'compact',
'--log.level=debug',
'--data-dir=/var/thanos/store',
'--objstore.config=$(OBJSTORE_CONFIG)',
'--wait',
]) +
container.withEnv([
containerEnv.fromSecretRef(
'OBJSTORE_CONFIG',
$._config.thanos.objectStorageConfig.name,
$._config.thanos.objectStorageConfig.key,
),
]) +
container.withPorts([
{ name: 'http', containerPort: 10902 },
]) +
container.withVolumeMounts([
containerVolumeMount.new('data', '/var/thanos/store', false),
]);
statefulSet.new('thanos-compactor', 1, c, [], labels) +
statefulSet.mixin.metadata.withNamespace($._config.namespace) +
statefulSet.mixin.spec.selector.withMatchLabels(labels) +
statefulSet.mixin.spec.withServiceName('thanos-compactor') +
statefulSet.mixin.spec.template.spec.withVolumes([
volume.fromEmptyDir('data'),
]),
},
}

View File

@@ -1,14 +1,17 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local configMapList = k.core.v1.configMapList;
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
local k3 = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local configMapList = k3.core.v1.configMapList;
(import 'grafana/grafana.libsonnet') +
(import 'kube-state-metrics/kube-state-metrics.libsonnet') +
(import 'node-exporter/node-exporter.libsonnet') +
(import 'node-mixin/mixin.libsonnet') +
(import 'alertmanager/alertmanager.libsonnet') +
(import 'prometheus-operator/prometheus-operator.libsonnet') +
(import 'prometheus/prometheus.libsonnet') +
(import 'prometheus-adapter/prometheus-adapter.libsonnet') +
(import 'kubernetes-mixin/mixin.libsonnet') +
(import 'prometheus/mixin.libsonnet') +
(import 'alerts/alerts.libsonnet') +
(import 'rules/rules.libsonnet') + {
kubePrometheus+:: {
@@ -43,7 +46,7 @@ local configMapList = k.core.v1.configMapList;
namespace: 'default',
versions+:: {
grafana: '6.0.1',
grafana: '6.4.3',
},
tlsCipherSuites: [
@@ -75,8 +78,8 @@ local configMapList = k.core.v1.configMapList;
// 'TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305', // TODO: Might not work with h2
],
cadvisorSelector: 'job="kubelet"',
kubeletSelector: 'job="kubelet"',
cadvisorSelector: 'job="kubelet", metrics_path="/metrics/cadvisor"',
kubeletSelector: 'job="kubelet", metrics_path="/metrics"',
kubeStateMetricsSelector: 'job="kube-state-metrics"',
nodeExporterSelector: 'job="node-exporter"',
notKubeDnsSelector: 'job!="kube-dns"',
@@ -88,6 +91,7 @@ local configMapList = k.core.v1.configMapList;
alertmanagerSelector: 'job="alertmanager-' + $._config.alertmanager.name + '",namespace="' + $._config.namespace + '"',
prometheusSelector: 'job="prometheus-' + $._config.prometheus.name + '",namespace="' + $._config.namespace + '"',
prometheusName: '{{$labels.namespace}}/{{$labels.pod}}',
prometheusOperatorSelector: 'job="prometheus-operator",namespace="' + $._config.namespace + '"',
jobs: {
@@ -103,6 +107,24 @@ local configMapList = k.core.v1.configMapList;
CoreDNS: $._config.coreDNSSelector,
},
resources+:: {
'addon-resizer': {
requests: { cpu: '10m', memory: '30Mi' },
limits: { cpu: '50m', memory: '30Mi' },
},
'kube-rbac-proxy': {
requests: { cpu: '10m', memory: '20Mi' },
limits: { cpu: '20m', memory: '40Mi' },
},
'kube-state-metrics': {
requests: { cpu: '100m', memory: '150Mi' },
limits: { cpu: '100m', memory: '150Mi' },
},
'node-exporter': {
requests: { cpu: '102m', memory: '180Mi' },
limits: { cpu: '250m', memory: '180Mi' },
},
},
prometheus+:: {
rules: $.prometheusRules + $.prometheusAlerts,
},
@@ -110,5 +132,6 @@ local configMapList = k.core.v1.configMapList;
grafana+:: {
dashboards: $.grafanaDashboards,
},
},
}
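
With requests and limits now centralized under _config.resources (see the diff above), per-component sizing becomes a plain config override. A minimal sketch, with placeholder values:

(import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: {
    resources+:: {
      'node-exporter'+: {
        requests: { cpu: '50m', memory: '100Mi' },
      },
    },
  },
}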

View File

@@ -1,4 +1,4 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
_config+:: {
@@ -8,23 +8,16 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
collectors: '', // empty string gets a default set
scrapeInterval: '30s',
scrapeTimeout: '30s',
baseCPU: '100m',
baseMemory: '150Mi',
cpuPerNode: '2m',
memoryPerNode: '30Mi',
},
versions+:: {
kubeStateMetrics: 'v1.5.0',
kubeStateMetrics: 'v1.9.2',
kubeRbacProxy: 'v0.4.1',
addonResizer: '1.8.4',
},
imageRepos+:: {
kubeStateMetrics: 'quay.io/coreos/kube-state-metrics',
kubeRbacProxy: 'quay.io/coreos/kube-rbac-proxy',
addonResizer: 'k8s.gcr.io/addon-resizer',
},
},
@@ -43,88 +36,119 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local clusterRole = k.rbac.v1.clusterRole;
local rulesType = clusterRole.rulesType;
local coreRule = rulesType.new() +
rulesType.withApiGroups(['']) +
rulesType.withResources([
'configmaps',
'secrets',
'nodes',
'pods',
'services',
'resourcequotas',
'replicationcontrollers',
'limitranges',
'persistentvolumeclaims',
'persistentvolumes',
'namespaces',
'endpoints',
]) +
rulesType.withVerbs(['list', 'watch']);
local rules = [
rulesType.new() +
rulesType.withApiGroups(['']) +
rulesType.withResources([
'configmaps',
'secrets',
'nodes',
'pods',
'services',
'resourcequotas',
'replicationcontrollers',
'limitranges',
'persistentvolumeclaims',
'persistentvolumes',
'namespaces',
'endpoints',
]) +
rulesType.withVerbs(['list', 'watch']),
local extensionsRule = rulesType.new() +
rulesType.withApiGroups(['extensions']) +
rulesType.withResources([
'daemonsets',
'deployments',
'replicasets',
]) +
rulesType.withVerbs(['list', 'watch']);
rulesType.new() +
rulesType.withApiGroups(['extensions']) +
rulesType.withResources([
'daemonsets',
'deployments',
'replicasets',
'ingresses',
]) +
rulesType.withVerbs(['list', 'watch']),
local appsRule = rulesType.new() +
rulesType.withApiGroups(['apps']) +
rulesType.withResources([
'statefulsets',
'daemonsets',
'deployments',
'replicasets',
]) +
rulesType.withVerbs(['list', 'watch']);
rulesType.new() +
rulesType.withApiGroups(['apps']) +
rulesType.withResources([
'statefulsets',
'daemonsets',
'deployments',
'replicasets',
]) +
rulesType.withVerbs(['list', 'watch']),
local batchRule = rulesType.new() +
rulesType.withApiGroups(['batch']) +
rulesType.withResources([
'cronjobs',
'jobs',
]) +
rulesType.withVerbs(['list', 'watch']);
rulesType.new() +
rulesType.withApiGroups(['batch']) +
rulesType.withResources([
'cronjobs',
'jobs',
]) +
rulesType.withVerbs(['list', 'watch']),
local autoscalingRule = rulesType.new() +
rulesType.withApiGroups(['autoscaling']) +
rulesType.withResources([
'horizontalpodautoscalers',
]) +
rulesType.withVerbs(['list', 'watch']);
rulesType.new() +
rulesType.withApiGroups(['autoscaling']) +
rulesType.withResources([
'horizontalpodautoscalers',
]) +
rulesType.withVerbs(['list', 'watch']),
local authenticationRole = rulesType.new() +
rulesType.withApiGroups(['authentication.k8s.io']) +
rulesType.withResources([
'tokenreviews',
]) +
rulesType.withVerbs(['create']);
rulesType.new() +
rulesType.withApiGroups(['authentication.k8s.io']) +
rulesType.withResources([
'tokenreviews',
]) +
rulesType.withVerbs(['create']),
local authorizationRole = rulesType.new() +
rulesType.withApiGroups(['authorization.k8s.io']) +
rulesType.withResources([
'subjectaccessreviews',
]) +
rulesType.withVerbs(['create']);
rulesType.new() +
rulesType.withApiGroups(['authorization.k8s.io']) +
rulesType.withResources([
'subjectaccessreviews',
]) +
rulesType.withVerbs(['create']),
local policyRule = rulesType.new() +
rulesType.withApiGroups(['policy']) +
rulesType.withResources([
'poddisruptionbudgets',
]) +
rulesType.withVerbs(['list', 'watch']);
rulesType.new() +
rulesType.withApiGroups(['policy']) +
rulesType.withResources([
'poddisruptionbudgets',
]) +
rulesType.withVerbs(['list', 'watch']),
local rules = [coreRule, extensionsRule, appsRule, batchRule, autoscalingRule, authenticationRole, authorizationRole, policyRule];
rulesType.new() +
rulesType.withApiGroups(['certificates.k8s.io']) +
rulesType.withResources([
'certificatesigningrequests',
]) +
rulesType.withVerbs(['list', 'watch']),
rulesType.new() +
rulesType.withApiGroups(['storage.k8s.io']) +
rulesType.withResources([
'storageclasses',
'volumeattachments',
]) +
rulesType.withVerbs(['list', 'watch']),
rulesType.new() +
rulesType.withApiGroups(['admissionregistration.k8s.io']) +
rulesType.withResources([
'validatingwebhookconfigurations',
'mutatingwebhookconfigurations',
]) +
rulesType.withVerbs(['list', 'watch']),
rulesType.new() +
rulesType.withApiGroups(['networking.k8s.io']) +
rulesType.withResources([
'networkpolicies',
]) +
rulesType.withVerbs(['list', 'watch']),
];
clusterRole.new() +
clusterRole.mixin.metadata.withName('kube-state-metrics') +
clusterRole.withRules(rules),
deployment:
local deployment = k.apps.v1beta2.deployment;
local container = k.apps.v1beta2.deployment.mixin.spec.template.spec.containersType;
local volume = k.apps.v1beta2.deployment.mixin.spec.template.spec.volumesType;
local deployment = k.apps.v1.deployment;
local container = deployment.mixin.spec.template.spec.containersType;
local volume = deployment.mixin.spec.template.spec.volumesType;
local containerPort = container.portsType;
local containerVolumeMount = container.volumeMountsType;
local podSelector = deployment.mixin.spec.template.spec.selectorType;
@@ -139,9 +163,9 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
'--tls-cipher-suites=' + std.join(',', $._config.tlsCipherSuites),
'--upstream=http://127.0.0.1:8081/',
]) +
container.withPorts(containerPort.newNamed('https-main', 8443)) +
container.mixin.resources.withRequests({ cpu: '10m', memory: '20Mi' }) +
container.mixin.resources.withLimits({ cpu: '20m', memory: '40Mi' });
container.withPorts(containerPort.newNamed(8443, 'https-main',)) +
container.mixin.resources.withRequests($._config.resources['kube-rbac-proxy'].requests) +
container.mixin.resources.withLimits($._config.resources['kube-rbac-proxy'].limits);
local proxySelfMetrics =
container.new('kube-rbac-proxy-self', $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy) +
@@ -151,9 +175,9 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
'--tls-cipher-suites=' + std.join(',', $._config.tlsCipherSuites),
'--upstream=http://127.0.0.1:8082/',
]) +
container.withPorts(containerPort.newNamed('https-self', 9443)) +
container.mixin.resources.withRequests({ cpu: '10m', memory: '20Mi' }) +
container.mixin.resources.withLimits({ cpu: '20m', memory: '40Mi' });
container.withPorts(containerPort.newNamed(9443, 'https-self',)) +
container.mixin.resources.withRequests($._config.resources['kube-rbac-proxy'].requests) +
container.mixin.resources.withLimits($._config.resources['kube-rbac-proxy'].limits);
local kubeStateMetrics =
container.new('kube-state-metrics', $._config.imageRepos.kubeStateMetrics + ':' + $._config.versions.kubeStateMetrics) +
@@ -163,45 +187,16 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
'--telemetry-host=127.0.0.1',
'--telemetry-port=8082',
] + if $._config.kubeStateMetrics.collectors != '' then ['--collectors=' + $._config.kubeStateMetrics.collectors] else []) +
container.mixin.resources.withRequests({ cpu: $._config.kubeStateMetrics.baseCPU, memory: $._config.kubeStateMetrics.baseMemory }) +
container.mixin.resources.withLimits({ cpu: $._config.kubeStateMetrics.baseCPU, memory: $._config.kubeStateMetrics.baseMemory });
container.mixin.resources.withRequests($._config.resources['kube-state-metrics'].requests) +
container.mixin.resources.withLimits($._config.resources['kube-state-metrics'].limits);
local addonResizer =
container.new('addon-resizer', $._config.imageRepos.addonResizer + ':' + $._config.versions.addonResizer) +
container.withCommand([
'/pod_nanny',
'--container=kube-state-metrics',
'--cpu=' + $._config.kubeStateMetrics.baseCPU,
'--extra-cpu=' + $._config.kubeStateMetrics.cpuPerNode,
'--memory=' + $._config.kubeStateMetrics.baseMemory,
'--extra-memory=' + $._config.kubeStateMetrics.memoryPerNode,
'--threshold=5',
'--deployment=kube-state-metrics',
]) +
container.withEnv([
{
name: 'MY_POD_NAME',
valueFrom: {
fieldRef: { apiVersion: 'v1', fieldPath: 'metadata.name' },
},
},
{
name: 'MY_POD_NAMESPACE',
valueFrom: {
fieldRef: { apiVersion: 'v1', fieldPath: 'metadata.namespace' },
},
},
]) +
container.mixin.resources.withRequests({ cpu: '10m', memory: '30Mi' }) +
container.mixin.resources.withLimits({ cpu: '50m', memory: '30Mi' });
local c = [proxyClusterMetrics, proxySelfMetrics, kubeStateMetrics, addonResizer];
local c = [proxyClusterMetrics, proxySelfMetrics, kubeStateMetrics];
deployment.new('kube-state-metrics', 1, c, podLabels) +
deployment.mixin.metadata.withNamespace($._config.namespace) +
deployment.mixin.metadata.withLabels(podLabels) +
deployment.mixin.spec.selector.withMatchLabels(podLabels) +
deployment.mixin.spec.template.spec.withNodeSelector({ 'beta.kubernetes.io/os': 'linux' }) +
deployment.mixin.spec.template.spec.withNodeSelector({ 'kubernetes.io/os': 'linux' }) +
deployment.mixin.spec.template.spec.securityContext.withRunAsNonRoot(true) +
deployment.mixin.spec.template.spec.securityContext.withRunAsUser(65534) +
deployment.mixin.spec.template.spec.withServiceAccountName('kube-state-metrics'),
@@ -259,7 +254,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
service:
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
local servicePort = service.mixin.spec.portsType;
local ksmServicePortMain = servicePort.newNamed('https-main', 8443, 'https-main');
local ksmServicePortSelf = servicePort.newNamed('https-self', 9443, 'https-self');
@@ -295,6 +290,12 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
scrapeTimeout: $._config.kubeStateMetrics.scrapeTimeout,
honorLabels: true,
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
regex: '(pod|service|endpoint|namespace)',
action: 'labeldrop',
},
],
tlsConfig: {
insecureSkipVerify: true,
},

View File

@@ -1,11 +1,11 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
_config+:: {
namespace: 'default',
versions+:: {
nodeExporter: 'v0.17.0',
nodeExporter: 'v0.18.1',
kubeRbacProxy: 'v0.4.1',
},
@@ -55,7 +55,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
clusterRole.withRules(rules),
daemonset:
local daemonset = k.apps.v1beta2.daemonSet;
local daemonset = k.apps.v1.daemonSet;
local container = daemonset.mixin.spec.template.spec.containersType;
local volume = daemonset.mixin.spec.template.spec.volumesType;
local containerPort = container.portsType;
@@ -89,7 +89,8 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
'--path.procfs=/host/proc',
'--path.sysfs=/host/sys',
'--path.rootfs=/host/root',
'--no-collector.wifi',
'--no-collector.hwmon',
// The following settings have been taken from
// https://github.com/prometheus/node_exporter/blob/0662673/collector/filesystem_linux.go#L30-L31
// Once node exporter is released with those settings, this can be removed.
@@ -97,15 +98,15 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
'--collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$',
]) +
container.withVolumeMounts([procVolumeMount, sysVolumeMount, rootVolumeMount]) +
container.mixin.resources.withRequests({ cpu: '102m', memory: '180Mi' }) +
container.mixin.resources.withLimits({ cpu: '250m', memory: '180Mi' });
container.mixin.resources.withRequests($._config.resources['node-exporter'].requests) +
container.mixin.resources.withLimits($._config.resources['node-exporter'].limits);
local ip = containerEnv.fromFieldPath('IP', 'status.podIP');
local proxy =
container.new('kube-rbac-proxy', $._config.imageRepos.kubeRbacProxy + ':' + $._config.versions.kubeRbacProxy) +
container.withArgs([
'--logtostderr',
'--secure-listen-address=$(IP):' + $._config.nodeExporter.port,
'--secure-listen-address=[$(IP)]:' + $._config.nodeExporter.port,
'--tls-cipher-suites=' + std.join(',', $._config.tlsCipherSuites),
'--upstream=http://127.0.0.1:' + $._config.nodeExporter.port + '/',
]) +
@@ -118,8 +119,8 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
// forgo declaring the host port, however it is important to declare
// it so that the scheduler can decide if the pod is schedulable.
container.withPorts(containerPort.new($._config.nodeExporter.port) + containerPort.withHostPort($._config.nodeExporter.port) + containerPort.withName('https')) +
container.mixin.resources.withRequests({ cpu: '10m', memory: '20Mi' }) +
container.mixin.resources.withLimits({ cpu: '20m', memory: '40Mi' }) +
container.mixin.resources.withRequests($._config.resources['kube-rbac-proxy'].requests) +
container.mixin.resources.withLimits($._config.resources['kube-rbac-proxy'].limits) +
container.withEnv([ip]);
local c = [nodeExporter, proxy];
@@ -131,7 +132,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
daemonset.mixin.spec.selector.withMatchLabels(podLabels) +
daemonset.mixin.spec.template.metadata.withLabels(podLabels) +
daemonset.mixin.spec.template.spec.withTolerations([existsToleration]) +
daemonset.mixin.spec.template.spec.withNodeSelector({ 'beta.kubernetes.io/os': 'linux' }) +
daemonset.mixin.spec.template.spec.withNodeSelector({ 'kubernetes.io/os': 'linux' }) +
daemonset.mixin.spec.template.spec.withContainers(c) +
daemonset.mixin.spec.template.spec.withVolumes([procVolume, sysVolume, rootVolume]) +
daemonset.mixin.spec.template.spec.securityContext.withRunAsNonRoot(true) +
@@ -168,8 +169,17 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
{
port: 'https',
scheme: 'https',
interval: '30s',
interval: '15s',
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
action: 'replace',
regex: '(.*)',
replacement: '$1',
sourceLabels: ['__meta_kubernetes_pod_node_name'],
targetLabel: 'instance',
},
],
tlsConfig: {
insecureSkipVerify: true,
},
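
The relabeling above makes the instance label carry the Kubernetes node name rather than the pod endpoint. An illustrative join this enables (not part of this change), matching node-exporter series against kube-state-metrics node metadata:

{
  expr: |||
    node_memory_MemAvailable_bytes{job="node-exporter"}
    * on(instance) group_left()
    label_replace(kube_node_info, "instance", "$1", "node", "(.*)")
  |||,
}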

View File

@@ -1,11 +1,11 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
_config+:: {
namespace: 'default',
versions+:: {
prometheusAdapter: 'v0.4.1',
prometheusAdapter: 'v0.5.0',
},
imageRepos+:: {
@@ -19,30 +19,30 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
config: |||
resourceRules:
cpu:
containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container_name!="POD",container_name!="",pod_name!=""}[1m])) by (<<.GroupBy>>)
nodeQuery: sum(1 - rate(node_cpu_seconds_total{mode="idle"}[1m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)
containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}[5m])) by (<<.GroupBy>>)
nodeQuery: sum(1 - rate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)
resources:
overrides:
node:
resource: node
namespace:
resource: namespace
pod_name:
pod:
resource: pod
containerLabel: container_name
containerLabel: container
memory:
containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container_name!="POD",container_name!="",pod_name!=""}) by (<<.GroupBy>>)
nodeQuery: sum(node:node_memory_bytes_total:sum{<<.LabelMatchers>>} - node:node_memory_bytes_available:sum{<<.LabelMatchers>>}) by (<<.GroupBy>>)
containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>)
nodeQuery: sum(node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}) by (<<.GroupBy>>)
resources:
overrides:
node:
instance:
resource: node
namespace:
resource: namespace
pod_name:
pod:
resource: pod
containerLabel: container_name
window: 1m
containerLabel: container
window: 5m
|||,
},
},
@@ -87,7 +87,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
service.mixin.metadata.withLabels($._config.prometheusAdapter.labels),
deployment:
local deployment = k.apps.v1beta2.deployment;
local deployment = k.apps.v1.deployment;
local volume = deployment.mixin.spec.template.spec.volumesType;
local container = deployment.mixin.spec.template.spec.containersType;
local containerVolumeMount = container.volumeMountsType;
@@ -113,7 +113,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
deployment.mixin.metadata.withNamespace($._config.namespace) +
deployment.mixin.spec.selector.withMatchLabels($._config.prometheusAdapter.labels) +
deployment.mixin.spec.template.spec.withServiceAccountName($.prometheusAdapter.serviceAccount.metadata.name) +
deployment.mixin.spec.template.spec.withNodeSelector({ 'beta.kubernetes.io/os': 'linux' }) +
deployment.mixin.spec.template.spec.withNodeSelector({ 'kubernetes.io/os': 'linux' }) +
deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(1) +
deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(0) +
deployment.mixin.spec.template.spec.withVolumes([
@@ -191,7 +191,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local rules =
policyRule.new() +
policyRule.withApiGroups(['metrics.k8s.io']) +
policyRule.withResources(['pods']) +
policyRule.withResources(['pods', 'nodes']) +
policyRule.withVerbs(['get','list','watch']);
clusterRole.new() +
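
Because the adapter configuration is a plain string under _config.prometheusAdapter, the new 5m windows can be tuned without copying the whole document. A hedged sketch (the 10m value is illustrative):

(import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: {
    prometheusAdapter+:: {
      config: std.strReplace(
        std.strReplace(super.config, '[5m]', '[10m]'),
        'window: 5m', 'window: 10m'
      ),
    },
  },
}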

View File

@@ -1,11 +1,12 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k3 = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
{
_config+:: {
namespace: 'default',
versions+:: {
prometheus: 'v2.7.2',
prometheus: 'v2.11.0',
},
imageRepos+:: {
@@ -20,57 +21,67 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
name: 'k8s',
replicas: 2,
rules: {},
renderedRules: {},
namespaces: ['default', 'kube-system', $._config.namespace],
},
},
prometheus+:: {
local p = self,
name:: $._config.prometheus.name,
namespace:: $._config.namespace,
roleBindingNamespaces:: $._config.prometheus.namespaces,
replicas:: $._config.prometheus.replicas,
prometheusRules:: $._config.prometheus.rules,
alertmanagerName:: $.alertmanager.service.metadata.name,
serviceAccount:
local serviceAccount = k.core.v1.serviceAccount;
serviceAccount.new('prometheus-' + $._config.prometheus.name) +
serviceAccount.mixin.metadata.withNamespace($._config.namespace),
serviceAccount.new('prometheus-' + p.name) +
serviceAccount.mixin.metadata.withNamespace(p.namespace),
service:
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
local prometheusPort = servicePort.newNamed('web', 9090, 'web');
service.new('prometheus-' + $._config.prometheus.name, { app: 'prometheus', prometheus: $._config.prometheus.name }, prometheusPort) +
service.new('prometheus-' + p.name, { app: 'prometheus', prometheus: p.name }, prometheusPort) +
service.mixin.spec.withSessionAffinity('ClientIP') +
service.mixin.metadata.withNamespace($._config.namespace) +
service.mixin.metadata.withLabels({ prometheus: $._config.prometheus.name }),
[if $._config.prometheus.rules != null && $._config.prometheus.rules != {} then 'rules']:
service.mixin.metadata.withNamespace(p.namespace) +
service.mixin.metadata.withLabels({ prometheus: p.name }),
rules:
{
apiVersion: 'monitoring.coreos.com/v1',
kind: 'PrometheusRule',
metadata: {
labels: {
prometheus: $._config.prometheus.name,
prometheus: p.name,
role: 'alert-rules',
},
name: 'prometheus-' + $._config.prometheus.name + '-rules',
namespace: $._config.namespace,
name: 'prometheus-' + p.name + '-rules',
namespace: p.namespace,
},
spec: {
groups: $._config.prometheus.rules.groups,
groups: p.prometheusRules.groups,
},
},
roleBindingSpecificNamespaces:
local roleBinding = k.rbac.v1.roleBinding;
local newSpecificRoleBinding(namespace) =
roleBinding.new() +
roleBinding.mixin.metadata.withName('prometheus-' + $._config.prometheus.name) +
roleBinding.mixin.metadata.withName('prometheus-' + p.name) +
roleBinding.mixin.metadata.withNamespace(namespace) +
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
roleBinding.mixin.roleRef.withName('prometheus-' + $._config.prometheus.name) +
roleBinding.mixin.roleRef.withName('prometheus-' + p.name) +
roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + $._config.prometheus.name, namespace: $._config.namespace }]);
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + p.name, namespace: p.namespace }]);
local roleBindigList = k.rbac.v1.roleBindingList;
roleBindigList.new([newSpecificRoleBinding(x) for x in $._config.prometheus.namespaces]),
local roleBindingList = k3.rbac.v1.roleBindingList;
roleBindingList.new([newSpecificRoleBinding(x) for x in p.roleBindingNamespaces]),
clusterRole:
local clusterRole = k.rbac.v1.clusterRole;
local policyRule = clusterRole.rulesType;
@@ -87,7 +98,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local rules = [nodeMetricsRule, metricsRule];
clusterRole.new() +
clusterRole.mixin.metadata.withName('prometheus-' + $._config.prometheus.name) +
clusterRole.mixin.metadata.withName('prometheus-' + p.name) +
clusterRole.withRules(rules),
roleConfig:
local role = k.rbac.v1.role;
@@ -101,28 +112,28 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
policyRule.withVerbs(['get']);
role.new() +
role.mixin.metadata.withName('prometheus-' + $._config.prometheus.name + '-config') +
role.mixin.metadata.withNamespace($._config.namespace) +
role.mixin.metadata.withName('prometheus-' + p.name + '-config') +
role.mixin.metadata.withNamespace(p.namespace) +
role.withRules(configmapRule),
roleBindingConfig:
local roleBinding = k.rbac.v1.roleBinding;
roleBinding.new() +
roleBinding.mixin.metadata.withName('prometheus-' + $._config.prometheus.name + '-config') +
roleBinding.mixin.metadata.withNamespace($._config.namespace) +
roleBinding.mixin.metadata.withName('prometheus-' + p.name + '-config') +
roleBinding.mixin.metadata.withNamespace(p.namespace) +
roleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
roleBinding.mixin.roleRef.withName('prometheus-' + $._config.prometheus.name + '-config') +
roleBinding.mixin.roleRef.withName('prometheus-' + p.name + '-config') +
roleBinding.mixin.roleRef.mixinInstance({ kind: 'Role' }) +
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + $._config.prometheus.name, namespace: $._config.namespace }]),
roleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + p.name, namespace: p.namespace }]),
clusterRoleBinding:
local clusterRoleBinding = k.rbac.v1.clusterRoleBinding;
clusterRoleBinding.new() +
clusterRoleBinding.mixin.metadata.withName('prometheus-' + $._config.prometheus.name) +
clusterRoleBinding.mixin.metadata.withName('prometheus-' + p.name) +
clusterRoleBinding.mixin.roleRef.withApiGroup('rbac.authorization.k8s.io') +
clusterRoleBinding.mixin.roleRef.withName('prometheus-' + $._config.prometheus.name) +
clusterRoleBinding.mixin.roleRef.withName('prometheus-' + p.name) +
clusterRoleBinding.mixin.roleRef.mixinInstance({ kind: 'ClusterRole' }) +
clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + $._config.prometheus.name, namespace: $._config.namespace }]),
clusterRoleBinding.withSubjects([{ kind: 'ServiceAccount', name: 'prometheus-' + p.name, namespace: p.namespace }]),
roleSpecificNamespaces:
local role = k.rbac.v1.role;
local policyRule = role.rulesType;
@@ -137,18 +148,19 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local newSpecificRole(namespace) =
role.new() +
role.mixin.metadata.withName('prometheus-' + $._config.prometheus.name) +
role.mixin.metadata.withName('prometheus-' + p.name) +
role.mixin.metadata.withNamespace(namespace) +
role.withRules(coreRule);
local roleList = k.rbac.v1.roleList;
roleList.new([newSpecificRole(x) for x in $._config.prometheus.namespaces]),
local roleList = k3.rbac.v1.roleList;
roleList.new([newSpecificRole(x) for x in p.roleBindingNamespaces]),
prometheus:
local statefulSet = k.apps.v1beta2.statefulSet;
local statefulSet = k.apps.v1.statefulSet;
local container = statefulSet.mixin.spec.template.spec.containersType;
local resourceRequirements = container.mixin.resourcesType;
local selector = statefulSet.mixin.spec.selectorType;
local resources =
resourceRequirements.new() +
resourceRequirements.withRequests({ memory: '400Mi' });
@@ -157,30 +169,32 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
apiVersion: 'monitoring.coreos.com/v1',
kind: 'Prometheus',
metadata: {
name: $._config.prometheus.name,
namespace: $._config.namespace,
name: p.name,
namespace: p.namespace,
labels: {
prometheus: $._config.prometheus.name,
prometheus: p.name,
},
},
spec: {
replicas: $._config.prometheus.replicas,
replicas: p.replicas,
version: $._config.versions.prometheus,
baseImage: $._config.imageRepos.prometheus,
serviceAccountName: 'prometheus-' + $._config.prometheus.name,
serviceAccountName: 'prometheus-' + p.name,
serviceMonitorSelector: {},
podMonitorSelector: {},
serviceMonitorNamespaceSelector: {},
nodeSelector: { 'beta.kubernetes.io/os': 'linux' },
podMonitorNamespaceSelector: {},
nodeSelector: { 'kubernetes.io/os': 'linux' },
ruleSelector: selector.withMatchLabels({
role: 'alert-rules',
prometheus: $._config.prometheus.name,
prometheus: p.name,
}),
resources: resources,
alerting: {
alertmanagers: [
{
namespace: $._config.namespace,
name: 'alertmanager-' + $._config.alertmanager.name,
namespace: p.namespace,
name: p.alertmanagerName,
port: 'web',
},
],
@@ -198,7 +212,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
kind: 'ServiceMonitor',
metadata: {
name: 'prometheus',
namespace: $._config.namespace,
namespace: p.namespace,
labels: {
'k8s-app': 'prometheus',
},
@@ -206,7 +220,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
spec: {
selector: {
matchLabels: {
prometheus: $._config.prometheus.name,
prometheus: p.name,
},
},
endpoints: [
@@ -223,7 +237,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
kind: 'ServiceMonitor',
metadata: {
name: 'kube-scheduler',
namespace: $._config.namespace,
namespace: p.namespace,
labels: {
'k8s-app': 'kube-scheduler',
},
@@ -254,7 +268,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
kind: 'ServiceMonitor',
metadata: {
name: 'kubelet',
namespace: $._config.namespace,
namespace: p.namespace,
labels: {
'k8s-app': 'kubelet',
},
@@ -271,6 +285,13 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
insecureSkipVerify: true,
},
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
metricRelabelings: (import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet'),
relabelings: [
{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path',
},
],
},
{
port: 'https-metrics',
@@ -282,6 +303,12 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
insecureSkipVerify: true,
},
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
relabelings: [
{
sourceLabels: ['__metrics_path__'],
targetLabel: 'metrics_path',
},
],
metricRelabelings: [
// Drop a bunch of metrics which are disabled but still sent, see
// https://github.com/google/cadvisor/issues/1925.
@@ -311,7 +338,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
kind: 'ServiceMonitor',
metadata: {
name: 'kube-controller-manager',
namespace: $._config.namespace,
namespace: p.namespace,
labels: {
'k8s-app': 'kube-controller-manager',
},
@@ -322,7 +349,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
{
port: 'http-metrics',
interval: '30s',
metricRelabelings: [
metricRelabelings: (import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet') + [
{
sourceLabels: ['__name__'],
regex: 'etcd_(debugging|disk|request|server).*',
@@ -349,7 +376,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
kind: 'ServiceMonitor',
metadata: {
name: 'kube-apiserver',
namespace: $._config.namespace,
namespace: p.namespace,
labels: {
'k8s-app': 'apiserver',
},
@@ -377,7 +404,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
serverName: 'kubernetes',
},
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
metricRelabelings: [
metricRelabelings: (import 'kube-prometheus/dropping-deprecated-metrics-relabelings.libsonnet') + [
{
sourceLabels: ['__name__'],
regex: 'etcd_(debugging|disk|request|server).*',
@@ -393,6 +420,11 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
regex: 'apiserver_admission_step_admission_latencies_seconds_.*',
action: 'drop',
},
{
sourceLabels: ['__name__', 'le'],
regex: 'apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)',
action: 'drop',
},
],
},
],
@@ -404,7 +436,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
kind: 'ServiceMonitor',
metadata: {
name: 'coredns',
namespace: $._config.namespace,
namespace: p.namespace,
labels: {
'k8s-app': 'coredns',
},
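
With the local p = self refactor above, instance-level fields can be overridden directly on the prometheus object rather than threaded through _config. A minimal sketch using the fields introduced in this diff (the values are placeholders):

(import 'kube-prometheus/kube-prometheus.libsonnet') + {
  prometheus+:: {
    namespace:: 'team-observability',
    replicas:: 3,
    roleBindingNamespaces:: ['team-a', 'team-b'],
  },
}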

View File

@@ -0,0 +1,19 @@
{
prometheusRules+:: {
groups+: [
{
name: 'kube-prometheus-general.rules',
rules: [
{
expr: 'count without(instance, pod, node) (up == 1)',
record: 'count:up1',
},
{
expr: 'count without(instance, pod, node) (up == 0)',
record: 'count:up0',
},
],
},
],
},
}
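
A hypothetical consumer of the new aggregated series (not part of this change): since count:up0 counts down targets per remaining label set, a single rule can alert across every target group.

{
  alert: 'TargetDown',  // illustrative name
  expr: 'count:up0 > 0',
  'for': '10m',
  labels: { severity: 'warning' },
}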

View File

@@ -1 +1,2 @@
(import 'node-rules.libsonnet')
(import 'node-rules.libsonnet') +
(import 'general.libsonnet')

View File

@@ -1,13 +1,13 @@
{
"dependencies": [
{
"name": "kube-prometheus",
"source": {
"local": {
"directory": "jsonnet/kube-prometheus"
}
},
"version": ""
"dependencies": [
{
"name": "kube-prometheus",
"source": {
"local": {
"directory": "jsonnet/kube-prometheus"
}
]
},
"version": ""
}
]
}

View File

@@ -1,83 +1,134 @@
{
"dependencies": [
{
"name": "kube-prometheus",
"source": {
"local": {
"directory": "jsonnet/kube-prometheus"
}
},
"version": ""
},
{
"name": "ksonnet",
"source": {
"git": {
"remote": "https://github.com/ksonnet/ksonnet-lib",
"subdir": ""
}
},
"version": "0d2f82676817bbf9e4acf6495b2090205f323b9f"
},
{
"name": "kubernetes-mixin",
"source": {
"git": {
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
"subdir": ""
}
},
"version": "d0e069002ba767676145fe5e29325720669499c6"
},
{
"name": "grafonnet",
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet-lib",
"subdir": "grafonnet"
}
},
"version": "47db72da03fc4a7a0658a87791e13c3315a3a252"
},
{
"name": "grafana-builder",
"source": {
"git": {
"remote": "https://github.com/kausalco/public",
"subdir": "grafana-builder"
}
},
"version": "3fe9a46d5fe0b70cbcabec1d2054f8ac3b3faae7"
},
{
"name": "grafana",
"source": {
"git": {
"remote": "https://github.com/brancz/kubernetes-grafana",
"subdir": "grafana"
}
},
"version": "a5c2b4da6ca92064604d5a8a893dec07ddead136"
},
{
"name": "prometheus-operator",
"source": {
"git": {
"remote": "https://github.com/coreos/prometheus-operator",
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "18fbf558ab7f8809fd610a3dc50bf483508dc1bb"
},
{
"name": "etcd-mixin",
"source": {
"git": {
"remote": "https://github.com/coreos/etcd",
"subdir": "Documentation/etcd-mixin"
}
},
"version": "9c48dfabff597b086cc98f34c960b77a0c569551"
"dependencies": [
{
"name": "etcd-mixin",
"source": {
"git": {
"remote": "https://github.com/coreos/etcd",
"subdir": "Documentation/etcd-mixin"
}
]
},
"version": "5770a6d286fe48682e29b54ce0df37e7d24b3280",
"sum": "Ko3qhNfC2vN/houLh6C0Ryacjv70gl0DVPGU/PQ4OD0="
},
{
"name": "grafana",
"source": {
"git": {
"remote": "https://github.com/brancz/kubernetes-grafana",
"subdir": "grafana"
}
},
"version": "539a90dbf63c812ad0194d8078dd776868a11c81",
"sum": "b8faWX1qqLGyN67sA36oRqYZ5HX+tHBRMPtrWRqIysE="
},
{
"name": "grafana-builder",
"source": {
"git": {
"remote": "https://github.com/grafana/jsonnet-libs",
"subdir": "grafana-builder"
}
},
"version": "676ff4b4fe9135f85a5d6e30523d64d2d3713087",
"sum": "ELsYwK+kGdzX1mee2Yy+/b2mdO4Y503BOCDkFzwmGbE="
},
{
"name": "grafonnet",
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet-lib",
"subdir": "grafonnet"
}
},
"version": "f3ee1d810858cf556d25f045b53cb0f1fd10b94e",
"sum": "14YBZUP/cl8qi9u86xiuUS4eXQrEAam+4GSg6i9n9Ys="
},
{
"name": "ksonnet",
"source": {
"git": {
"remote": "https://github.com/ksonnet/ksonnet-lib",
"subdir": ""
}
},
"version": "0d2f82676817bbf9e4acf6495b2090205f323b9f",
"sum": "h28BXZ7+vczxYJ2sCt8JuR9+yznRtU/iA6DCpQUrtEg="
},
{
"name": "kube-prometheus",
"source": {
"local": {
"directory": "jsonnet/kube-prometheus"
}
},
"version": ""
},
{
"name": "kubernetes-mixin",
"source": {
"git": {
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
"subdir": ""
}
},
"version": "68f82d2a428d91df57e9af43739981a6a8ede897",
"sum": "J/tuXi0Z8GRHo63pM17YFIyk4QgkFuMcQ20mAxi1flM="
},
{
"name": "node-mixin",
"source": {
"git": {
"remote": "https://github.com/prometheus/node_exporter",
"subdir": "docs/node-mixin"
}
},
"version": "2cae917bb7e0b6379221e8a24da012b16e63d661",
"sum": "7vEamDTP9AApeiF4Zu9ZyXzDIs3rYHzwf9k7g8X+wsg="
},
{
"name": "prometheus",
"source": {
"git": {
"remote": "https://github.com/prometheus/prometheus",
"subdir": "documentation/prometheus-mixin"
}
},
"version": "31700a05df64c2b4e32bb0ecd8baa25279144778",
"sum": "/cohvDTaIiLElG66tKeQsi4v1M9mlGDKjOBSWivL9TU="
},
{
"name": "prometheus-operator",
"source": {
"git": {
"remote": "https://github.com/coreos/prometheus-operator",
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "8d44e0990230144177f97cf62ae4f43b1c4e3168",
"sum": "5U7/8MD3pF9O0YDTtUhg4vctkUBRVFxZxWUyhtNiBM8="
},
{
"name": "promgrafonnet",
"source": {
"git": {
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
"subdir": "lib/promgrafonnet"
}
},
"version": "a7ee9d1abe1b1a3670a02ede1135cadb660b9d0c",
"sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc="
},
{
"name": "slo-libsonnet",
"source": {
"git": {
"remote": "https://github.com/metalmatze/slo-libsonnet",
"subdir": "slo-libsonnet"
}
},
"version": "437c402c5f3ad86c3c16db8471f1649284fef0ee",
"sum": "2Zcyku1f558VrUpMaJnI78fahDksPLcS1idmxxwcQ7Q="
}
]
}

View File

@@ -1,17 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ./manifests/00namespace-namespace.yaml
- ./manifests/0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml
- ./manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml
- ./manifests/0prometheus-operator-0prometheusruleCustomResourceDefinition.yaml
- ./manifests/0prometheus-operator-0servicemonitorCustomResourceDefinition.yaml
- ./manifests/0prometheus-operator-clusterRole.yaml
- ./manifests/0prometheus-operator-clusterRoleBinding.yaml
- ./manifests/0prometheus-operator-deployment.yaml
- ./manifests/0prometheus-operator-service.yaml
- ./manifests/0prometheus-operator-serviceAccount.yaml
- ./manifests/0prometheus-operator-serviceMonitor.yaml
- ./manifests/alertmanager-alertmanager.yaml
- ./manifests/alertmanager-secret.yaml
- ./manifests/alertmanager-service.yaml
@@ -51,6 +40,7 @@ resources:
- ./manifests/prometheus-adapter-serviceAccount.yaml
- ./manifests/prometheus-clusterRole.yaml
- ./manifests/prometheus-clusterRoleBinding.yaml
- ./manifests/prometheus-operator-serviceMonitor.yaml
- ./manifests/prometheus-prometheus.yaml
- ./manifests/prometheus-roleBindingConfig.yaml
- ./manifests/prometheus-roleBindingSpecificNamespaces.yaml
@@ -65,3 +55,14 @@ resources:
- ./manifests/prometheus-serviceMonitorKubeControllerManager.yaml
- ./manifests/prometheus-serviceMonitorKubeScheduler.yaml
- ./manifests/prometheus-serviceMonitorKubelet.yaml
- ./manifests/setup/0namespace-namespace.yaml
- ./manifests/setup/prometheus-operator-0alertmanagerCustomResourceDefinition.yaml
- ./manifests/setup/prometheus-operator-0podmonitorCustomResourceDefinition.yaml
- ./manifests/setup/prometheus-operator-0prometheusCustomResourceDefinition.yaml
- ./manifests/setup/prometheus-operator-0prometheusruleCustomResourceDefinition.yaml
- ./manifests/setup/prometheus-operator-0servicemonitorCustomResourceDefinition.yaml
- ./manifests/setup/prometheus-operator-clusterRole.yaml
- ./manifests/setup/prometheus-operator-clusterRoleBinding.yaml
- ./manifests/setup/prometheus-operator-deployment.yaml
- ./manifests/setup/prometheus-operator-service.yaml
- ./manifests/setup/prometheus-operator-serviceAccount.yaml

View File

@@ -1,18 +0,0 @@
apiVersion: v1
kind: Service
metadata:
labels:
apps.kubernetes.io/component: controller
apps.kubernetes.io/name: prometheus-operator
apps.kubernetes.io/version: v0.30.0
name: prometheus-operator
namespace: monitoring
spec:
clusterIP: None
ports:
- name: http
port: 8080
targetPort: http
selector:
apps.kubernetes.io/component: controller
apps.kubernetes.io/name: prometheus-operator

View File

@@ -1,9 +0,0 @@
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
apps.kubernetes.io/component: controller
apps.kubernetes.io/name: prometheus-operator
apps.kubernetes.io/version: v0.30.0
name: prometheus-operator
namespace: monitoring

View File

@@ -1,18 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
apps.kubernetes.io/component: controller
apps.kubernetes.io/name: prometheus-operator
apps.kubernetes.io/version: v0.30.0
name: prometheus-operator
namespace: monitoring
spec:
endpoints:
- honorLabels: true
port: http
selector:
matchLabels:
apps.kubernetes.io/component: controller
apps.kubernetes.io/name: prometheus-operator
apps.kubernetes.io/version: v0.30.0

View File

@@ -8,11 +8,11 @@ metadata:
spec:
baseImage: quay.io/prometheus/alertmanager
nodeSelector:
beta.kubernetes.io/os: linux
kubernetes.io/os: linux
replicas: 3
securityContext:
fsGroup: 2000
runAsNonRoot: true
runAsUser: 1000
serviceAccountName: alertmanager-main
version: v0.17.0
version: v0.18.0

View File

@@ -1,6 +1,6 @@
apiVersion: v1
data:
alertmanager.yaml: Imdsb2JhbCI6CiAgInJlc29sdmVfdGltZW91dCI6ICI1bSIKInJlY2VpdmVycyI6Ci0gIm5hbWUiOiAibnVsbCIKInJvdXRlIjoKICAiZ3JvdXBfYnkiOgogIC0gImpvYiIKICAiZ3JvdXBfaW50ZXJ2YWwiOiAiNW0iCiAgImdyb3VwX3dhaXQiOiAiMzBzIgogICJyZWNlaXZlciI6ICJudWxsIgogICJyZXBlYXRfaW50ZXJ2YWwiOiAiMTJoIgogICJyb3V0ZXMiOgogIC0gIm1hdGNoIjoKICAgICAgImFsZXJ0bmFtZSI6ICJXYXRjaGRvZyIKICAgICJyZWNlaXZlciI6ICJudWxsIg==
alertmanager.yaml: Imdsb2JhbCI6CiAgInJlc29sdmVfdGltZW91dCI6ICI1bSIKInJlY2VpdmVycyI6Ci0gIm5hbWUiOiAibnVsbCIKInJvdXRlIjoKICAiZ3JvdXBfYnkiOgogIC0gIm5hbWVzcGFjZSIKICAiZ3JvdXBfaW50ZXJ2YWwiOiAiNW0iCiAgImdyb3VwX3dhaXQiOiAiMzBzIgogICJyZWNlaXZlciI6ICJudWxsIgogICJyZXBlYXRfaW50ZXJ2YWwiOiAiMTJoIgogICJyb3V0ZXMiOgogIC0gIm1hdGNoIjoKICAgICAgImFsZXJ0bmFtZSI6ICJXYXRjaGRvZyIKICAgICJyZWNlaXZlciI6ICJudWxsIg==
kind: Secret
metadata:
name: alertmanager-main

File diff suppressed because it is too large

View File

@@ -16,7 +16,7 @@ spec:
app: grafana
spec:
containers:
- image: grafana/grafana:6.0.1
- image: grafana/grafana:6.4.3
name: grafana
ports:
- containerPort: 3000
@@ -42,11 +42,14 @@ spec:
- mountPath: /etc/grafana/provisioning/dashboards
name: grafana-dashboards
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-cluster-rsrc-use
name: grafana-dashboard-k8s-cluster-rsrc-use
- mountPath: /grafana-dashboard-definitions/0/apiserver
name: grafana-dashboard-apiserver
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-node-rsrc-use
name: grafana-dashboard-k8s-node-rsrc-use
- mountPath: /grafana-dashboard-definitions/0/cluster-total
name: grafana-dashboard-cluster-total
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/controller-manager
name: grafana-dashboard-controller-manager
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-cluster
name: grafana-dashboard-k8s-resources-cluster
@@ -54,6 +57,9 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-namespace
name: grafana-dashboard-k8s-resources-namespace
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-node
name: grafana-dashboard-k8s-resources-node
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-pod
name: grafana-dashboard-k8s-resources-pod
readOnly: false
@@ -63,18 +69,51 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/k8s-resources-workloads-namespace
name: grafana-dashboard-k8s-resources-workloads-namespace
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/kubelet
name: grafana-dashboard-kubelet
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/namespace-by-pod
name: grafana-dashboard-namespace-by-pod
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/namespace-by-workload
name: grafana-dashboard-namespace-by-workload
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/node-cluster-rsrc-use
name: grafana-dashboard-node-cluster-rsrc-use
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/node-rsrc-use
name: grafana-dashboard-node-rsrc-use
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/nodes
name: grafana-dashboard-nodes
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/persistentvolumesusage
name: grafana-dashboard-persistentvolumesusage
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/pod-total
name: grafana-dashboard-pod-total
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/pods
name: grafana-dashboard-pods
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/prometheus-remote-write
name: grafana-dashboard-prometheus-remote-write
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/prometheus
name: grafana-dashboard-prometheus
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/proxy
name: grafana-dashboard-proxy
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/scheduler
name: grafana-dashboard-scheduler
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/statefulset
name: grafana-dashboard-statefulset
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/workload-total
name: grafana-dashboard-workload-total
readOnly: false
nodeSelector:
beta.kubernetes.io/os: linux
securityContext:
@@ -91,17 +130,23 @@ spec:
name: grafana-dashboards
name: grafana-dashboards
- configMap:
name: grafana-dashboard-k8s-cluster-rsrc-use
name: grafana-dashboard-k8s-cluster-rsrc-use
name: grafana-dashboard-apiserver
name: grafana-dashboard-apiserver
- configMap:
name: grafana-dashboard-k8s-node-rsrc-use
name: grafana-dashboard-k8s-node-rsrc-use
name: grafana-dashboard-cluster-total
name: grafana-dashboard-cluster-total
- configMap:
name: grafana-dashboard-controller-manager
name: grafana-dashboard-controller-manager
- configMap:
name: grafana-dashboard-k8s-resources-cluster
name: grafana-dashboard-k8s-resources-cluster
- configMap:
name: grafana-dashboard-k8s-resources-namespace
name: grafana-dashboard-k8s-resources-namespace
- configMap:
name: grafana-dashboard-k8s-resources-node
name: grafana-dashboard-k8s-resources-node
- configMap:
name: grafana-dashboard-k8s-resources-pod
name: grafana-dashboard-k8s-resources-pod
@@ -111,15 +156,48 @@ spec:
- configMap:
name: grafana-dashboard-k8s-resources-workloads-namespace
name: grafana-dashboard-k8s-resources-workloads-namespace
- configMap:
name: grafana-dashboard-kubelet
name: grafana-dashboard-kubelet
- configMap:
name: grafana-dashboard-namespace-by-pod
name: grafana-dashboard-namespace-by-pod
- configMap:
name: grafana-dashboard-namespace-by-workload
name: grafana-dashboard-namespace-by-workload
- configMap:
name: grafana-dashboard-node-cluster-rsrc-use
name: grafana-dashboard-node-cluster-rsrc-use
- configMap:
name: grafana-dashboard-node-rsrc-use
name: grafana-dashboard-node-rsrc-use
- configMap:
name: grafana-dashboard-nodes
name: grafana-dashboard-nodes
- configMap:
name: grafana-dashboard-persistentvolumesusage
name: grafana-dashboard-persistentvolumesusage
- configMap:
name: grafana-dashboard-pod-total
name: grafana-dashboard-pod-total
- configMap:
name: grafana-dashboard-pods
name: grafana-dashboard-pods
- configMap:
name: grafana-dashboard-prometheus-remote-write
name: grafana-dashboard-prometheus-remote-write
- configMap:
name: grafana-dashboard-prometheus
name: grafana-dashboard-prometheus
- configMap:
name: grafana-dashboard-proxy
name: grafana-dashboard-proxy
- configMap:
name: grafana-dashboard-scheduler
name: grafana-dashboard-scheduler
- configMap:
name: grafana-dashboard-statefulset
name: grafana-dashboard-statefulset
- configMap:
name: grafana-dashboard-workload-total
name: grafana-dashboard-workload-total


@@ -27,6 +27,7 @@ rules:
- daemonsets
- deployments
- replicasets
- ingresses
verbs:
- list
- watch
@@ -74,3 +75,33 @@ rules:
verbs:
- list
- watch
- apiGroups:
- certificates.k8s.io
resources:
- certificatesigningrequests
verbs:
- list
- watch
- apiGroups:
- storage.k8s.io
resources:
- storageclasses
- volumeattachments
verbs:
- list
- watch
- apiGroups:
- admissionregistration.k8s.io
resources:
- validatingwebhookconfigurations
- mutatingwebhookconfigurations
verbs:
- list
- watch
- apiGroups:
- networking.k8s.io
resources:
- networkpolicies
verbs:
- list
- watch
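
As a quick sanity check that the widened ClusterRole is in effect, you can impersonate the collector's service account and probe the newly covered resources. This is only a sketch: it assumes kube-state-metrics runs under a service account named kube-state-metrics in the monitoring namespace, and that your own user may impersonate service accounts.

kubectl auth can-i list networkpolicies.networking.k8s.io \
  --as=system:serviceaccount:monitoring:kube-state-metrics
kubectl auth can-i watch volumeattachments.storage.k8s.io \
  --as=system:serviceaccount:monitoring:kube-state-metrics

Both commands should answer "yes" once the updated role is applied.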


@@ -1,4 +1,4 @@
apiVersion: apps/v1beta2
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
@@ -55,7 +55,7 @@ spec:
- --port=8081
- --telemetry-host=127.0.0.1
- --telemetry-port=8082
image: quay.io/coreos/kube-state-metrics:v1.5.0
image: quay.io/coreos/kube-state-metrics:v1.9.2
name: kube-state-metrics
resources:
limits:
@@ -64,37 +64,8 @@ spec:
requests:
cpu: 100m
memory: 150Mi
- command:
- /pod_nanny
- --container=kube-state-metrics
- --cpu=100m
- --extra-cpu=2m
- --memory=150Mi
- --extra-memory=30Mi
- --threshold=5
- --deployment=kube-state-metrics
env:
- name: MY_POD_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.name
- name: MY_POD_NAMESPACE
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
image: k8s.gcr.io/addon-resizer:1.8.4
name: addon-resizer
resources:
limits:
cpu: 50m
memory: 30Mi
requests:
cpu: 10m
memory: 30Mi
nodeSelector:
beta.kubernetes.io/os: linux
kubernetes.io/os: linux
securityContext:
runAsNonRoot: true
runAsUser: 65534


@@ -11,6 +11,9 @@ spec:
honorLabels: true
interval: 30s
port: https-main
relabelings:
- action: labeldrop
regex: (pod|service|endpoint|namespace)
scheme: https
scrapeTimeout: 30s
tlsConfig:
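
The labeldrop rule added above strips the discovered pod, service, endpoint, and namespace target labels so they cannot collide with the identically named labels that kube-state-metrics attaches to its own series. One way to verify, assuming curl is available and a local port-forward to the prometheus-k8s service defined in these manifests:

kubectl -n monitoring port-forward svc/prometheus-k8s 9090 &
curl -sG 'http://localhost:9090/api/v1/query' \
  --data-urlencode 'query=up{job="kube-state-metrics"}'

The target labels in the result should no longer include pod, service, endpoint, or namespace.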


@@ -1,4 +1,4 @@
apiVersion: apps/v1beta2
apiVersion: apps/v1
kind: DaemonSet
metadata:
labels:
@@ -20,9 +20,11 @@ spec:
- --path.procfs=/host/proc
- --path.sysfs=/host/sys
- --path.rootfs=/host/root
- --no-collector.wifi
- --no-collector.hwmon
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/)
- --collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$
image: quay.io/prometheus/node-exporter:v0.17.0
image: quay.io/prometheus/node-exporter:v0.18.1
name: node-exporter
resources:
limits:
@@ -44,7 +46,7 @@ spec:
readOnly: true
- args:
- --logtostderr
- --secure-listen-address=$(IP):9100
- --secure-listen-address=[$(IP)]:9100
- --tls-cipher-suites=TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256
- --upstream=http://127.0.0.1:9100/
env:
@@ -68,7 +70,7 @@ spec:
hostNetwork: true
hostPID: true
nodeSelector:
beta.kubernetes.io/os: linux
kubernetes.io/os: linux
securityContext:
runAsNonRoot: true
runAsUser: 65534

View File

@@ -8,8 +8,15 @@ metadata:
spec:
endpoints:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
interval: 30s
interval: 15s
port: https
relabelings:
- action: replace
regex: (.*)
replacement: $1
sourceLabels:
- __meta_kubernetes_pod_node_name
targetLabel: instance
scheme: https
tlsConfig:
insecureSkipVerify: true
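
The added relabeling copies __meta_kubernetes_pod_node_name into the instance label, so node-exporter targets are identified by a stable node name rather than a pod address. With the same port-forward as above, a query such as

curl -sG 'http://localhost:9090/api/v1/query' \
  --data-urlencode 'query=up{job="node-exporter"}'

should now show one instance label per node.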


@@ -11,6 +11,7 @@ rules:
- metrics.k8s.io
resources:
- pods
- nodes
verbs:
- get
- list
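
With nodes added to the resource rules, the adapter can serve node metrics through the aggregated API. Assuming prometheus-adapter is registered as the APIService for metrics.k8s.io (as in this stack), both of the following should now succeed:

kubectl get --raw /apis/metrics.k8s.io/v1beta1/nodes
kubectl top node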


@@ -3,30 +3,30 @@ data:
config.yaml: |
resourceRules:
cpu:
containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container_name!="POD",container_name!="",pod_name!=""}[1m])) by (<<.GroupBy>>)
nodeQuery: sum(1 - rate(node_cpu_seconds_total{mode="idle"}[1m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)
containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}[5m])) by (<<.GroupBy>>)
nodeQuery: sum(1 - rate(node_cpu_seconds_total{mode="idle"}[5m]) * on(namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{<<.LabelMatchers>>}) by (<<.GroupBy>>)
resources:
overrides:
node:
resource: node
namespace:
resource: namespace
pod_name:
pod:
resource: pod
containerLabel: container_name
containerLabel: container
memory:
containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container_name!="POD",container_name!="",pod_name!=""}) by (<<.GroupBy>>)
nodeQuery: sum(node:node_memory_bytes_total:sum{<<.LabelMatchers>>} - node:node_memory_bytes_available:sum{<<.LabelMatchers>>}) by (<<.GroupBy>>)
containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,container!="POD",container!="",pod!=""}) by (<<.GroupBy>>)
nodeQuery: sum(node_memory_MemTotal_bytes{job="node-exporter",<<.LabelMatchers>>} - node_memory_MemAvailable_bytes{job="node-exporter",<<.LabelMatchers>>}) by (<<.GroupBy>>)
resources:
overrides:
node:
instance:
resource: node
namespace:
resource: namespace
pod_name:
pod:
resource: pod
containerLabel: container_name
window: 1m
containerLabel: container
window: 5m
kind: ConfigMap
metadata:
name: adapter-config
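
The rewritten queries track the cAdvisor label renames in newer kubelets (pod_name/container_name became pod/container) and widen the rate window from 1m to 5m. Reusing the port-forward from above, the new containerQuery shape can be exercised directly; the matcher below is illustrative, with the manifest's LabelMatchers templating removed:

curl -sG 'http://localhost:9090/api/v1/query' --data-urlencode \
  'query=sum(rate(container_cpu_usage_seconds_total{container!="POD",container!="",pod!=""}[5m])) by (pod)'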


@@ -1,4 +1,4 @@
apiVersion: apps/v1beta2
apiVersion: apps/v1
kind: Deployment
metadata:
name: prometheus-adapter
@@ -25,7 +25,7 @@ spec:
- --metrics-relist-interval=1m
- --prometheus-url=http://prometheus-k8s.monitoring.svc:9090/
- --secure-port=6443
image: quay.io/coreos/k8s-prometheus-adapter-amd64:v0.4.1
image: quay.io/coreos/k8s-prometheus-adapter-amd64:v0.5.0
name: prometheus-adapter
ports:
- containerPort: 6443
@@ -40,7 +40,7 @@ spec:
name: config
readOnly: false
nodeSelector:
beta.kubernetes.io/os: linux
kubernetes.io/os: linux
serviceAccountName: prometheus-adapter
volumes:
- emptyDir: {}


@@ -0,0 +1,18 @@
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.34.0
name: prometheus-operator
namespace: monitoring
spec:
endpoints:
- honorLabels: true
port: http
selector:
matchLabels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.34.0
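
This new ServiceMonitor selects the prometheus-operator Service (added further down in this changeset) by its app.kubernetes.io labels and scrapes its http port. A quick check that the selector actually resolves to a scrape target:

kubectl -n monitoring get endpoints prometheus-operator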


@@ -13,7 +13,9 @@ spec:
port: web
baseImage: quay.io/prometheus/prometheus
nodeSelector:
beta.kubernetes.io/os: linux
kubernetes.io/os: linux
podMonitorNamespaceSelector: {}
podMonitorSelector: {}
replicas: 2
resources:
requests:
@@ -29,4 +31,4 @@ spec:
serviceAccountName: prometheus-k8s
serviceMonitorNamespaceSelector: {}
serviceMonitorSelector: {}
version: v2.7.2
version: v2.11.0

File diff suppressed because it is too large


@@ -10,6 +10,38 @@ spec:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
interval: 30s
metricRelabelings:
- action: drop
regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
sourceLabels:
- __name__
- action: drop
regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
sourceLabels:
- __name__
- action: drop
regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
sourceLabels:
- __name__
- action: drop
regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
sourceLabels:
- __name__
- action: drop
regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
sourceLabels:
- __name__
- action: drop
regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
sourceLabels:
- __name__
- action: drop
regex: transformation_(transformation_latencies_microseconds|failures_total)
sourceLabels:
- __name__
- action: drop
regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|AvailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
sourceLabels:
- __name__
- action: drop
regex: etcd_(debugging|disk|request|server).*
sourceLabels:
@@ -22,6 +54,11 @@ spec:
regex: apiserver_admission_step_admission_latencies_seconds_.*
sourceLabels:
- __name__
- action: drop
regex: apiserver_request_duration_seconds_bucket;(0.15|0.25|0.3|0.35|0.4|0.45|0.6|0.7|0.8|0.9|1.25|1.5|1.75|2.5|3|3.5|4.5|6|7|8|9|15|25|30|50)
sourceLabels:
- __name__
- le
port: https
scheme: https
tlsConfig:
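
The final drop rule matches on __name__ together with le, discarding apiserver_request_duration_seconds_bucket series whose le value is in the listed set and leaving only a handful of buckets per histogram. To inspect what survives, again assuming the port-forward from earlier:

curl -sG 'http://localhost:9090/api/v1/query' \
  --data-urlencode 'query=count by (le) (apiserver_request_duration_seconds_bucket)'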


@@ -9,6 +9,38 @@ spec:
endpoints:
- interval: 30s
metricRelabelings:
- action: drop
regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
sourceLabels:
- __name__
- action: drop
regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
sourceLabels:
- __name__
- action: drop
regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
sourceLabels:
- __name__
- action: drop
regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
sourceLabels:
- __name__
- action: drop
regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
sourceLabels:
- __name__
- action: drop
regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
sourceLabels:
- __name__
- action: drop
regex: transformation_(transformation_latencies_microseconds|failures_total)
sourceLabels:
- __name__
- action: drop
regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|AvailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
sourceLabels:
- __name__
- action: drop
regex: etcd_(debugging|disk|request|server).*
sourceLabels:


@@ -10,7 +10,44 @@ spec:
- bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
honorLabels: true
interval: 30s
metricRelabelings:
- action: drop
regex: kubelet_(pod_worker_latency_microseconds|pod_start_latency_microseconds|cgroup_manager_latency_microseconds|pod_worker_start_latency_microseconds|pleg_relist_latency_microseconds|pleg_relist_interval_microseconds|runtime_operations|runtime_operations_latency_microseconds|runtime_operations_errors|eviction_stats_age_microseconds|device_plugin_registration_count|device_plugin_alloc_latency_microseconds|network_plugin_operations_latency_microseconds)
sourceLabels:
- __name__
- action: drop
regex: scheduler_(e2e_scheduling_latency_microseconds|scheduling_algorithm_predicate_evaluation|scheduling_algorithm_priority_evaluation|scheduling_algorithm_preemption_evaluation|scheduling_algorithm_latency_microseconds|binding_latency_microseconds|scheduling_latency_seconds)
sourceLabels:
- __name__
- action: drop
regex: apiserver_(request_count|request_latencies|request_latencies_summary|dropped_requests|storage_data_key_generation_latencies_microseconds|storage_transformation_failures_total|storage_transformation_latencies_microseconds|proxy_tunnel_sync_latency_secs)
sourceLabels:
- __name__
- action: drop
regex: kubelet_docker_(operations|operations_latency_microseconds|operations_errors|operations_timeout)
sourceLabels:
- __name__
- action: drop
regex: reflector_(items_per_list|items_per_watch|list_duration_seconds|lists_total|short_watches_total|watch_duration_seconds|watches_total)
sourceLabels:
- __name__
- action: drop
regex: etcd_(helper_cache_hit_count|helper_cache_miss_count|helper_cache_entry_count|request_cache_get_latencies_summary|request_cache_add_latencies_summary|request_latencies_summary)
sourceLabels:
- __name__
- action: drop
regex: transformation_(transformation_latencies_microseconds|failures_total)
sourceLabels:
- __name__
- action: drop
regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|AvailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
sourceLabels:
- __name__
port: https-metrics
relabelings:
- sourceLabels:
- __metrics_path__
targetLabel: metrics_path
scheme: https
tlsConfig:
insecureSkipVerify: true
@@ -24,6 +61,10 @@ spec:
- __name__
path: /metrics/cadvisor
port: https-metrics
relabelings:
- sourceLabels:
- __metrics_path__
targetLabel: metrics_path
scheme: https
tlsConfig:
insecureSkipVerify: true

File diff suppressed because it is too large


@@ -0,0 +1,239 @@
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
creationTimestamp: null
name: podmonitors.monitoring.coreos.com
spec:
group: monitoring.coreos.com
names:
kind: PodMonitor
plural: podmonitors
scope: Namespaced
validation:
openAPIV3Schema:
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
spec:
description: PodMonitorSpec contains specification parameters for a PodMonitor.
properties:
jobLabel:
description: The label to use to retrieve the job name from.
type: string
namespaceSelector:
description: NamespaceSelector is a selector for selecting either all
namespaces or a list of namespaces.
properties:
any:
description: Boolean describing whether all namespaces are selected
in contrast to a list restricting them.
type: boolean
matchNames:
description: List of namespace names.
items:
type: string
type: array
type: object
podMetricsEndpoints:
description: A list of endpoints allowed as part of this PodMonitor.
items:
description: PodMetricsEndpoint defines a scrapeable endpoint of a
Kubernetes Pod serving Prometheus metrics.
properties:
honorLabels:
description: HonorLabels chooses the metric's labels on collisions
with target labels.
type: boolean
honorTimestamps:
description: HonorTimestamps controls whether Prometheus respects
the timestamps present in scraped data.
type: boolean
interval:
description: Interval at which metrics should be scraped
type: string
metricRelabelings:
description: MetricRelabelConfigs to apply to samples before ingestion.
items:
description: 'RelabelConfig allows dynamic rewriting of the
label set, being applied to samples before ingestion. It defines
`<metric_relabel_configs>`-section of Prometheus configuration.
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
description: Action to perform based on regex matching.
Default is 'replace'
type: string
modulus:
description: Modulus to take of the hash of the source label
values.
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
type: string
replacement:
description: Replacement value against which a regex replace
is performed if the regular expression matches. Regex
capture groups are available. Default is '$1'
type: string
separator:
description: Separator placed between concatenated source
label values. default is ';'.
type: string
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular expression
for the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
type: string
type: object
type: array
params:
description: Optional HTTP URL parameters
type: object
path:
description: HTTP path to scrape for metrics.
type: string
port:
description: Name of the port this endpoint refers to. Mutually
exclusive with targetPort.
type: string
proxyUrl:
description: ProxyURL (e.g. http://proxyserver:2195) directs scrapes
to proxy through this endpoint.
type: string
relabelings:
description: 'RelabelConfigs to apply to samples before ingestion.
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items:
description: 'RelabelConfig allows dynamic rewriting of the
label set, being applied to samples before ingestion. It defines
`<metric_relabel_configs>`-section of Prometheus configuration.
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties:
action:
description: Action to perform based on regex matching.
Default is 'replace'
type: string
modulus:
description: Modulus to take of the hash of the source label
values.
format: int64
type: integer
regex:
description: Regular expression against which the extracted
value is matched. Default is '(.*)'
type: string
replacement:
description: Replacement value against which a regex replace
is performed if the regular expression matches. Regex
capture groups are available. Default is '$1'
type: string
separator:
description: Separator placed between concatenated source
label values. default is ';'.
type: string
sourceLabels:
description: The source labels select values from existing
labels. Their content is concatenated using the configured
separator and matched against the configured regular expression
for the replace, keep, and drop actions.
items:
type: string
type: array
targetLabel:
description: Label to which the resulting value is written
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
type: string
type: object
type: array
scheme:
description: HTTP scheme to use for scraping.
type: string
scrapeTimeout:
description: Timeout after which the scrape is ended
type: string
targetPort:
anyOf:
- type: string
- type: integer
type: object
type: array
podTargetLabels:
description: PodTargetLabels transfers labels on the Kubernetes Pod
onto the target.
items:
type: string
type: array
sampleLimit:
description: SampleLimit defines per-scrape limit on number of scraped
samples that will be accepted.
format: int64
type: integer
selector:
description: A label selector is a label query over a set of resources.
The result of matchLabels and matchExpressions are ANDed. An empty
label selector matches all objects. A null label selector matches
no objects.
properties:
matchExpressions:
description: matchExpressions is a list of label selector requirements.
The requirements are ANDed.
items:
description: A label selector requirement is a selector that contains
values, a key, and an operator that relates the key and values.
properties:
key:
description: key is the label key that the selector applies
to.
type: string
operator:
description: operator represents a key's relationship to a
set of values. Valid operators are In, NotIn, Exists and
DoesNotExist.
type: string
values:
description: values is an array of string values. If the operator
is In or NotIn, the values array must be non-empty. If the
operator is Exists or DoesNotExist, the values array must
be empty. This array is replaced during a strategic merge
patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
description: matchLabels is a map of {key,value} pairs. A single
{key,value} in the matchLabels map is equivalent to an element
of matchExpressions, whose key field is "key", the operator is
"In", and the values array contains only "value". The requirements
are ANDed.
type: object
type: object
required:
- podMetricsEndpoints
- selector
type: object
type: object
version: v1
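
Per the schema above, only podMetricsEndpoints and selector are required, and the Prometheus resource in this changeset sets podMonitorSelector: {} so any PodMonitor is picked up. A minimal object could look like the following; all names here are illustrative, not taken from the manifests:

cat <<'EOF' | kubectl apply -f -
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: example-app
  namespace: monitoring
spec:
  selector:
    matchLabels:
      app: example-app
  podMetricsEndpoints:
  - port: metrics
    interval: 30s
EOF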


@@ -15,12 +15,12 @@ spec:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
description: ObjectMeta is metadata that all persisted resources must have,
@@ -70,179 +70,26 @@ spec:
If this field is specified and the generated name exists, the server will NOT return a 409 - instead, it will either return 201 Created or 500 with Reason ServerTimeout indicating a unique name could not be found in the time allotted, and the client should retry (optionally after the time indicated in the Retry-After header).
Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#idempotency
Applied only if Name is not specified. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#idempotency
type: string
generation:
description: A sequence number representing a specific generation of
the desired state. Populated by the system. Read-only.
format: int64
type: integer
initializers:
description: Initializers tracks the progress of initialization.
properties:
pending:
description: Pending is a list of initializers that must execute
in order before this object is visible. When the last pending
initializer is removed, and no failing result is set, the initializers
struct will be set to nil and the object is considered as initialized
and visible to all clients.
items:
description: Initializer is information about an initializer that
has not yet completed.
properties:
name:
description: name of the process that is responsible for initializing
this object.
type: string
required:
- name
type: object
type: array
result:
description: Status is a return value for calls that don't return
other objects.
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this
representation of an object. Servers should convert recognized
schemas to the latest internal value, and may reject unrecognized
values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
type: string
code:
description: Suggested HTTP return code for this status, 0 if
not set.
format: int32
type: integer
details:
description: StatusDetails is a set of additional properties
that MAY be set by the server to provide additional information
about a response. The Reason field of a Status object defines
what attributes will be set. Clients must ignore fields that
do not match the defined type of each attribute, and should
assume that any attribute may be empty, invalid, or under
defined.
properties:
causes:
description: The Causes array includes more details associated
with the StatusReason failure. Not all StatusReasons may
provide detailed causes.
items:
description: StatusCause provides more information about
an api.Status failure, including cases when multiple
errors are encountered.
properties:
field:
description: |-
The field of the resource that has caused this error, as named by its JSON serialization. May include dot and postfix notation for nested attributes. Arrays are zero-indexed. Fields may appear more than once in an array of causes due to fields having multiple errors. Optional.
Examples:
"name" - the field "name" on the current resource
"items[0].name" - the field "name" on the first array entry in "items"
type: string
message:
description: A human-readable description of the cause
of the error. This field may be presented as-is
to a reader.
type: string
reason:
description: A machine-readable description of the
cause of the error. If this value is empty there
is no information available.
type: string
type: object
type: array
group:
description: The group attribute of the resource associated
with the status StatusReason.
type: string
kind:
description: 'The kind attribute of the resource associated
with the status StatusReason. On some operations may differ
from the requested resource Kind. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
type: string
name:
description: The name attribute of the resource associated
with the status StatusReason (when there is a single name
which can be described).
type: string
retryAfterSeconds:
description: If specified, the time in seconds before the
operation should be retried. Some errors may indicate
the client must take an alternate action - for those errors
this field may indicate how long to wait before taking
the alternate action.
format: int32
type: integer
uid:
description: 'UID of the resource. (when there is a single
resource which can be described). More info: http://kubernetes.io/docs/user-guide/identifiers#uids'
type: string
type: object
kind:
description: 'Kind is a string value representing the REST resource
this object represents. Servers may infer this from the endpoint
the client submits requests to. Cannot be updated. In CamelCase.
More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
type: string
message:
description: A human-readable description of the status of this
operation.
type: string
metadata:
description: ListMeta describes metadata that synthetic resources
must have, including lists and various status objects. A resource
may have only one of {ObjectMeta, ListMeta}.
properties:
continue:
description: continue may be set if the user set a limit
on the number of items returned, and indicates that the
server has more data available. The value is opaque and
may be used to issue another request to the endpoint that
served this list to retrieve the next set of available
objects. Continuing a consistent list may not be possible
if the server configuration has changed or more than a
few minutes have passed. The resourceVersion field returned
when using this continue value will be identical to the
value in the first response, unless you have received
this token from an error message.
type: string
resourceVersion:
description: 'String that identifies the server''s internal
version of this object that can be used by clients to
determine when objects have changed. Value must be treated
as opaque by clients and passed unmodified back to the
server. Populated by the system. Read-only. More info:
https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency'
type: string
selfLink:
description: selfLink is a URL representing this object.
Populated by the system. Read-only.
type: string
type: object
reason:
description: A machine-readable description of why this operation
is in the "Failure" status. If this value is empty there is
no information available. A Reason clarifies an HTTP status
code but does not override it.
type: string
status:
description: 'Status of the operation. One of: "Success" or
"Failure". More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status'
type: string
type: object
required:
- pending
type: object
labels:
description: 'Map of string keys and values that can be used to organize
and categorize (scope and select) objects. May match selectors of
replication controllers and services. More info: http://kubernetes.io/docs/user-guide/labels'
type: object
managedFields:
description: |-
ManagedFields maps workflow-id and version to the set of fields that are managed by that workflow. This is mostly for internal housekeeping, and users typically shouldn't need to set or understand this field. A workflow can be the user's name, a controller's name, or the name of a specific apply path like "ci-cd". The set of fields is always in the version that the workflow used when modifying the object.
This field is alpha and can be changed or removed without notice.
description: ManagedFields maps workflow-id and version to the set of
fields that are managed by that workflow. This is mostly for internal
housekeeping, and users typically shouldn't need to set or understand
this field. A workflow can be the user's name, a controller's name,
or the name of a specific apply path like "ci-cd". The set of fields
is always in the version that the workflow used when modifying the
object.
items:
description: ManagedFieldsEntry is a workflow-id, a FieldSet and the
group version of the resource that the fieldset applies to.
@@ -254,9 +101,18 @@ spec:
the version of a field set because it cannot be automatically
converted.
type: string
fields:
description: 'Fields stores a set of fields in a data structure
like a Trie. To understand how this is used, see: https://github.com/kubernetes-sigs/structured-merge-diff'
fieldsType:
description: 'FieldsType is the discriminator for the different
fields format and version. There is currently only one possible
value: "FieldsV1"'
type: string
fieldsV1:
description: |-
FieldsV1 stores a set of fields in a data structure like a Trie, in JSON format.
Each key is either a '.' representing the field itself, and will always map to an empty set, or a string representing a sub-field or item. The string will follow one of these four formats: 'f:<name>', where <name> is the name of a field in a struct, or key in a map 'v:<value>', where <value> is the exact json formatted value of a list item 'i:<index>', where <index> is position of a item in a list 'k:<keys>', where <keys> is a map of a list item's key fields to their unique values If a key maps to an empty Fields value, the field that key represents is part of the set.
The exact format is defined in sigs.k8s.io/structured-merge-diff
type: object
manager:
description: Manager is an identifier of the workflow managing
@@ -314,7 +170,7 @@ spec:
description: If true, this reference points to the managing controller.
type: boolean
kind:
description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
name:
description: 'Name of the referent. More info: http://kubernetes.io/docs/user-guide/identifiers#names'
@@ -333,11 +189,13 @@ spec:
description: |-
An opaque value that represents the internal version of this object that can be used by clients to determine when objects have changed. May be used for optimistic concurrency, change detection, and the watch operation on a resource or set of resources. Clients must treat these values as opaque and passed unmodified back to the server. They may only be valid for a particular resource or set of resources.
Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#concurrency-control-and-consistency
Populated by the system. Read-only. Value must be treated as opaque by clients and . More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
type: string
selfLink:
description: SelfLink is a URL representing this object. Populated by
the system. Read-only.
description: |-
SelfLink is a URL representing this object. Populated by the system. Read-only.
DEPRECATED Kubernetes will stop propagating this field in 1.20 release and the field is planned to be removed in 1.21 release.
type: string
uid:
description: |-


@@ -15,12 +15,12 @@ spec:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
spec:
description: ServiceMonitorSpec contains specification parameters for a
@@ -47,7 +47,7 @@ spec:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
type: string
optional:
description: Specify whether the Secret or it's key must
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
@@ -64,7 +64,7 @@ spec:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
type: string
optional:
description: Specify whether the Secret or it's key must
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
@@ -74,10 +74,31 @@ spec:
bearerTokenFile:
description: File to read bearer token for scraping targets.
type: string
bearerTokenSecret:
description: SecretKeySelector selects a key of a Secret.
properties:
key:
description: The key of the secret to select from. Must be
a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
type: string
optional:
description: Specify whether the Secret or its key must be
defined
type: boolean
required:
- key
type: object
honorLabels:
description: HonorLabels chooses the metric's labels on collisions
with target labels.
type: boolean
honorTimestamps:
description: HonorTimestamps controls whether Prometheus respects
the timestamps present in scraped data.
type: boolean
interval:
description: Interval at which metrics should be scraped
type: string
@@ -141,7 +162,7 @@ spec:
to proxy through this endpoint.
type: string
relabelings:
description: 'RelabelConfigs to apply to samples before ingestion.
description: 'RelabelConfigs to apply to samples before scraping.
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items:
description: 'RelabelConfig allows dynamic rewriting of the
@@ -199,18 +220,40 @@ spec:
tlsConfig:
description: TLSConfig specifies TLS configuration parameters.
properties:
ca: {}
caFile:
description: The CA cert to use for the targets.
description: Path to the CA cert in the Prometheus container
to use for the targets.
type: string
cert: {}
certFile:
description: The client cert file for the targets.
description: Path to the client cert file in the Prometheus
container for the targets.
type: string
insecureSkipVerify:
description: Disable target certificate validation.
type: boolean
keyFile:
description: The client key file for the targets.
description: Path to the client key file in the Prometheus
container for the targets.
type: string
keySecret:
description: SecretKeySelector selects a key of a Secret.
properties:
key:
description: The key of the secret to select from. Must
be a valid secret key.
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
type: string
optional:
description: Specify whether the Secret or its key must
be defined
type: boolean
required:
- key
type: object
serverName:
description: Used to verify the hostname for the targets.
type: string
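
The new bearerTokenSecret field lets an endpoint read its bearer token from a Secret key instead of a file path inside the Prometheus container. A sketch of a ServiceMonitor using it, with illustrative object, Secret, and key names:

cat <<'EOF' | kubectl apply -f -
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: example-app
  namespace: monitoring
spec:
  selector:
    matchLabels:
      app: example-app
  endpoints:
  - port: metrics
    interval: 30s
    bearerTokenSecret:
      name: scrape-token
      key: token
EOF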


@@ -2,9 +2,9 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
apps.kubernetes.io/component: controller
apps.kubernetes.io/name: prometheus-operator
apps.kubernetes.io/version: v0.30.0
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.34.0
name: prometheus-operator
rules:
- apiGroups:
@@ -21,6 +21,7 @@ rules:
- prometheuses/finalizers
- alertmanagers/finalizers
- servicemonitors
- podmonitors
- prometheusrules
verbs:
- '*'


@@ -2,9 +2,9 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
apps.kubernetes.io/component: controller
apps.kubernetes.io/name: prometheus-operator
apps.kubernetes.io/version: v0.30.0
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.34.0
name: prometheus-operator
roleRef:
apiGroup: rbac.authorization.k8s.io


@@ -1,32 +1,32 @@
apiVersion: apps/v1beta2
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
apps.kubernetes.io/component: controller
apps.kubernetes.io/name: prometheus-operator
apps.kubernetes.io/version: v0.30.0
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.34.0
name: prometheus-operator
namespace: monitoring
spec:
replicas: 1
selector:
matchLabels:
apps.kubernetes.io/component: controller
apps.kubernetes.io/name: prometheus-operator
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
template:
metadata:
labels:
apps.kubernetes.io/component: controller
apps.kubernetes.io/name: prometheus-operator
apps.kubernetes.io/version: v0.30.0
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.34.0
spec:
containers:
- args:
- --kubelet-service=kube-system/kubelet
- --logtostderr=true
- --config-reloader-image=quay.io/coreos/configmap-reload:v0.0.1
- --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.30.0
image: quay.io/coreos/prometheus-operator:v0.30.0
- --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.34.0
image: quay.io/coreos/prometheus-operator:v0.34.0
name: prometheus-operator
ports:
- containerPort: 8080
@@ -40,7 +40,6 @@ spec:
memory: 100Mi
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
nodeSelector:
beta.kubernetes.io/os: linux
securityContext:


@@ -0,0 +1,18 @@
apiVersion: v1
kind: Service
metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.34.0
name: prometheus-operator
namespace: monitoring
spec:
clusterIP: None
ports:
- name: http
port: 8080
targetPort: http
selector:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator


@@ -0,0 +1,9 @@
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator
app.kubernetes.io/version: v0.34.0
name: prometheus-operator
namespace: monitoring

scripts/minikube-start-kvm.sh Executable file

@@ -0,0 +1,12 @@
#!/bin/bash
minikube delete
minikube addons disable metrics-server
minikube start \
--vm-driver=kvm2 \
--kubernetes-version=v1.16.0 \
--memory=6g \
--bootstrapper=kubeadm \
--extra-config=kubelet.authentication-token-webhook=true \
--extra-config=kubelet.authorization-mode=Webhook \
--extra-config=scheduler.address=0.0.0.0 \
--extra-config=controller-manager.address=0.0.0.0

scripts/minikube-start.sh Executable file

@@ -0,0 +1,11 @@
#!/bin/bash
minikube delete
minikube addons disable metrics-server
minikube start \
--kubernetes-version=v1.16.0 \
--memory=6g \
--bootstrapper=kubeadm \
--extra-config=kubelet.authentication-token-webhook=true \
--extra-config=kubelet.authorization-mode=Webhook \
--extra-config=scheduler.address=0.0.0.0 \
--extra-config=controller-manager.address=0.0.0.0

scripts/monitoring-deploy.sh Executable file

@@ -0,0 +1,11 @@
#!/bin/bash
# create namespace and CRDs
kubectl create -f manifests/setup
# wait for CRD creation to complete
until kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done
# create monitoring components
kubectl create -f manifests/


@@ -17,6 +17,7 @@ package e2e
import (
"log"
"os"
"strings"
"testing"
"time"
@@ -57,23 +58,22 @@ func testMain(m *testing.M) int {
}
func TestQueryPrometheus(t *testing.T) {
t.Parallel()
queries := []struct {
query string
expectN int
}{
{
// query: `up{job="node-exporter"} == 1`,
// expectN: 1,
// }, {
query: `up{job="node-exporter"} == 1`,
expectN: 1,
}, {
// query: `up{job="kubelet"} == 1`,
// expectN: 1,
// }, {
query: `up{job="apiserver"} == 1`,
expectN: 1,
// }, {
// query: `up{job="kube-state-metrics"} == 1`,
// expectN: 1,
}, {
query: `up{job="kube-state-metrics"} == 1`,
expectN: 1,
}, {
query: `up{job="prometheus-k8s"} == 1`,
expectN: 1,
@@ -87,7 +87,7 @@ func TestQueryPrometheus(t *testing.T) {
}
// Wait for pod to respond to queries at all. Then start verifying their results.
err := wait.Poll(5*time.Second, 1*time.Minute, func() (bool, error) {
err := wait.Poll(5*time.Second, 2*time.Minute, func() (bool, error) {
_, err := promClient.query("up")
return err == nil, nil
})
@@ -116,3 +116,25 @@ func TestQueryPrometheus(t *testing.T) {
t.Fatal(err)
}
}
func TestDroppedMetrics(t *testing.T) {
// query metadata for all metrics across all jobs
md, err := promClient.metadata("{job=~\".+\"}")
if err != nil {
log.Fatal(err)
}
for _, k := range md.Data {
// check if the metric's help text contains Deprecated
if strings.Contains(k.Help, "Deprecated") {
// query prometheus for the Deprecated metric
n, err := promClient.query(k.Metric)
if err != nil {
log.Fatal(err)
}
if n > 0 {
t.Fatalf("deprecated metric with name: %s and help text: %s exists.", k.Metric, k.Help)
}
}
}
}
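
The new test fetches metadata for every metric and fails if any metric whose help text marks it as deprecated still has samples. To iterate on it in isolation, something like the following should work, assuming the e2e package lives under tests/e2e and KUBECONFIG points at a cluster with the stack deployed:

go test -run TestDroppedMetrics ./tests/e2e -count=1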


@@ -15,6 +15,10 @@
package e2e
import (
"bytes"
"encoding/json"
"fmt"
"k8s.io/client-go/kubernetes"
"github.com/Jeffail/gabs"
@@ -50,3 +54,41 @@ func (c *prometheusClient) query(query string) (int, error) {
n, err := res.ArrayCountP("data.result")
return n, err
}
type Metadata struct {
Status string `json:"status,omitempty"`
Data []Data `json:"data,omitempty"`
}
type Data struct {
Metric string `json:"metric,omitempty"`
Help string `json:"help,omitempty"`
}
// metadata makes a request against the Prometheus /api/v1/targets/metadata endpoint.
// It returns all the metrics and their metadata.
func (c *prometheusClient) metadata(query string) (Metadata, error) {
req := c.kubeClient.CoreV1().RESTClient().Get().
Namespace("monitoring").
Resource("pods").
SubResource("proxy").
Name("prometheus-k8s-0:9090").
Suffix("/api/v1/targets/metadata").Param("match_target", query)
var data Metadata
b, err := req.DoRaw()
if err != nil {
return data, err
}
r := bytes.NewReader(b)
decoder := json.NewDecoder(r)
err = decoder.Decode(&data)
if err != nil {
return data, err
}
if data.Status != "success" {
return data, fmt.Errorf("status of returned response was not successful; status: %s", data.Status)
}
return data, err
}
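
The same endpoint is easy to poke by hand: it accepts a match_target selector and returns each metric name with its help text. With a port-forward to the prometheus-k8s service, as used in the earlier checks:

curl -sG 'http://localhost:9090/api/v1/targets/metadata' \
  --data-urlencode 'match_target={job=~".+"}'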


@@ -10,19 +10,33 @@ set -x
curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl
chmod +x kubectl
curl -Lo kind https://github.com/kubernetes-sigs/kind/releases/download/0.2.1/kind-linux-amd64
curl -Lo kind https://github.com/kubernetes-sigs/kind/releases/download/v0.6.1/kind-linux-amd64
chmod +x kind
./kind create cluster
export KUBECONFIG="$(./kind get kubeconfig-path)"
run_e2e_tests() {
cluster_version=$1
./kubectl apply -f manifests/0prometheus-operator-0alertmanagerCustomResourceDefinition.yaml
./kubectl apply -f manifests/0prometheus-operator-0prometheusCustomResourceDefinition.yaml
./kubectl apply -f manifests/0prometheus-operator-0prometheusruleCustomResourceDefinition.yaml
./kubectl apply -f manifests/0prometheus-operator-0servicemonitorCustomResourceDefinition.yaml
./kind create cluster --image=kindest/node:$cluster_version
export KUBECONFIG="$(./kind get kubeconfig-path)"
# Wait for CRDs to be successfully registered
sleep 10
# create namespace, permissions, and CRDs
./kubectl create -f manifests/setup
# wait for CRD creation to complete
until ./kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done
# create monitoring components
./kubectl create -f manifests/
make test-e2e
./kind delete cluster
}
cluster_compatible_versions=("v1.16.1" "v1.17.0")
for cluster_version in "${cluster_compatible_versions[@]}"
do
run_e2e_tests $cluster_version
done
./kubectl apply -f manifests
make test-e2e