Merge branch 'prometheus-operator'

This commit is contained in:
Matthias Loibl
2019-04-12 14:04:50 +02:00
182 changed files with 24931 additions and 5 deletions

4
.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
tmp/
minikube-manifests/
vendor/
./auth

65
Makefile Normal file
View File

@@ -0,0 +1,65 @@
# jsonnet formatter command line; used by the `fmt` target below.
JSONNET_FMT := jsonnet fmt -n 2 --max-blank-lines 2 --string-style s --comment-style s
# Helper binaries are expected under $(GOPATH)/bin; the rules near the end of
# this Makefile fetch them with `go get` when they do not exist yet.
JB_BINARY:=$(GOPATH)/bin/jb
EMBEDMD_BINARY:=$(GOPATH)/bin/embedmd
# Default goal: regenerate manifests and docs, format the jsonnet sources, run the tests.
all: generate fmt test
# Build the `po-jsonnet` Docker image. It is rebuilt only when
# ../../scripts/jsonnet/Dockerfile is newer than the marker file touched below.
../../hack/jsonnet-docker-image: ../../scripts/jsonnet/Dockerfile
# Create empty target file, for the sole purpose of recording when this target
# was last executed via the last-modification timestamp on the file. See
# https://www.gnu.org/software/make/manual/make.html#Empty-Targets
# The Dockerfile is fed on stdin (`-f -`); the build context is the current directory.
docker build -f - -t po-jsonnet . < ../../scripts/jsonnet/Dockerfile
touch $@
# Run `make generate` inside the po-jsonnet container instead of on the host.
# The container runs with the invoking user's uid:gid, mounts a directory
# derived from $PWD (presumably the prometheus-operator repository root --
# TODO confirm) at its GOPATH location, and mounts the host's Go build cache.
generate-in-docker: ../../hack/jsonnet-docker-image
@echo ">> Compiling assets and generating Kubernetes manifests"
docker run \
--rm \
-u=$(shell id -u $(USER)):$(shell id -g $(USER)) \
-v $(shell dirname $(dir $(abspath $(dir $$PWD)))):/go/src/github.com/coreos/prometheus-operator/ \
-v $(shell go env GOCACHE):/.cache/go-build \
--workdir /go/src/github.com/coreos/prometheus-operator/contrib/kube-prometheus \
po-jsonnet make generate
# Regenerate everything derived from the jsonnet sources: the manifests and
# the code snippets embedded in the Markdown documentation.
generate: manifests **.md
# Run `embedmd -w` over every *.md file outside vendor/. The rule is re-run when
# embedmd, anything under examples/, build.sh or example.jsonnet changes.
**.md: $(EMBEDMD_BINARY) $(shell find examples) build.sh example.jsonnet
$(EMBEDMD_BINARY) -w `find . -name "*.md" | grep -v vendor`
# Render the Kubernetes manifests into manifests/ with build.sh.
# examples/kustomize.jsonnet is the file that is actually compiled, so it is
# listed as a prerequisite; otherwise editing it would not mark the target as
# out of date. example.jsonnet is kept as a prerequisite for compatibility.
manifests: vendor example.jsonnet examples/kustomize.jsonnet build.sh
	rm -rf manifests
	./build.sh ./examples/kustomize.jsonnet
# Recreate vendor/ from scratch with jsonnet-bundler (`jb install`) whenever
# the jb binary, jsonnetfile.json or jsonnetfile.lock.json is newer than vendor/.
vendor: $(JB_BINARY) jsonnetfile.json jsonnetfile.lock.json
rm -rf vendor
$(JB_BINARY) install
# Format every *.jsonnet and *.libsonnet file outside vendor/ in place.
# The two -name tests are grouped so that -print applies to both patterns:
# without the parentheses, find's implicit "and" binds tighter than -o, so
# -print is attached to '*.jsonnet' only and *.libsonnet files are skipped.
fmt:
	find . -name 'vendor' -prune -o \( -name '*.libsonnet' -o -name '*.jsonnet' \) -print | \
		xargs -n 1 -- $(JSONNET_FMT) -i
# Install the jsonnet dependencies with jb, then run the ./test.sh script.
test: $(JB_BINARY)
$(JB_BINARY) install
./test.sh
# Run the Go end-to-end tests in ./tests/e2e verbosely with a 55 minute timeout.
# NOTE(review): -count=1 is presumably there to bypass Go's test result cache -- confirm.
test-e2e:
go test -timeout 55m -v ./tests/e2e -count=1
# Run `make test` inside the po-jsonnet container instead of on the host.
# The container setup (user, volume mounts, working directory) is the same as
# for generate-in-docker; only the make goal differs. The progress message
# names the tests, since this target does not generate manifests.
test-in-docker: ../../hack/jsonnet-docker-image
	@echo ">> Running tests in the po-jsonnet container"
	docker run \
	--rm \
	-u=$(shell id -u $(USER)):$(shell id -g $(USER)) \
	-v $(shell dirname $(dir $(abspath $(dir $$PWD)))):/go/src/github.com/coreos/prometheus-operator/ \
	-v $(shell go env GOCACHE):/.cache/go-build \
	--workdir /go/src/github.com/coreos/prometheus-operator/contrib/kube-prometheus \
	po-jsonnet make test
# Fetch jsonnet-bundler (`jb`) with `go get -u` when $(GOPATH)/bin/jb does not exist.
$(JB_BINARY):
go get -u github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb
# Fetch embedmd with `go get` when $(GOPATH)/bin/embedmd does not exist.
$(EMBEDMD_BINARY):
go get github.com/campoy/embedmd
# Targets that only run commands and never produce a file of the same name.
# `all` and `test-e2e` are included so that a stray file or directory with one
# of those names cannot make the target look up to date.
.PHONY: all generate generate-in-docker test test-in-docker test-e2e fmt

634
README.md
View File

@@ -1,9 +1,633 @@
# kube-prometheus # kube-prometheus
This repository collects Kubernetes manifests, dashboards, and alerting rules > Note that everything in the `contrib/kube-prometheus/` directory is experimental and may change significantly at any time.
combined with documentation and scripts to provide single-command deployments
of end-to-end Kubernetes cluster monitoring.
# This repository has moved This repository collects Kubernetes manifests, [Grafana](http://grafana.com/) dashboards, and [Prometheus rules](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/) combined with documentation and scripts to provide easy to operate end-to-end Kubernetes cluster monitoring with [Prometheus](https://prometheus.io/) using the Prometheus Operator.
This repository has been merged with the [Prometheus Operator](https://github.com/coreos/prometheus-operator). It can now be found under [`contrib/kube-prometheus`](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus). The content of this project is written in [jsonnet](http://jsonnet.org/). This project could both be described as a package as well as a library.
Components included in this package:
* The [Prometheus Operator](https://github.com/coreos/prometheus-operator)
* Highly available [Prometheus](https://prometheus.io/)
* Highly available [Alertmanager](https://github.com/prometheus/alertmanager)
* [Prometheus node-exporter](https://github.com/prometheus/node_exporter)
* [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics)
* [Grafana](https://grafana.com/)
This stack is meant for cluster monitoring, so it is pre-configured to collect metrics from all Kubernetes components. In addition to that it delivers a default set of dashboards and alerting rules. Many of the useful dashboards and alerts come from the [kubernetes-mixin project](https://github.com/kubernetes-monitoring/kubernetes-mixin), similar to this project it provides composable jsonnet as a library for users to customize to their needs.
## Table of contents
* [Prerequisites](#prerequisites)
* [minikube](#minikube)
* [Quickstart](#quickstart)
* [Customizing Kube-Prometheus](#customizing-kube-prometheus)
* [Installing](#installing)
* [Compiling](#compiling)
* [Containerized Installing and Compiling](#containerized-installing-and-compiling)
* [Configuration](#configuration)
* [Customization Examples](#customization-examples)
* [Cluster Creation Tools](#cluster-creation-tools)
* [Internal Registry](#internal-registry)
* [NodePorts](#nodeports)
* [Prometheus Object Name](#prometheus-object-name)
* [node-exporter DaemonSet namespace](#node-exporter-daemonset-namespace)
* [Alertmanager configuration](#alertmanager-configuration)
* [Static etcd configuration](#static-etcd-configuration)
* [Pod Anti-Affinity](#pod-anti-affinity)
* [Customizing Prometheus alerting/recording rules and Grafana dashboards](#customizing-prometheus-alertingrecording-rules-and-grafana-dashboards)
* [Exposing Prometheus/Alertmanager/Grafana via Ingress](#exposing-prometheusalermanagergrafana-via-ingress)
* [Minikube Example](#minikube-example)
* [Troubleshooting](#troubleshooting)
* [Error retrieving kubelet metrics](#error-retrieving-kubelet-metrics)
* [kube-state-metrics resource usage](#kube-state-metrics-resource-usage)
* [Contributing](#contributing)
## Prerequisites
You will need a Kubernetes cluster, that's it! By default it is assumed that the kubelet uses token authentication and authorization, as otherwise Prometheus needs a client certificate, which gives it full access to the kubelet, rather than just the metrics. Token authentication and authorization allow more fine-grained and easier access control.
This means the kubelet configuration must contain these flags:
* `--authentication-token-webhook=true` This flag enables a `ServiceAccount` token to be used to authenticate against the kubelet(s).
* `--authorization-mode=Webhook` This flag makes the kubelet perform an RBAC request with the API to determine whether the requesting entity (Prometheus in this case) is allowed to access a resource, specifically for this project the `/metrics` endpoint.
This stack provides [resource metrics](https://github.com/kubernetes/metrics#resource-metrics-api) by deploying the [Prometheus Adapter](https://github.com/DirectXMan12/k8s-prometheus-adapter/).
This adapter is an Extension API Server and Kubernetes needs to have this feature enabled, otherwise the adapter has no effect, but is still deployed.
### minikube
In order to just try out this stack, start minikube with the following command:
```
$ minikube delete && minikube start --kubernetes-version=v1.13.2 --memory=4096 --bootstrapper=kubeadm --extra-config=kubelet.authentication-token-webhook=true --extra-config=kubelet.authorization-mode=Webhook --extra-config=scheduler.address=0.0.0.0 --extra-config=controller-manager.address=0.0.0.0
```
> The kube-prometheus stack includes a resource metrics API server, like the metrics-server does. So ensure the metrics-server plugin is disabled on minikube:
>
> ```
> minikube addons disable metrics-server
> ```
## Quickstart
This project is intended to be used as a library (i.e. the intent is not for you to create your own modified copy of this repository).
Though for a quickstart a compiled version of the Kubernetes [manifests](manifests) generated with this library (specifically with `example.jsonnet`) is checked into this repository in order to try the content out quickly. To try out the stack un-customized run:
* Simply create the stack:
```
$ kubectl create -f manifests/
# It can take a few seconds for the above 'create manifests' command to fully create the following resources, so verify the resources are ready before proceeding.
$ until kubectl get customresourcedefinitions servicemonitors.monitoring.coreos.com ; do date; sleep 1; echo ""; done
$ until kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done
$ kubectl apply -f manifests/ # This command sometimes may need to be done twice (to workaround a race condition).
```
* And to teardown the stack:
```
$ kubectl delete -f manifests/
```
### Access the dashboards
Prometheus, Grafana, and Alertmanager dashboards can be accessed quickly using `kubectl port-forward` after running the quickstart via the commands below. Kubernetes 1.10 or later is required.
> Note: There are instructions on how to route to these pods behind an ingress controller in the [Exposing Prometheus/Alertmanager/Grafana via Ingress](#exposing-prometheusalermanagergrafana-via-ingress) section.
Prometheus
```shell
kubectl --namespace monitoring port-forward svc/prometheus-k8s 9090
```
Then access via [http://localhost:9090](http://localhost:9090)
Grafana
```shell
kubectl --namespace monitoring port-forward svc/grafana 3000
```
Then access via [http://localhost:3000](http://localhost:3000) and use the default grafana user:password of `admin:admin`.
Alertmanager
```shell
kubectl --namespace monitoring port-forward svc/alertmanager-main 9093
```
Then access via [http://localhost:9093](http://localhost:9093)
## Customizing Kube-Prometheus
This section:
* describes how to customize the kube-prometheus library via compiling the kube-prometheus manifests yourself (as an alternative to the [Quickstart section](#quickstart)).
* still doesn't require you to make a copy of this entire repository, but rather only a copy of a few select files.
### Installing
The content of this project consists of a set of [jsonnet](http://jsonnet.org/) files making up a library to be consumed.
Install this library in your own project with [jsonnet-bundler](https://github.com/jsonnet-bundler/jsonnet-bundler#install) (the jsonnet package manager):
```
$ mkdir my-kube-prometheus; cd my-kube-prometheus
$ jb init # Creates the initial/empty `jsonnetfile.json`
# Install the kube-prometheus dependency
$ jb install github.com/coreos/prometheus-operator/contrib/kube-prometheus/jsonnet/kube-prometheus # Creates `vendor/` & `jsonnetfile.lock.json`, and fills in `jsonnetfile.json`
```
> `jb` can be installed with `go get github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb`
> An e.g. of how to install a given version of this library: `jb install github.com/coreos/prometheus-operator/contrib/kube-prometheus/jsonnet/kube-prometheus/@v0.22.0`
In order to update the kube-prometheus dependency, simply use the jsonnet-bundler update functionality:
`$ jb update`
### Compiling
e.g. of how to compile the manifests: `./build.sh example.jsonnet`
> before compiling, install `gojsontoyaml` tool with `go get github.com/brancz/gojsontoyaml`
Here's [example.jsonnet](example.jsonnet):
[embedmd]:# (example.jsonnet)
```jsonnet
local kp =
(import 'kube-prometheus/kube-prometheus.libsonnet') + {
_config+:: {
namespace: 'monitoring',
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```
And here's the [build.sh](build.sh) script (which uses `vendor/` to render all manifests in a json structure of `{filename: manifest-content}`):
[embedmd]:# (build.sh)
```sh
#!/usr/bin/env bash
# This script uses arg $1 (name of *.jsonnet file to use) to generate the manifests/*.yaml files.
set -e
set -x
# only exit with zero if all commands of the pipeline exit successfully
set -o pipefail
# Make sure to start with a clean 'manifests' dir
rm -rf manifests
mkdir manifests
# optional, but we would like to generate yaml, not json
jsonnet -J vendor -m manifests "${1-example.jsonnet}" | xargs -I{} sh -c 'cat {} | gojsontoyaml > {}.yaml; rm -f {}' -- {}
```
> Note you need `jsonnet` (`go get github.com/google/go-jsonnet/cmd/jsonnet`) and `gojsontoyaml` (`go get github.com/brancz/gojsontoyaml`) installed to run `build.sh`. If you just want json output, not yaml, then you can skip the pipe and everything afterwards.
This script runs the jsonnet code, then reads each key of the generated json and uses that as the file name, and writes the value of that key to that file, and converts each json manifest to yaml.
### Apply the kube-prometheus stack
The previous step (compilation) has created a bunch of manifest files in the `manifests/` folder.
Now simply use kubectl to install Prometheus and Grafana as per your configuration:
`kubectl apply -f manifests/`
Check the monitoring namespace (or the namespace you have specified in `namespace: `) and make sure the pods are running. Prometheus and Grafana should be up and running soon.
### Containerized Installing and Compiling
If you don't care to have `jb` nor `jsonnet` nor `gojsontoyaml` installed, then build the `po-jsonnet` Docker image (this is something you'll need a copy of this repository for). Do the following from this `kube-prometheus` directory:
```
$ make ../../hack/jsonnet-docker-image
```
Then you can do commands such as the following:
```
docker run \
--rm \
-v `pwd`:`pwd` \
--workdir `pwd` \
po-jsonnet jb init
docker run \
--rm \
-v `pwd`:`pwd` \
--workdir `pwd` \
po-jsonnet jb install github.com/coreos/prometheus-operator/contrib/kube-prometheus/jsonnet/kube-prometheus
docker run \
--rm \
-v `pwd`:`pwd` \
--workdir `pwd` \
po-jsonnet ./build.sh example.jsonnet
```
## Update from upstream project
You may wish to fetch changes made on this project so they are available to you.
### Update jb
jb may have been updated so it's a good idea to get the latest version of this binary
```
go get -u github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb
```
### Update kube-prometheus
The command below will sync with upstream project.
```
jb update
```
### Compile the manifests and apply
Once updated, just follow the instructions under "Compiling" and "Apply the kube-prometheus stack" to apply the changes to your cluster.
## Configuration
Jsonnet has the concept of hidden fields. These are fields that are not going to be rendered in a result. This is used to configure the kube-prometheus components in jsonnet. In the example jsonnet code of the above [Compiling section](#compiling), you can see an example of this, where the `namespace` is being configured to be `monitoring`. In order to not override the whole object, use the `+::` construct of jsonnet to merge objects; this way you can override individual settings, but retain all other settings and defaults.
These are the available fields with their respective default values:
```
{
_config+:: {
namespace: "default",
versions+:: {
alertmanager: "v0.16.1",
nodeExporter: "v0.17.0",
kubeStateMetrics: "v1.5.0",
kubeRbacProxy: "v0.4.1",
addonResizer: "1.8.4",
prometheusOperator: "v0.29.0",
prometheus: "v2.5.0",
},
imageRepos+:: {
prometheus: "quay.io/prometheus/prometheus",
alertmanager: "quay.io/prometheus/alertmanager",
kubeStateMetrics: "quay.io/coreos/kube-state-metrics",
kubeRbacProxy: "quay.io/coreos/kube-rbac-proxy",
addonResizer: "k8s.gcr.io/addon-resizer",
nodeExporter: "quay.io/prometheus/node-exporter",
prometheusOperator: "quay.io/coreos/prometheus-operator",
},
prometheus+:: {
name: 'k8s',
replicas: 2,
rules: {},
},
alertmanager+:: {
name: 'main',
config: |||
global:
resolve_timeout: 5m
route:
group_by: ['job']
group_wait: 30s
group_interval: 5m
repeat_interval: 12h
receiver: 'null'
routes:
- match:
alertname: Watchdog
receiver: 'null'
receivers:
- name: 'null'
|||,
replicas: 3,
},
kubeStateMetrics+:: {
collectors: '', // empty string gets a default set
scrapeInterval: '30s',
scrapeTimeout: '30s',
baseCPU: '100m',
baseMemory: '150Mi',
cpuPerNode: '2m',
memoryPerNode: '30Mi',
},
nodeExporter+:: {
port: 9100,
},
},
}
```
The grafana definition is located in a different project (https://github.com/brancz/kubernetes-grafana), but needed configuration can be customized from the same top level `_config` field. For example to allow anonymous access to grafana, add the following `_config` section:
```
grafana+:: {
config: { // http://docs.grafana.org/installation/configuration/
sections: {
"auth.anonymous": {enabled: true},
},
},
},
```
## Customization Examples
Jsonnet is a Turing-complete language, so any logic can be expressed in it. It also has powerful merge functionalities, allowing sophisticated customizations of any kind simply by merging them into the object the library provides.
### Cluster Creation Tools
A common example is that not all Kubernetes clusters are created exactly the same way, meaning the configuration to monitor them may be slightly different. For [kubeadm](examples/jsonnet-snippets/kubeadm.jsonnet), [bootkube](examples/jsonnet-snippets/bootkube.jsonnet), [kops](examples/jsonnet-snippets/kops.jsonnet) and [kubespray](examples/jsonnet-snippets/kubespray.jsonnet) clusters there are mixins available to easily configure these:
kubeadm:
[embedmd]:# (examples/jsonnet-snippets/kubeadm.jsonnet)
```jsonnet
(import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-kubeadm.libsonnet')
```
bootkube:
[embedmd]:# (examples/jsonnet-snippets/bootkube.jsonnet)
```jsonnet
(import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-bootkube.libsonnet')
```
kops:
[embedmd]:# (examples/jsonnet-snippets/kops.jsonnet)
```jsonnet
(import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-kops.libsonnet')
```
kops with CoreDNS:
If your kops cluster is using CoreDNS, there is an additional mixin to import.
[embedmd]:# (examples/jsonnet-snippets/kops-coredns.jsonnet)
```jsonnet
(import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-kops.libsonnet') +
(import 'kube-prometheus/kube-prometheus-kops-coredns.libsonnet')
```
kubespray:
[embedmd]:# (examples/jsonnet-snippets/kubespray.jsonnet)
```jsonnet
(import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-kubespray.libsonnet')
```
kube-aws:
[embedmd]:# (examples/jsonnet-snippets/kube-aws.jsonnet)
```jsonnet
(import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-kube-aws.libsonnet')
```
### Internal Registry
Some Kubernetes installations source all their images from an internal registry. kube-prometheus supports this use case and helps the user synchronize every image it uses to the internal registry and generate manifests pointing at the internal registry.
To produce the `docker pull/tag/push` commands that will synchronize upstream images to `internal-registry.com/organization` (after having run the `jb` command to populate the vendor directory):
```shell
$ jsonnet -J vendor -S --tla-str repository=internal-registry.com/organization sync-to-internal-registry.jsonnet
docker pull k8s.gcr.io/addon-resizer:1.8.4
docker tag k8s.gcr.io/addon-resizer:1.8.4 internal-registry.com/organization/addon-resizer:1.8.4
docker push internal-registry.com/organization/addon-resizer:1.8.4
docker pull quay.io/prometheus/alertmanager:v0.16.1
docker tag quay.io/prometheus/alertmanager:v0.16.1 internal-registry.com/organization/alertmanager:v0.16.1
docker push internal-registry.com/organization/alertmanager:v0.16.1
...
```
The output of this command can be piped to a shell to be executed by appending `| sh`.
Then to generate manifests with `internal-registry.com/organization`, use the `withImageRepository` mixin:
[embedmd]:# (examples/internal-registry.jsonnet)
```jsonnet
local mixin = import 'kube-prometheus/kube-prometheus-config-mixins.libsonnet';
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
_config+:: {
namespace: 'monitoring',
},
} + mixin.withImageRepository('internal-registry.com/organization');
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```
### NodePorts
Another mixin that may be useful for exploring the stack is to expose the UIs of Prometheus, Alertmanager and Grafana on NodePorts:
[embedmd]:# (examples/jsonnet-snippets/node-ports.jsonnet)
```jsonnet
(import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-node-ports.libsonnet')
```
### Prometheus Object Name
To give another customization example, the name of the `Prometheus` object provided by this library can be overridden:
[embedmd]:# (examples/prometheus-name-override.jsonnet)
```jsonnet
((import 'kube-prometheus/kube-prometheus.libsonnet') + {
prometheus+: {
prometheus+: {
metadata+: {
name: 'my-name',
},
},
},
}).prometheus.prometheus
```
### node-exporter DaemonSet namespace
Standard Kubernetes manifests are all written using [ksonnet-lib](https://github.com/ksonnet/ksonnet-lib/), so they can be modified with the mixins supplied by ksonnet-lib. For example to override the namespace of the node-exporter DaemonSet:
[embedmd]:# (examples/ksonnet-example.jsonnet)
```jsonnet
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local daemonset = k.apps.v1beta2.daemonSet;
((import 'kube-prometheus/kube-prometheus.libsonnet') + {
nodeExporter+: {
daemonset+:
daemonset.mixin.metadata.withNamespace('my-custom-namespace'),
},
}).nodeExporter.daemonset
```
### Alertmanager configuration
The Alertmanager configuration is located in the `_config.alertmanager.config` configuration field. In order to set a custom Alertmanager configuration simply set this field.
[embedmd]:# (examples/alertmanager-config.jsonnet)
```jsonnet
((import 'kube-prometheus/kube-prometheus.libsonnet') + {
_config+:: {
alertmanager+: {
config: |||
global:
resolve_timeout: 10m
route:
group_by: ['job']
group_wait: 30s
group_interval: 5m
repeat_interval: 12h
receiver: 'null'
routes:
- match:
alertname: Watchdog
receiver: 'null'
receivers:
- name: 'null'
|||,
},
},
}).alertmanager.secret
```
In the above example the configuration has been inlined, but can just as well be an external file imported in jsonnet via the `importstr` function.
[embedmd]:# (examples/alertmanager-config-external.jsonnet)
```jsonnet
((import 'kube-prometheus/kube-prometheus.libsonnet') + {
_config+:: {
alertmanager+: {
config: importstr 'alertmanager-config.yaml',
},
},
}).alertmanager.secret
```
### Adding additional namespaces to monitor
In order to monitor additional namespaces, the Prometheus server requires the appropriate `Role` and `RoleBinding` to be able to discover targets from that namespace. By default the Prometheus server is limited to the three namespaces it requires: default, kube-system and the namespace you configure the stack to run in via `$._config.namespace`. This is specified in `$._config.prometheus.namespaces`. To add new namespaces to monitor, simply append the additional namespaces:
[embedmd]:# (examples/additional-namespaces.jsonnet)
```jsonnet
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
_config+:: {
namespace: 'monitoring',
prometheus+:: {
namespaces+: ['my-namespace', 'my-second-namespace'],
},
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```
### Static etcd configuration
In order to configure a static etcd cluster to scrape there is a simple [kube-prometheus-static-etcd.libsonnet](jsonnet/kube-prometheus/kube-prometheus-static-etcd.libsonnet) mixin prepared - see [etcd.jsonnet](examples/etcd.jsonnet) for an example of how to use that mixin, and [Monitoring external etcd](docs/monitoring-external-etcd.md) for more information.
> Note that monitoring etcd in minikube is currently not possible because of how etcd is setup. (minikube's etcd binds to 127.0.0.1:2379 only, and within host networking namespace.)
### Pod Anti-Affinity
To prevent `Prometheus` and `Alertmanager` instances from being deployed onto the same node when
possible, one can include the [kube-prometheus-anti-affinity.libsonnet](jsonnet/kube-prometheus/kube-prometheus-anti-affinity.libsonnet) mixin:
```jsonnet
(import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-anti-affinity.libsonnet')
```
### Customizing Prometheus alerting/recording rules and Grafana dashboards
See [developing Prometheus rules and Grafana dashboards](docs/developing-prometheus-rules-and-grafana-dashboards.md) guide.
### Exposing Prometheus/Alermanager/Grafana via Ingress
See [exposing Prometheus/Alertmanager/Grafana](docs/exposing-prometheus-alertmanager-grafana-ingress.md) guide.
## Minikube Example
To use an easy to reproduce example, see [minikube.jsonnet](examples/minikube.jsonnet), which uses the minikube setup as demonstrated in [Prerequisites](#prerequisites). Because we would like easy access to our Prometheus, Alertmanager and Grafana UIs, `minikube.jsonnet` exposes the services as NodePort type services.
## Troubleshooting
### Error retrieving kubelet metrics
Should the Prometheus `/targets` page show kubelet targets, but not be able to successfully scrape the metrics, then most likely it is a problem with the authentication and authorization setup of the kubelets.
As described in the [Prerequisites](#prerequisites) section, in order to retrieve metrics from the kubelet token authentication and authorization must be enabled. Some Kubernetes setup tools do not enable this by default.
If you are using Google's GKE product, see [cAdvisor support](docs/GKE-cadvisor-support.md).
#### Authentication problem
The Prometheus `/targets` page will show the kubelet job with the error `403 Unauthorized`, when token authentication is not enabled. Ensure, that the `--authentication-token-webhook=true` flag is enabled on all kubelet configurations.
#### Authorization problem
The Prometheus `/targets` page will show the kubelet job with the error `401 Unauthorized`, when token authorization is not enabled. Ensure that the `--authorization-mode=Webhook` flag is enabled on all kubelet configurations.
### kube-state-metrics resource usage
In some environments, kube-state-metrics may need additional
resources. One driver for more resource needs, is a high number of
namespaces. There may be others.
kube-state-metrics resource allocation is managed by
[addon-resizer](https://github.com/kubernetes/autoscaler/tree/master/addon-resizer/nanny).
You can control its parameters by setting variables in the
config. They default to:
``` jsonnet
kubeStateMetrics+:: {
baseCPU: '100m',
cpuPerNode: '2m',
baseMemory: '150Mi',
memoryPerNode: '30Mi',
}
```
## Contributing
All `.yaml` files in the `/manifests` folder are generated via
[Jsonnet](https://jsonnet.org/). Contributing changes will most likely include
the following process:
1. Make your changes in the respective `*.jsonnet` file.
2. Commit your changes (This is currently necessary due to our vendoring
process. This is likely to change in the future).
3. Update the pinned kube-prometheus dependency in `jsonnetfile.lock.json`: `jb
update`.
4. Generate dependent `*.yaml` files: `make generate-in-docker`.
5. Commit the generated changes.

16
build.sh Executable file
View File

@@ -0,0 +1,16 @@
#!/usr/bin/env bash

# This script uses arg $1 (name of *.jsonnet file to use) to generate the manifests/*.yaml files.
# When no argument is given, example.jsonnet is compiled.

set -e
set -x
# only exit with zero if all commands of the pipeline exit successfully
set -o pipefail

# Make sure to start with a clean 'manifests' dir
rm -rf manifests
mkdir manifests

# optional, but we would like to generate yaml, not json
# `jsonnet -m` prints the path of every file it writes. Each path is handed to
# the inline script as "$1" rather than being spliced into the script text, so
# unusual characters in a file name cannot change the command. The JSON file is
# removed only after gojsontoyaml succeeded: a failed conversion makes xargs
# (and, via pipefail, this script) exit non-zero instead of being masked by the
# exit status of `rm -f`.
jsonnet -J vendor -m manifests "${1-example.jsonnet}" | xargs -I{} sh -c 'gojsontoyaml < "$1" > "$1.yaml" && rm -f "$1"' -- {}

View File

@@ -0,0 +1,36 @@
# Kubelet / cAdvisor special configuration updates for GKE
Prior to GKE 1.11, the kubelet does not support token
authentication. Until it does, Prometheus must use HTTP (not HTTPS)
for scraping.
You can configure this behavior through kube-prometheus with:
```
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-insecure-kubelet.libsonnet') +
{
_config+:: {
# ... config here
}
};
```
Or, you can patch and re-apply your existing manifests with:
On linux:
```
sed -i -e 's/https/http/g' manifests/prometheus-serviceMonitorKubelet.yaml
```
On macOS:
```
sed -i '' -e 's/https/http/g' manifests/prometheus-serviceMonitorKubelet.yaml
```
After you have modified the yaml file please run
```
kubectl apply -f manifests/prometheus-serviceMonitorKubelet.yaml
```

View File

@@ -0,0 +1,293 @@
# Developing Prometheus Rules and Grafana Dashboards
`kube-prometheus` ships with a set of default [Prometheus rules](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/) and [Grafana](http://grafana.com/) dashboards. At some point one might like to extend them; the purpose of this document is to explain how to do this.
All manifests of kube-prometheus are generated using [jsonnet](https://jsonnet.org/), and Prometheus rules and Grafana dashboards in particular follow the [Prometheus Monitoring Mixins proposal](https://docs.google.com/document/d/1A9xvzwqnFVSOZ5fD3blKODXfsat5fg6ZhnKu9LK3lB4/).
For both the Prometheus rules and the Grafana dashboards Kubernetes `ConfigMap`s are generated within kube-prometheus. In order to add additional rules and dashboards simply merge them onto the existing json objects. This document illustrates examples for rules as well as dashboards.
As a basis, all examples in this guide are based on the base example of the kube-prometheus [readme](../README.md):
[embedmd]:# (../example.jsonnet)
```jsonnet
local kp =
(import 'kube-prometheus/kube-prometheus.libsonnet') + {
_config+:: {
namespace: 'monitoring',
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```
## Prometheus rules
### Alerting rules
According to the [Prometheus Monitoring Mixins proposal](https://docs.google.com/document/d/1A9xvzwqnFVSOZ5fD3blKODXfsat5fg6ZhnKu9LK3lB4/) Prometheus alerting rules are under the key `prometheusAlerts` in the top level object, so in order to add an additional alerting rule, we can simply merge an extra rule into the existing object.
The format is exactly the Prometheus format, so there should be no changes necessary should you have existing rules that you want to include.
> Note that alerts can just as well be included into this file, using the jsonnet `import` function. In this example it is just inlined in order to demonstrate their use in a single file.
[embedmd]:# (../examples/prometheus-additional-alert-rule-example.jsonnet)
```jsonnet
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
_config+:: {
namespace: 'monitoring',
},
prometheusAlerts+:: {
groups+: [
{
name: 'example-group',
rules: [
{
alert: 'Watchdog',
expr: 'vector(1)',
labels: {
severity: 'none',
},
annotations: {
description: 'This is a Watchdog meant to ensure that the entire alerting pipeline is functional.',
},
},
],
},
],
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```
### Recording rules
In order to add a recording rule, simply do the same with the `prometheusRules` field.
> Note that rules can just as well be included into this file, using the jsonnet `import` function. In this example it is just inlined in order to demonstrate their use in a single file.
[embedmd]:# (../examples/prometheus-additional-recording-rule-example.jsonnet)
```jsonnet
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
_config+:: {
namespace: 'monitoring',
},
prometheusRules+:: {
groups+: [
{
name: 'example-group',
rules: [
{
record: 'some_recording_rule_name',
expr: 'vector(1)',
},
],
},
],
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```
### Pre-rendered rules
We acknowledge, that users may need to transition existing rules, and therefore allow an option to add additional pre-rendered rules. Luckily the yaml and json formats are very close so the yaml rules just need to be converted to json without any manual interaction needed. Just a tool to convert yaml to json is needed:
```
go get -u -v github.com/brancz/gojsontoyaml
```
And convert the existing rule file:
```
cat existingrule.yaml | gojsontoyaml -yamltojson > existingrule.json
```
Then import it in jsonnet:
[embedmd]:# (../examples/prometheus-additional-rendered-rule-example.jsonnet)
```jsonnet
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
prometheusAlerts+:: (import 'existingrule.json'),
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```
### Changing default rules
Along with adding additional rules, we give the user the option to filter or adjust the existing rules imported by `kube-prometheus/kube-prometheus.libsonnet`. The recording rules can be found in [kube-prometheus/rules](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus/jsonnet/kube-prometheus/rules) and [kubernetes-mixin/rules](https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/rules) while the alerting rules can be found in [kube-prometheus/alerts](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus/jsonnet/kube-prometheus/alerts) and [kubernetes-mixin/alerts](https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/alerts).
Knowing which rules to change, the user can now use functions from the [Jsonnet standard library](https://jsonnet.org/ref/stdlib.html) to make these changes. Below are examples of both a filter and an adjustment being made to the default rules. These changes can be assigned to a local variable and then added to the `local kp` object as seen in the examples above.
#### Filter
Here the alert `KubeStatefulSetReplicasMismatch` is being filtered out of the group `kubernetes-apps`. The default rule can be seen [here](https://github.com/kubernetes-monitoring/kubernetes-mixin/blob/master/alerts/apps_alerts.libsonnet).
```jsonnet
local filter = {
prometheusAlerts+:: {
groups: std.map(
function(group)
if group.name == 'kubernetes-apps' then
group {
rules: std.filter(function(rule)
rule.alert != "KubeStatefulSetReplicasMismatch",
group.rules
)
}
else
group,
super.groups
),
},
};
```
#### Adjustment
Here the expression for the alert used above is updated from its previous value. The default rule can be seen [here](https://github.com/kubernetes-monitoring/kubernetes-mixin/blob/master/alerts/apps_alerts.libsonnet).
```jsonnet
local update = {
prometheusAlerts+:: {
groups: std.map(
function(group)
if group.name == 'kubernetes-apps' then
group {
rules: std.map(
function(rule)
if rule.alert == "KubeStatefulSetReplicasMismatch" then
rule {
expr: "kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\",statefulset!=\"vault\"} != kube_statefulset_status_replicas{job=\"kube-state-metrics\",statefulset!=\"vault\"}"
}
else
rule,
group.rules
)
}
else
group,
super.groups
),
},
};
```
Using the example from above about adding in pre-rendered rules, the new local variables can be added in as follows:
```jsonnet
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + filter + update + {
prometheusAlerts+:: (import 'existingrule.json'),
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```
## Dashboards
Dashboards can either be added using jsonnet or simply a pre-rendered json dashboard.
### Jsonnet dashboard
We recommend using the [grafonnet](https://github.com/grafana/grafonnet-lib/) library for jsonnet, which gives you a simple DSL to generate Grafana dashboards. Following the [Prometheus Monitoring Mixins proposal](https://docs.google.com/document/d/1A9xvzwqnFVSOZ5fD3blKODXfsat5fg6ZhnKu9LK3lB4/) additional dashboards are added to the `grafanaDashboards` key, located in the top level object. To add new jsonnet dashboards, simply add one.
> Note that dashboards can just as well be included into this file, using the jsonnet `import` function. In this example it is just inlined in order to demonstrate their use in a single file.
[embedmd]:# (../examples/grafana-additional-jsonnet-dashboard-example.jsonnet)
```jsonnet
local grafana = import 'grafonnet/grafana.libsonnet';
local dashboard = grafana.dashboard;
local row = grafana.row;
local prometheus = grafana.prometheus;
local template = grafana.template;
local graphPanel = grafana.graphPanel;
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
_config+:: {
namespace: 'monitoring',
},
grafanaDashboards+:: {
'my-dashboard.json':
dashboard.new('My Dashboard')
.addTemplate(
{
current: {
text: 'Prometheus',
value: 'Prometheus',
},
hide: 0,
label: null,
name: 'datasource',
options: [],
query: 'prometheus',
refresh: 1,
regex: '',
type: 'datasource',
},
)
.addRow(
row.new()
.addPanel(graphPanel.new('My Panel', span=6, datasource='$datasource')
.addTarget(prometheus.target('vector(1)')))
),
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```
### Pre-rendered Grafana dashboards
As jsonnet is a superset of json, the jsonnet `import` function can be used to include Grafana dashboard json blobs. In this example we are importing a [provided example dashboard](../examples/example-grafana-dashboard.json).
[embedmd]:# (../examples/grafana-additional-rendered-dashboard-example.jsonnet)
```jsonnet
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
_config+:: {
namespace: 'monitoring',
},
grafanaDashboards+:: {
'my-dashboard.json': (import 'example-grafana-dashboard.json'),
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```

View File

@@ -0,0 +1,101 @@
# Exposing Prometheus, Alertmanager and Grafana UIs via Ingress
In order to access the web interfaces via the Internet [Kubernetes Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) is a popular option. This guide explains, how Kubernetes Ingress can be setup, in order to expose the Prometheus, Alertmanager and Grafana UIs, that are included in the [kube-prometheus](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus) project.
Note: before continuing, it is recommended to first get familiar with the [kube-prometheus](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus) stack by itself.
## Prerequisites
Apart from a running Kubernetes cluster with a running [kube-prometheus](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus) stack, a Kubernetes Ingress controller must be installed and functional. This guide was tested with the [nginx-ingress-controller](https://github.com/kubernetes/ingress-nginx). If you wish to reproduce the exact result as depicted in this guide we recommend using the nginx-ingress-controller.
## Setting up Ingress
The setup of Ingress objects is the same for Prometheus, Alertmanager and Grafana. Therefore this guide demonstrates it in detail for Prometheus as it can easily be adapted for the other applications.
As monitoring data may contain sensitive data, this guide describes how to set up Ingress with basic auth as an example of minimal security. Of course this should be adapted to the preferred authentication means of any particular organization, but we feel it is important to at least provide an example with a minimum of security.
In order to setup basic auth, a secret with the `htpasswd` formatted file needs to be created. To do this, first install the [`htpasswd`](https://httpd.apache.org/docs/2.4/programs/htpasswd.html) tool.
To create the `htpasswd` formatted file called `auth` run:
```
htpasswd -c auth <username>
```
In order to use this, a secret needs to be created containing the `htpasswd` formatted file, and basic auth can then be configured with annotations on the Ingress object.
Also, the applications provide external links to themselves in alerts and various places. When an ingress is used in front of the applications these links need to be based on the external URLs. This can be configured for each application in jsonnet.
```jsonnet
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local secret = k.core.v1.secret;
local ingress = k.extensions.v1beta1.ingress;
local ingressTls = ingress.mixin.spec.tlsType;
local ingressRule = ingress.mixin.spec.rulesType;
local httpIngressPath = ingressRule.mixin.http.pathsType;
local kp =
(import 'kube-prometheus/kube-prometheus.libsonnet') +
{
_config+:: {
namespace: 'monitoring',
},
prometheus+:: {
prometheus+: {
spec+: {
externalUrl: 'http://prometheus.example.com',
},
},
},
ingress+:: {
'prometheus-k8s':
ingress.new() +
ingress.mixin.metadata.withName($.prometheus.prometheus.metadata.name) +
ingress.mixin.metadata.withNamespace($.prometheus.prometheus.metadata.namespace) +
ingress.mixin.metadata.withAnnotations({
'nginx.ingress.kubernetes.io/auth-type': 'basic',
'nginx.ingress.kubernetes.io/auth-secret': 'basic-auth',
'nginx.ingress.kubernetes.io/auth-realm': 'Authentication Required',
}) +
ingress.mixin.spec.withRules(
ingressRule.new() +
ingressRule.withHost('prometheus.example.com') +
ingressRule.mixin.http.withPaths(
httpIngressPath.new() +
httpIngressPath.mixin.backend.withServiceName($.prometheus.service.metadata.name) +
httpIngressPath.mixin.backend.withServicePort('web')
),
),
},
} + {
ingress+:: {
'basic-auth-secret':
secret.new('basic-auth', { auth: std.base64(importstr 'auth') }) +
secret.mixin.metadata.withNamespace($._config.namespace),
},
};
k.core.v1.list.new([
kp.ingress['prometheus-k8s'],
kp.ingress['basic-auth-secret'],
])
```
In order to expose Alertmanager and Grafana, simply create additional fields containing an ingress object, but simply pointing at the `alertmanager` or `grafana` instead of the `prometheus-k8s` Service. Make sure to also use the correct port respectively, for Alertmanager it is also `web`, for Grafana it is `http`. Be sure to also specify the appropriate external URL.
In order to render the ingress objects similarly to the other objects, use the following, as demonstrated in the [main readme](../README.md#usage):
```
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } +
{ ['ingress-' + name]: kp.ingress[name] for name in std.objectFields(kp.ingress) }
```
Note, that in comparison only the last line was added, the rest is identical to the original.
See [ingress.jsonnet](../examples/ingress.jsonnet) for an example implementation.

View File

@@ -0,0 +1,159 @@
<br>
<div class="alert alert-info" role="alert">
<i class="fa fa-exclamation-triangle"></i><b> Note:</b> Starting with v0.12.0, Prometheus Operator requires use of Kubernetes v1.7.x and up.
</div>
# Kube Prometheus on Kubeadm
The [kubeadm](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/) tool is linked by Kubernetes as the official way to deploy and manage self-hosted clusters. Kubeadm does a lot of heavy lifting by automatically configuring your Kubernetes cluster with some common options. This guide is intended to show you how to deploy Prometheus, Prometheus Operator and Kube Prometheus to get you started monitoring your cluster that was deployed with Kubeadm.
This guide assumes you have a basic understanding of how to use the functionality the Prometheus Operator implements. If you haven't yet, we recommend reading through the [getting started guide](../../../Documentation/user-guides/getting-started.md) as well as the [alerting guide](../../../Documentation/user-guides/alerting.md).
## Kubeadm Pre-requisites
This guide assumes you have some familiarity with `kubeadm` or at least have deployed a cluster using `kubeadm`. By default, `kubeadm` does not expose two of the services that we will be monitoring. Therefore, in order to get the most out of the `kube-prometheus` package, we need to make some quick tweaks to the Kubernetes cluster. Since we will be monitoring the `kube-controller-manager` and `kube-scheduler`, we must expose them to the cluster.
By default, `kubeadm` runs these pods on your master and bound to `127.0.0.1`. There are a couple of ways to change this. The recommended way to change these features is to use the [kubeadm config file](https://kubernetes.io/docs/reference/generated/kubeadm/#config-file). An example configuration file can be used:
```yaml
apiVersion: kubeadm.k8s.io/v1alpha1
kind: MasterConfiguration
api:
advertiseAddress: 192.168.1.173
bindPort: 6443
authorizationModes:
- Node
- RBAC
certificatesDir: /etc/kubernetes/pki
cloudProvider:
etcd:
dataDir: /var/lib/etcd
endpoints: null
imageRepository: gcr.io/google_containers
kubernetesVersion: v1.8.3
networking:
dnsDomain: cluster.local
serviceSubnet: 10.96.0.0/12
nodeName: your-dev
tokenTTL: 24h0m0s
controllerManagerExtraArgs:
address: 0.0.0.0
schedulerExtraArgs:
address: 0.0.0.0
```
Notice the `schedulerExtraArgs` and `controllerManagerExtraArgs`. This exposes the `kube-controller-manager` and `kube-scheduler` services to the rest of the cluster. If you have kubernetes core components as pods in the kube-system namespace, ensure that the `kube-prometheus-exporter-kube-scheduler` and `kube-prometheus-exporter-kube-controller-manager` services' `spec.selector` values match those of pods.
In addition, we will be using `node-exporter` to monitor the `cAdvisor` service on all the nodes. This, however requires a change to the `kubelet` service on the master as well as all the nodes. According to the Kubernetes documentation
> The kubeadm deb package ships with configuration for how the kubelet should be run. Note that the `kubeadm` CLI command will never touch this drop-in file. This drop-in file belongs to the kubeadm deb/rpm package.
Again, we need to expose the `cadvisor` that is installed and managed by the `kubelet` daemon and allow webhook token authentication. To do so, we do the following on all the masters and nodes:
```bash
KUBEADM_SYSTEMD_CONF=/etc/systemd/system/kubelet.service.d/10-kubeadm.conf
sed -e "/cadvisor-port=0/d" -i "$KUBEADM_SYSTEMD_CONF"
if ! grep -q "authentication-token-webhook=true" "$KUBEADM_SYSTEMD_CONF"; then
sed -e "s/--authorization-mode=Webhook/--authentication-token-webhook=true --authorization-mode=Webhook/" -i "$KUBEADM_SYSTEMD_CONF"
fi
systemctl daemon-reload
systemctl restart kubelet
```
In case you already have a Kubernetes cluster deployed with kubeadm, change the address that kube-controller-manager and kube-scheduler listen on, in addition to the previous kubelet change:
```
sed -e "s/- --address=127.0.0.1/- --address=0.0.0.0/" -i /etc/kubernetes/manifests/kube-controller-manager.yaml
sed -e "s/- --address=127.0.0.1/- --address=0.0.0.0/" -i /etc/kubernetes/manifests/kube-scheduler.yaml
```
With these changes, your Kubernetes cluster is ready.
## Metric Sources
Monitoring a Kubernetes cluster with Prometheus is a natural choice as Kubernetes components themselves are instrumented with Prometheus metrics, therefore those components simply have to be discovered by Prometheus and most of the cluster is monitored.
Metrics that are about cluster state rather than a single component are exposed by the add-on component [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics).
Additionally, to have an overview of cluster nodes' resources the Prometheus [node_exporter](https://github.com/prometheus/node_exporter) is used. The node_exporter allows monitoring a node's resources: CPU, memory and disk utilization and more.
Once you complete this guide you will monitor the following:
* cluster state via kube-state-metrics
* nodes via the node_exporter
* kubelets
* apiserver
* kube-scheduler
* kube-controller-manager
## Getting Up and Running Fast with Kube-Prometheus
To help get started more quickly with monitoring Kubernetes clusters, [kube-prometheus](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus) was created. It is a collection of manifests including dashboards and alerting rules that can easily be deployed. It utilizes the Prometheus Operator and all the manifests demonstrated in [this guide](../../../Documentation/user-guides/cluster-monitoring.md).
This section represents a quick installation and is not intended to teach you about all the components. The easiest way to get started is to clone this repository and use the `kube-prometheus` section of the code.
```
git clone https://github.com/coreos/prometheus-operator
cd prometheus-operator/contrib/kube-prometheus/
```
First, create the namespace in which you want the monitoring tool suite to be running.
```
export NAMESPACE='monitoring'
kubectl create namespace "$NAMESPACE"
```
Now we will create the components for the Prometheus operator
```
kubectl --namespace="$NAMESPACE" apply -f manifests/prometheus-operator
```
This will create all the Prometheus Operator components. You might need to wait a short amount of time before the Custom Resource Definitions are available in the cluster. You can wait for them:
```
until kubectl --namespace="$NAMESPACE" get alertmanagers.monitoring.coreos.com > /dev/null 2>&1; do sleep 1; printf "."; done
```
Next, we will install the node exporter and then kube-state-metrics:
```
kubectl --namespace="$NAMESPACE" apply -f manifests/node-exporter
kubectl --namespace="$NAMESPACE" apply -f manifests/kube-state-metrics
```
Then, we can deploy the grafana credentials. By default, the username/password will be `admin/admin`, you should change these for your production clusters.
```
kubectl --namespace="$NAMESPACE" apply -f manifests/grafana/grafana-credentials.yaml
```
Then install grafana itself:
```
kubectl --namespace="$NAMESPACE" apply -f manifests/grafana
```
Next up is the `Prometheus` object itself. We will deploy the application, and then the roles/role-bindings.
```
find manifests/prometheus -type f ! -name prometheus-k8s-roles.yaml ! -name prometheus-k8s-role-bindings.yaml -exec kubectl --namespace "$NAMESPACE" apply -f {} \;
kubectl apply -f manifests/prometheus/prometheus-k8s-roles.yaml
kubectl apply -f manifests/prometheus/prometheus-k8s-role-bindings.yaml
```
Finally, install the [Alertmanager](../../../Documentation/user-guides/alerting.md)
```
kubectl --namespace="$NAMESPACE" apply -f manifests/alertmanager
```
Now you should have a working cluster. After all the pods are ready, you should be able to reach:
* Prometheus UI on node port `30900`
* Alertmanager UI on node port `30903`
* Grafana on node port `30902`
These can of course be changed via the Service definitions. It is recommended to look at the [Exposing Prometheus and Alert Manager](../../../Documentation/user-guides/exposing-prometheus-and-alertmanager.md) documentation for more detailed information on how to expose these services.

View File

@@ -0,0 +1,64 @@
# Monitoring external etcd
This guide will help you monitor an external etcd cluster, i.e. an etcd cluster that is not hosted inside Kubernetes.
This is often the case with Kubernetes setups. This approach has been tested with kube-aws but the same principles apply to other tools.
Note that [etcd.jsonnet](../examples/etcd.jsonnet) & [kube-prometheus-static-etcd.libsonnet](../jsonnet/kube-prometheus/kube-prometheus-static-etcd.libsonnet) (which are described by a section of the [Readme](../README.md#static-etcd-configuration)) do the following:
* Put the three etcd TLS client files (CA & cert & key) into a secret in the namespace, and have Prometheus Operator load the secret.
* Create the following (to expose etcd metrics - port 2379): a Service, Endpoint, & ServiceMonitor.
# Step 1: Open the port
You now need to allow the nodes Prometheus is running on to talk to etcd on port 2379 (if 2379 is the port used by etcd to expose the metrics).
If using kube-aws, you will need to edit the etcd security group inbound, specifying the security group of your Kubernetes node (worker) as the source.
## kube-aws and EIP or ENI inconsistency
With kube-aws, each etcd node has two IP addresses:
* EC2 instance IP
* EIP or ENI (depending on the chosen method in your cluster.yaml)
For some reason, some etcd nodes answer to :2379/metrics on the instance IP (eth0), some others on the EIP|ENI address (eth1). See issue https://github.com/kubernetes-incubator/kube-aws/issues/923
It would of course be much better if we could hit the EIP/ENI all the time as they don't change even if the underlying EC2 instance goes down.
If specifying the Instance IP (eth0) in the Prometheus Operator ServiceMonitor, and the EC2 instance goes down, one would have to update the ServiceMonitor.
Another idea would be to use the DNS entries of etcd, but those are not currently supported for Endpoints objects in Kubernetes.
# Step 2: verify
Go to the Prometheus UI on :9090/config and check that you have an etcd job entry:
```
- job_name: monitoring/etcd-k8s/0
scrape_interval: 30s
scrape_timeout: 10s
...
```
On the :9090/targets page:
* You should see "etcd" with the UP state. If not, check the Error column for more information.
* If no "etcd" targets are even shown on this page, prometheus isn't attempting to scrape it.
# Step 3: Grafana dashboard
## Find a dashboard you like
Try to load this dashboard:
https://grafana.com/dashboards/3070
## Save the dashboard in the configmap
As documented here, [Developing Alerts and Dashboards](developing-prometheus-rules-and-grafana-dashboards.md), the Grafana instances are stateless. The dashboards are automatically re-loaded from the ConfigMap.
So if you load a dashboard through the Grafana UI, it won't be kept unless saved in ConfigMap
Read [the document](developing-prometheus-rules-and-grafana-dashboards.md), but in summary:
### Copy your dashboard:
Once you are happy with the dashboard, export it and move it to `prometheus-operator/contrib/kube-prometheus/assets/grafana/` (ending in "-dashboard.json")
### Regenerate the grafana dashboard manifest:
`hack/scripts/generate-dashboards-configmap.sh > manifests/grafana/grafana-dashboards.yaml`
### Reload the manifest in Kubernetes:
` kubectl -n monitoring replace -f manifests/grafana/grafana-dashboards.yaml`
After a few minutes your dashboard will be available permanently to all Grafana instances.

View File

@@ -0,0 +1,28 @@
# Monitoring other Kubernetes Namespaces
This guide will help you monitor applications in other Namespaces. By default the RBAC rules are only enabled for the `default` and `kube-system` Namespaces during install.
# Setup
You have to give the list of the Namespaces that you want to be able to monitor.
This is done in the variable `prometheus.namespaces` of `_config`. You usually set this in your `.jsonnet` file when building the manifests.
Example to create the needed `Role` and `RoleBinding` for the Namespace `foo`:
```
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
_config+:: {
namespace: 'monitoring',
prometheus+:: {
namespaces: ["default", "kube-system","foo"],
},
},
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```

16
example.jsonnet Normal file
View File

@@ -0,0 +1,16 @@
// Minimal kube-prometheus build: the stock stack with its namespace set to 'monitoring'.
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: { namespace: 'monitoring' },
};

// Returns one entry per visible field of `component`, keyed by `prefix` + field name.
local render(prefix, component) = {
  [prefix + name]: component[name]
  for name in std.objectFields(component)
};

render('00namespace-', kp.kubePrometheus) +
render('0prometheus-operator-', kp.prometheusOperator) +
render('node-exporter-', kp.nodeExporter) +
render('kube-state-metrics-', kp.kubeStateMetrics) +
render('alertmanager-', kp.alertmanager) +
render('prometheus-', kp.prometheus) +
render('prometheus-adapter-', kp.prometheusAdapter) +
render('grafana-', kp.grafana)

View File

@@ -0,0 +1,17 @@
// kube-prometheus build that appends two extra entries to the `prometheus.namespaces` list.
local extraNamespaces = ['my-namespace', 'my-second-namespace'];

local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: {
    namespace: 'monitoring',
    // `+:` appends to the list inherited from the library instead of replacing it.
    prometheus+:: { namespaces+: extraNamespaces },
  },
};

// Returns one entry per visible field of `component`, keyed by `prefix` + field name.
local render(prefix, component) = {
  [prefix + name]: component[name]
  for name in std.objectFields(component)
};

render('00namespace-', kp.kubePrometheus) +
render('0prometheus-operator-', kp.prometheusOperator) +
render('node-exporter-', kp.nodeExporter) +
render('kube-state-metrics-', kp.kubeStateMetrics) +
render('alertmanager-', kp.alertmanager) +
render('prometheus-', kp.prometheus) +
render('grafana-', kp.grafana)

View File

@@ -0,0 +1,7 @@
// Evaluates to `alertmanager.secret` only, with the Alertmanager configuration
// read verbatim from the neighbouring 'alertmanager-config.yaml' file.
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: {
    alertmanager+: {
      config: importstr 'alertmanager-config.yaml',
    },
  },
};

kp.alertmanager.secret

View File

@@ -0,0 +1,22 @@
// Evaluates to `alertmanager.secret` only, with the Alertmanager configuration
// supplied inline as a text block.
((import 'kube-prometheus/kube-prometheus.libsonnet') + {
   _config+:: {
     alertmanager+: {
       // Lines of a `|||` text block must be indented; the common indentation is
       // stripped, so the nested YAML structure below is what ends up in the string.
       config: |||
         global:
           resolve_timeout: 10m
         route:
           group_by: ['job']
           group_wait: 30s
           group_interval: 5m
           repeat_interval: 12h
           receiver: 'null'
           routes:
           - match:
               alertname: Watchdog
             receiver: 'null'
         receivers:
         - name: 'null'
       |||,
     },
   },
 }).alertmanager.secret

View File

@@ -0,0 +1,15 @@
# external alertmanager yaml
global:
  resolve_timeout: 10m
route:
  group_by: ['job']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 12h
  # Default receiver for alerts that match no child route.
  receiver: 'null'
  routes:
  # Route the Watchdog alert explicitly to the 'null' receiver.
  - match:
      alertname: Watchdog
    receiver: 'null'
receivers:
- name: 'null'

2
examples/auth Normal file
View File

@@ -0,0 +1,2 @@
# This file should not ever be used, it's just a mock.
dontusethis:$apr1$heg6VIp7$1PSzJ/Z6fYboQ5pYrbgSy.

View File

@@ -0,0 +1,8 @@
# Example Secret holding basic-auth credentials (values are base64-encoded).
apiVersion: v1
kind: Secret
metadata:
  name: basic-auth
data:
  password: dG9vcg== # toor
  user: YWRtaW4= # admin
type: Opaque

View File

@@ -0,0 +1,22 @@
# ServiceMonitor whose endpoint reads its basic-auth credentials from the
# `password` and `user` keys of the Secret named `basic-auth`.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    k8s-apps: basic-auth-example
  name: basic-auth-example
spec:
  endpoints:
  - basicAuth:
      password:
        name: basic-auth
        key: password
      username:
        name: basic-auth
        key: user
    port: metrics
  namespaceSelector:
    matchNames:
    - logging
  selector:
    matchLabels:
      app: myapp

View File

0
examples/etcd-client.crt Normal file
View File

0
examples/etcd-client.key Normal file
View File

View File

@@ -0,0 +1,22 @@
// kube-prometheus with the static etcd mixin; `insecureSkipVerify` is set to true.
local etcdSettings = {
  ips: ['127.0.0.1'],
  clientCA: importstr 'etcd-client-ca.crt',
  clientKey: importstr 'etcd-client.key',
  clientCert: importstr 'etcd-client.crt',
  insecureSkipVerify: true,
};

local kp =
  (import 'kube-prometheus/kube-prometheus.libsonnet') +
  (import 'kube-prometheus/kube-prometheus-static-etcd.libsonnet') +
  {
    _config+:: {
      namespace: 'monitoring',
      etcd+:: etcdSettings,
    },
  };

// Returns one entry per visible field of `component`, keyed by `prefix` + field name.
local render(prefix, component) = {
  [prefix + name]: component[name]
  for name in std.objectFields(component)
};

render('00namespace-', kp.kubePrometheus) +
render('0prometheus-operator-', kp.prometheusOperator) +
render('node-exporter-', kp.nodeExporter) +
render('kube-state-metrics-', kp.kubeStateMetrics) +
render('alertmanager-', kp.alertmanager) +
render('prometheus-', kp.prometheus) +
render('grafana-', kp.grafana)

53
examples/etcd.jsonnet Normal file
View File

@@ -0,0 +1,53 @@
// kube-prometheus with the static etcd mixin, configured with a `serverName`.
local kp =
  (import 'kube-prometheus/kube-prometheus.libsonnet') +
  (import 'kube-prometheus/kube-prometheus-static-etcd.libsonnet') +
  {
    _config+:: {
      namespace: 'monitoring',

      // Reference info: https://github.com/coreos/prometheus-operator/blob/master/contrib/kube-prometheus/README.md#static-etcd-configuration
      etcd+:: {
        // The IP(s) to scrape, i.e. your etcd node(s). Separate multiple values with commas.
        ips: ['127.0.0.1'],

        // Reference info:
        // * https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#servicemonitorspec (has endpoints)
        // * https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#endpoint (has tlsConfig)
        // * https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#tlsconfig (has: caFile, certFile, keyFile, serverName, & insecureSkipVerify)

        // Point these three variables at the certificate files (on your work machine) that are valid for
        // scraping etcd metrics (check the apiserver container). These certificates are most likely generated
        // somewhere in an infrastructure repository, so the jsonnet `importstr` function can be useful here.
        // (Kube-aws stores these three files inside the credential folder.)
        // All the sensitive information on the certificates will end up in a Kubernetes Secret.
        clientCA: importstr 'etcd-client-ca.crt',
        clientKey: importstr 'etcd-client.key',
        clientCert: importstr 'etcd-client.crt',

        // Specify a value EITHER for 'serverName' OR for 'insecureSkipVerify' - not both, and not neither.
        // * serverName: ideally provide a valid value here and leave insecureSkipVerify as false, so that serverName gets used.
        // * insecureSkipVerify: only set this to true if you cannot (based on how your etcd certificates were created) use a Subject Alternative Name.
        // * If you specify a value:
        // ** for both: with 'insecureSkipVerify: true', a serverName value won't hurt anything but it will be ignored.
        // ** for neither: you'll get authentication errors on the prom '/targets' page with your etcd targets.

        // A valid name (DNS or Subject Alternative Name) that the client (i.e. prometheus) will use to verify the etcd TLS certificate.
        // * Note that doing `nslookup etcd.kube-system.svc.cluster.local` (on a pod in a K8s cluster where kube-prometheus has been installed) shows that kube-prometheus sets up this hostname.
        // * `openssl x509 -noout -text -in etcd-client.pem` will print the Subject Alternative Names.
        serverName: 'etcd.kube-system.svc.cluster.local',

        // When insecureSkipVerify isn't specified, the default value is "false".
        //insecureSkipVerify: true,

        // In case you have generated the etcd certificate with kube-aws:
        // * With a single etcd node, you can use the value from 'etcd.internalDomainName' (specified in your kube-aws cluster.yaml) as the value for 'serverName'.
        // * With multiple etcd nodes, you will need 'insecureSkipVerify: true' (if using the default certificate generator method), as the valid
        //   certificate domain will be different for each etcd node. (kube-aws default certificates are not valid against the IP - they were created for the DNS.)
      },
    },
  };

// Returns one entry per visible field of `component`, keyed by `prefix` + field name.
local render(prefix, component) = {
  [prefix + name]: component[name]
  for name in std.objectFields(component)
};

render('00namespace-', kp.kubePrometheus) +
render('0prometheus-operator-', kp.prometheusOperator) +
render('node-exporter-', kp.nodeExporter) +
render('kube-state-metrics-', kp.kubeStateMetrics) +
render('alertmanager-', kp.alertmanager) +
render('prometheus-', kp.prometheus) +
render('grafana-', kp.grafana)

View File

@@ -0,0 +1,36 @@
# Service exposing the example application's named port `web`.
kind: Service
apiVersion: v1
metadata:
  name: example-app
  labels:
    tier: frontend
  namespace: default
spec:
  selector:
    app: example-app
  ports:
  - name: web
    protocol: TCP
    port: 8080
    targetPort: web
---
# apps/v1 replaces the removed extensions/v1beta1 Deployment API and requires
# an explicit selector that matches the pod template labels.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: example-app
  namespace: default
spec:
  replicas: 4
  selector:
    matchLabels:
      app: example-app
  template:
    metadata:
      labels:
        app: example-app
        version: 1.1.3
    spec:
      containers:
      - name: example-app
        image: quay.io/fabxc/prometheus_demo_service
        ports:
        - name: web
          containerPort: 8080
          protocol: TCP

View File

@@ -0,0 +1,13 @@
# Binds the `alertmanager-discovery` Role in the `monitoring` namespace to the
# `prometheus-frontend` ServiceAccount of the `default` namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: prometheus-frontend
  namespace: monitoring
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: alertmanager-discovery
subjects:
- kind: ServiceAccount
  name: prometheus-frontend
  namespace: default

View File

@@ -0,0 +1,12 @@
# Role in the `monitoring` namespace allowing list/watch on services, endpoints and pods.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: alertmanager-discovery
  namespace: monitoring
rules:
- apiGroups: [""]
  resources:
  - services
  - endpoints
  - pods
  verbs: ["list", "watch"]

View File

@@ -0,0 +1,13 @@
# Binds the `prometheus-frontend` Role to the `prometheus-frontend` ServiceAccount,
# both in the `default` namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: prometheus-frontend
  namespace: default
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: prometheus-frontend
subjects:
- kind: ServiceAccount
  name: prometheus-frontend
  namespace: default

View File

@@ -0,0 +1,17 @@
# Role in the `default` namespace: read access to the listed core resources
# plus `get` on configmaps.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: prometheus-frontend
  namespace: default
rules:
- apiGroups: [""]
  resources:
  - nodes
  - services
  - endpoints
  - pods
  verbs: ["get", "list", "watch"]
- apiGroups: [""]
  resources:
  - configmaps
  verbs: ["get"]

View File

@@ -0,0 +1,5 @@
# ServiceAccount named `prometheus-frontend` in the `default` namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus-frontend
  namespace: default

View File

@@ -0,0 +1,15 @@
# NodePort Service (node port 30100) selecting pods labelled `prometheus: frontend`.
apiVersion: v1
kind: Service
metadata:
  name: prometheus-frontend
  namespace: default
spec:
  type: NodePort
  ports:
  - name: web
    nodePort: 30100
    port: 9090
    protocol: TCP
    targetPort: web
  selector:
    prometheus: frontend

View File

@@ -0,0 +1,25 @@
# Prometheus instance that selects ServiceMonitors labelled `tier: frontend`
# and sends alerts to `alertmanager-main` in the `monitoring` namespace.
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
  name: frontend
  namespace: default
  labels:
    prometheus: frontend
spec:
  serviceAccountName: prometheus-frontend
  version: v1.7.1
  serviceMonitorSelector:
    matchLabels:
      tier: frontend
  resources:
    requests:
      # 2Gi is default, but won't schedule if you don't have a node with >2Gi
      # memory. Modify based on your target and time-series count for
      # production use. This value is mainly meant for demonstration/testing
      # purposes.
      memory: 400Mi
  alerting:
    alertmanagers:
    - namespace: monitoring
      name: alertmanager-main
      port: web

View File

@@ -0,0 +1,19 @@
# ServiceMonitor for Services labelled `tier: frontend` in the `default`
# namespace, scraping their `web` port every 10s.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: frontend
  namespace: default
  labels:
    tier: frontend
spec:
  selector:
    matchLabels:
      tier: frontend
  targetLabels:
  - tier
  endpoints:
  - port: web
    interval: 10s
  namespaceSelector:
    matchNames:
    - default

View File

@@ -0,0 +1,177 @@
{
"annotations": {
"list": [
]
},
"editable": false,
"gnetid": null,
"graphtooltip": 0,
"hidecontrols": false,
"id": null,
"links": [
],
"refresh": "",
"rows": [
{
"collapse": false,
"collapsed": false,
"height": "250px",
"panels": [
{
"aliascolors": {
},
"bars": false,
"dashlength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridpos": {
},
"id": 2,
"legend": {
"alignastable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightside": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullpointmode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesoverrides": [
],
"spacelength": 10,
"span": 6,
"stack": false,
"steppedline": false,
"targets": [
{
"expr": "vector(1)",
"format": "time_series",
"intervalfactor": 2,
"legendformat": "",
"refid": "a"
}
],
"thresholds": [
],
"timefrom": null,
"timeshift": null,
"title": "my panel",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "short",
"label": null,
"logbase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logbase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatiteration": null,
"repeatrowid": null,
"showtitle": false,
"title": "dashboard row",
"titlesize": "h6",
"type": "row"
}
],
"schemaversion": 14,
"style": "dark",
"tags": [
],
"templating": {
"list": [
{
"current": {
"text": "prometheus",
"value": "prometheus"
},
"hide": 0,
"label": null,
"name": "datasource",
"options": [
],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "my dashboard",
"version": 0
}

View File

@@ -0,0 +1 @@
{"groups":[{"name":"example-group","rules":[{"alert":"Watchdog","annotations":{"description":"This is a Watchdog meant to ensure that the entire alerting pipeline is functional."},"expr":"vector(1)","labels":{"severity":"none"}}]}]}

View File

@@ -0,0 +1,9 @@
# One rule group with a single always-firing alert (`vector(1)`).
groups:
- name: example-group
  rules:
  - alert: Watchdog
    expr: vector(1)
    labels:
      severity: "none"
    annotations:
      description: This is a Watchdog meant to ensure that the entire alerting pipeline is functional.

View File

@@ -0,0 +1,45 @@
// kube-prometheus build that adds a dashboard ('my-dashboard.json') written with grafonnet.
local grafana = import 'grafonnet/grafana.libsonnet';
local dashboard = grafana.dashboard;
local row = grafana.row;
local prometheus = grafana.prometheus;
local template = grafana.template;
local graphPanel = grafana.graphPanel;

// Template variable named 'datasource'; the panel below refers to it as '$datasource'.
local datasourceTemplate = {
  current: {
    text: 'Prometheus',
    value: 'Prometheus',
  },
  hide: 0,
  label: null,
  name: 'datasource',
  options: [],
  query: 'prometheus',
  refresh: 1,
  regex: '',
  type: 'datasource',
};

// Single graph panel with one target, the expression `vector(1)`.
local examplePanel =
  graphPanel.new('My Panel', span=6, datasource='$datasource')
  .addTarget(prometheus.target('vector(1)'));

local myDashboard =
  dashboard.new('My Dashboard')
  .addTemplate(datasourceTemplate)
  .addRow(row.new().addPanel(examplePanel));

local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: { namespace: 'monitoring' },
  grafanaDashboards+:: {
    'my-dashboard.json': myDashboard,
  },
};

// Returns one entry per visible field of `component`, keyed by `prefix` + field name.
local render(prefix, component) = {
  [prefix + name]: component[name]
  for name in std.objectFields(component)
};

render('00namespace-', kp.kubePrometheus) +
render('0prometheus-operator-', kp.prometheusOperator) +
render('node-exporter-', kp.nodeExporter) +
render('kube-state-metrics-', kp.kubeStateMetrics) +
render('alertmanager-', kp.alertmanager) +
render('prometheus-', kp.prometheus) +
render('grafana-', kp.grafana)

View File

@@ -0,0 +1,16 @@
// kube-prometheus build that adds an already-rendered dashboard imported from a JSON file.
local renderedDashboard = import 'example-grafana-dashboard.json';

local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: { namespace: 'monitoring' },
  grafanaDashboards+:: {
    'my-dashboard.json': renderedDashboard,
  },
};

// Returns one entry per visible field of `component`, keyed by `prefix` + field name.
local render(prefix, component) = {
  [prefix + name]: component[name]
  for name in std.objectFields(component)
};

render('00namespace-', kp.kubePrometheus) +
render('0prometheus-operator-', kp.prometheusOperator) +
render('node-exporter-', kp.nodeExporter) +
render('kube-state-metrics-', kp.kubeStateMetrics) +
render('alertmanager-', kp.alertmanager) +
render('prometheus-', kp.prometheus) +
render('grafana-', kp.grafana)

104
examples/ingress.jsonnet Normal file
View File

@@ -0,0 +1,104 @@
// Builds Ingress objects (guarded by nginx basic auth) for Alertmanager, Grafana and
// Prometheus, plus the 'basic-auth' Secret they reference. Only the ingress objects
// are emitted by this file.
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local secret = k.core.v1.secret;
local ingress = k.extensions.v1beta1.ingress;
local ingressRule = ingress.mixin.spec.rulesType;
local httpIngressPath = ingressRule.mixin.http.pathsType;

// Annotations shared by every ingress below; 'auth-secret' names the Secret created further down.
local basicAuthAnnotations = {
  'nginx.ingress.kubernetes.io/auth-type': 'basic',
  'nginx.ingress.kubernetes.io/auth-secret': 'basic-auth',
  'nginx.ingress.kubernetes.io/auth-realm': 'Authentication Required',
};

// Returns an Ingress called `name` in `namespace` that routes `host` to the Service
// of the same name on the named port `servicePort`.
local basicAuthIngress(name, namespace, host, servicePort) =
  ingress.new() +
  ingress.mixin.metadata.withName(name) +
  ingress.mixin.metadata.withNamespace(namespace) +
  ingress.mixin.metadata.withAnnotations(basicAuthAnnotations) +
  ingress.mixin.spec.withRules(
    ingressRule.new() +
    ingressRule.withHost(host) +
    ingressRule.mixin.http.withPaths(
      httpIngressPath.new() +
      httpIngressPath.mixin.backend.withServiceName(name) +
      httpIngressPath.mixin.backend.withServicePort(servicePort)
    ),
  );

local kp =
  (import 'kube-prometheus/kube-prometheus.libsonnet') +
  {
    _config+:: {
      namespace: 'monitoring',
    },
    // Configure External URL's per application
    alertmanager+:: {
      alertmanager+: {
        spec+: {
          externalUrl: 'http://alertmanager.example.com',
        },
      },
    },
    grafana+:: {
      config+: {
        sections+: {
          server+: {
            root_url: 'http://grafana.example.com/',
          },
        },
      },
    },
    prometheus+:: {
      prometheus+: {
        spec+: {
          externalUrl: 'http://prometheus.example.com',
        },
      },
    },
    ingress+:: {
      // Create ingress objects per application
      'alertmanager-main': basicAuthIngress('alertmanager-main', $._config.namespace, 'alertmanager.example.com', 'web'),
      grafana: basicAuthIngress('grafana', $._config.namespace, 'grafana.example.com', 'http'),
      'prometheus-k8s': basicAuthIngress('prometheus-k8s', $._config.namespace, 'prometheus.example.com', 'web'),
      // Create basic auth secret - replace 'auth' file with your own
      'basic-auth-secret':
        secret.new('basic-auth', { auth: std.base64(importstr 'auth') }) +
        secret.mixin.metadata.withNamespace($._config.namespace),
    },
  };

{ [name + '-ingress']: kp.ingress[name] for name in std.objectFields(kp.ingress) }

View File

@@ -0,0 +1,14 @@
// kube-prometheus build with `withImageRepository('internal-registry.com/organization')`
// from the config-mixins library applied on top.
local mixin = import 'kube-prometheus/kube-prometheus-config-mixins.libsonnet';

local base = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: { namespace: 'monitoring' },
};

local kp = base + mixin.withImageRepository('internal-registry.com/organization');

// Returns one entry per visible field of `component`, keyed by `prefix` + field name.
local render(prefix, component) = {
  [prefix + name]: component[name]
  for name in std.objectFields(component)
};

render('00namespace-', kp.kubePrometheus) +
render('0prometheus-operator-', kp.prometheusOperator) +
render('node-exporter-', kp.nodeExporter) +
render('kube-state-metrics-', kp.kubeStateMetrics) +
render('alertmanager-', kp.alertmanager) +
render('prometheus-', kp.prometheus) +
render('grafana-', kp.grafana)

View File

@@ -0,0 +1,7 @@
// Snippet: `kp` is not defined here and must be provided by the surrounding file.
// Merges the visible fields of each component into a single object, prefixing
// every key with a per-component string.
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }

View File

@@ -0,0 +1,2 @@
// Base kube-prometheus library composed with the bootkube library.
(import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-bootkube.libsonnet')

View File

@@ -0,0 +1,3 @@
// Base kube-prometheus library composed with the kops and kops-coredns libraries.
(import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-kops.libsonnet') +
(import 'kube-prometheus/kube-prometheus-kops-coredns.libsonnet')

View File

@@ -0,0 +1,2 @@
// Base kube-prometheus library composed with the kops library.
(import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-kops.libsonnet')

View File

@@ -0,0 +1,2 @@
// Base kube-prometheus library composed with the kube-aws library.
(import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-kube-aws.libsonnet')

View File

@@ -0,0 +1,2 @@
// Base kube-prometheus library composed with the kubeadm library.
(import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-kubeadm.libsonnet')

View File

@@ -0,0 +1,2 @@
// Base kube-prometheus library composed with the kubespray library.
(import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-kubespray.libsonnet')

View File

@@ -0,0 +1,2 @@
// Base kube-prometheus library composed with the node-ports library.
(import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-node-ports.libsonnet')

View File

@@ -0,0 +1,9 @@
// Evaluates to the node-exporter DaemonSet with its metadata namespace overridden
// through the ksonnet mixin helper.
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local daemonset = k.apps.v1beta2.daemonSet;

local namespaceOverride = daemonset.mixin.metadata.withNamespace('my-custom-namespace');

local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  nodeExporter+: {
    daemonset+: namespaceOverride,
  },
};

kp.nodeExporter.daemonset

View File

@@ -0,0 +1,28 @@
// Emits all manifests plus a Kustomization (under the key '../kustomization')
// whose `resources` list has one './manifests/<key>.yaml' entry per manifest.
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: { namespace: 'monitoring' },
};

// Returns one entry per visible field of `component`, keyed by `prefix` + field name.
local render(prefix, component) = {
  [prefix + name]: component[name]
  for name in std.objectFields(component)
};

local manifests =
  render('00namespace-', kp.kubePrometheus) +
  render('0prometheus-operator-', kp.prometheusOperator) +
  render('node-exporter-', kp.nodeExporter) +
  render('kube-state-metrics-', kp.kubeStateMetrics) +
  render('alertmanager-', kp.alertmanager) +
  render('prometheus-', kp.prometheus) +
  render('prometheus-adapter-', kp.prometheusAdapter) +
  render('grafana-', kp.grafana);

local kustomization = {
  apiVersion: 'kustomize.config.k8s.io/v1beta1',
  kind: 'Kustomization',
  resources: ['./manifests/' + name + '.yaml' for name in std.objectFields(manifests)],
};

manifests + {
  '../kustomization': kustomization,
}

59
examples/minikube.jsonnet Normal file
View File

@@ -0,0 +1,59 @@
// kube-prometheus build for a minikube demo: kubeadm and node-ports libraries,
// anonymous Grafana access, and hard-coded external URLs.

// For simplicity, each of the following 'externalUrl' values:
// * assumes that `minikube ip` prints "192.168.99.100"
// * hard-codes the NodePort for each app
local prometheusExternalUrl = 'http://192.168.99.100:30900';
local alertmanagerExternalUrl = 'http://192.168.99.100:30903';

local kp =
  (import 'kube-prometheus/kube-prometheus.libsonnet') +
  (import 'kube-prometheus/kube-prometheus-kubeadm.libsonnet') +
  // Note that NodePort type services is likely not a good idea for your production use case, it is only used for demonstration purposes here.
  (import 'kube-prometheus/kube-prometheus-node-ports.libsonnet') +
  {
    _config+:: {
      namespace: 'monitoring',
      alertmanager+:: {
        config: importstr 'alertmanager-config.yaml',
      },
      grafana+:: {
        // http://docs.grafana.org/installation/configuration/
        config: {
          sections: {
            // Do not require grafana users to login/authenticate
            'auth.anonymous': { enabled: true },
          },
        },
      },
    },
    prometheus+:: {
      prometheus+: {
        // Reference info: https://coreos.com/operators/prometheus/docs/latest/api.html#prometheusspec
        spec+: {
          // An e.g. of the purpose of this is so the "Source" links on http://<alert-manager>/#/alerts are valid.
          externalUrl: prometheusExternalUrl,
          // Reference info: "external_labels" on https://prometheus.io/docs/prometheus/latest/configuration/configuration/
          externalLabels: {
            // This 'cluster' label will be included on every firing prometheus alert. (This is more useful
            // when running multiple clusters in a shared environment (e.g. AWS) with other users.)
            cluster: 'minikube-<INSERT YOUR USERNAME HERE>',
          },
        },
      },
    },
    alertmanager+:: {
      alertmanager+: {
        // Reference info: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#alertmanagerspec
        spec+: {
          externalUrl: alertmanagerExternalUrl,
          // So firing alerts show up in log
          logLevel: 'debug',
        },
      },
    },
  };

// Returns one entry per visible field of `component`, keyed by `prefix` + field name.
local render(prefix, component) = {
  [prefix + name]: component[name]
  for name in std.objectFields(component)
};

render('00namespace-', kp.kubePrometheus) +
render('0prometheus-operator-', kp.prometheusOperator) +
render('node-exporter-', kp.nodeExporter) +
render('kube-state-metrics-', kp.kubeStateMetrics) +
render('alertmanager-', kp.alertmanager) +
render('prometheus-', kp.prometheus) +
render('grafana-', kp.grafana)

View File

@@ -0,0 +1,32 @@
// kube-prometheus build that appends one alerting rule group to `prometheusAlerts.groups`.
local watchdogGroup = {
  name: 'example-group',
  rules: [
    {
      alert: 'Watchdog',
      expr: 'vector(1)',
      labels: {
        severity: 'none',
      },
      annotations: {
        description: 'This is a Watchdog meant to ensure that the entire alerting pipeline is functional.',
      },
    },
  ],
};

local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: { namespace: 'monitoring' },
  // `groups+:` appends to the inherited list instead of replacing it.
  prometheusAlerts+:: { groups+: [watchdogGroup] },
};

// Returns one entry per visible field of `component`, keyed by `prefix` + field name.
local render(prefix, component) = {
  [prefix + name]: component[name]
  for name in std.objectFields(component)
};

render('00namespace-', kp.kubePrometheus) +
render('0prometheus-operator-', kp.prometheusOperator) +
render('node-exporter-', kp.nodeExporter) +
render('kube-state-metrics-', kp.kubeStateMetrics) +
render('alertmanager-', kp.alertmanager) +
render('prometheus-', kp.prometheus) +
render('grafana-', kp.grafana)

View File

@@ -0,0 +1,26 @@
// kube-prometheus build that appends one recording rule group to `prometheusRules.groups`.
local recordingGroup = {
  name: 'example-group',
  rules: [
    {
      record: 'some_recording_rule_name',
      expr: 'vector(1)',
    },
  ],
};

local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: { namespace: 'monitoring' },
  // `groups+:` appends to the inherited list instead of replacing it.
  prometheusRules+:: { groups+: [recordingGroup] },
};

// Returns one entry per visible field of `component`, keyed by `prefix` + field name.
local render(prefix, component) = {
  [prefix + name]: component[name]
  for name in std.objectFields(component)
};

render('00namespace-', kp.kubePrometheus) +
render('0prometheus-operator-', kp.prometheusOperator) +
render('node-exporter-', kp.nodeExporter) +
render('kube-state-metrics-', kp.kubeStateMetrics) +
render('alertmanager-', kp.alertmanager) +
render('prometheus-', kp.prometheus) +
render('grafana-', kp.grafana)

View File

@@ -0,0 +1,11 @@
// kube-prometheus build that adds the rule groups from an already-rendered
// rules file ('existingrule.json') to `prometheusAlerts.groups`.
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  prometheusAlerts+:: {
    // Append with `groups+:`. Merging the imported object directly would override
    // the inherited `groups` field, because the JSON file declares a plain `groups`
    // key, and that would drop every group already present.
    groups+: (import 'existingrule.json').groups,
  },
};
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }

View File

@@ -0,0 +1,9 @@
// Evaluates to `prometheus.prometheus` with `metadata.name` overridden to 'my-name'.
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  prometheus+: {
    prometheus+: {
      metadata+: { name: 'my-name' },
    },
  },
};

kp.prometheus.prometheus

View File

@@ -0,0 +1,58 @@
// Reference info: documentation for https://github.com/ksonnet/ksonnet-lib can be found at http://g.bryan.dev.hepti.center
//
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; // https://github.com/ksonnet/ksonnet-lib/blob/master/ksonnet.beta.3/k.libsonnet - imports k8s.libsonnet
// * https://github.com/ksonnet/ksonnet-lib/blob/master/ksonnet.beta.3/k8s.libsonnet defines things such as "persistentVolumeClaim:: {"
//
local pvc = k.core.v1.persistentVolumeClaim; // https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.11/#persistentvolumeclaim-v1-core (defines variable named 'spec' of type 'PersistentVolumeClaimSpec')
// kp: the kube-prometheus stack plus the bootkube mixin, customised below to
// run in the 'monitoring' namespace and to give Prometheus a 30 day retention
// backed by a 100Gi PersistentVolumeClaim from the 'ssd' StorageClass.
local kp =
(import 'kube-prometheus/kube-prometheus.libsonnet') +
(import 'kube-prometheus/kube-prometheus-bootkube.libsonnet') +
{
_config+:: {
namespace: 'monitoring',
},
// Only the 'spec' of the Prometheus object is extended ('+:'), so every other
// field of the imported definition is kept as-is.
prometheus+:: {
prometheus+: {
spec+: { // https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#prometheusspec
// If a value isn't specified for 'retention', then by default the '--storage.tsdb.retention=24h' arg will be passed to prometheus by prometheus-operator.
// The possible values for a prometheus <duration> are:
// * https://github.com/prometheus/common/blob/c7de230/model/time.go#L178 specifies "^([0-9]+)(y|w|d|h|m|s|ms)$" (years weeks days hours minutes seconds milliseconds)
retention: '30d',
// Reference info: https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md
// By default (if the following 'storage.volumeClaimTemplate' isn't created), prometheus will be created with an EmptyDir for the 'prometheus-k8s-db' volume (for the prom tsdb).
// This 'storage.volumeClaimTemplate' causes the following to be automatically created (via dynamic provisioning) for each prometheus pod:
// * PersistentVolumeClaim (and a corresponding PersistentVolume)
// * the actual volume (per the StorageClassName specified below)
storage: { // https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#storagespec
volumeClaimTemplate: // (same link as above where the 'pvc' variable is defined)
pvc.new() + // http://g.bryan.dev.hepti.center/core/v1/persistentVolumeClaim/#core.v1.persistentVolumeClaim.new
pvc.mixin.spec.withAccessModes('ReadWriteOnce') +
// https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.11/#resourcerequirements-v1-core (defines 'requests'),
// and https://kubernetes.io/docs/concepts/policy/resource-quotas/#storage-resource-quota (defines 'requests.storage')
pvc.mixin.spec.resources.withRequests({ storage: '100Gi' }) +
// A StorageClass of the following name (which can be seen via `kubectl get storageclass` from a node in the given K8s cluster) must exist prior to kube-prometheus being deployed.
pvc.mixin.spec.withStorageClassName('ssd'),
// The following 'selector' is only needed if you're using manual storage provisioning (https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md#manual-storage-provisioning).
// And note that this is not supported/allowed by AWS - uncommenting the following 'selector' line (when deploying kube-prometheus to a K8s cluster in AWS) will cause the pvc to be stuck in the Pending status and have the following error:
// * 'Failed to provision volume with StorageClass "ssd": claim.Spec.Selector is not supported for dynamic provisioning on AWS'
//pvc.mixin.spec.selector.withMatchLabels({}),
}, // storage
}, // spec
}, // prometheus
}, // prometheus
};
// Flattens one component into an object keyed '<prefix><resource name>'.
local prefixed(prefix, component) = {
  [prefix + name]: component[name]
  for name in std.objectFields(component)
};

// Output: every resource of every component, merged into one flat object.
prefixed('0prometheus-operator-', kp.prometheusOperator) +
prefixed('node-exporter-', kp.nodeExporter) +
prefixed('kube-state-metrics-', kp.kubeStateMetrics) +
prefixed('alertmanager-', kp.alertmanager) +
prefixed('prometheus-', kp.prometheus) +
prefixed('grafana-', kp.grafana)

View File

@@ -0,0 +1,7 @@
apiserver-key.pem
apiserver.csr
apiserver.pem
metrics-ca-config.json
metrics-ca.crt
metrics-ca.key
cm-adapter-serving-certs.yaml

View File

@@ -0,0 +1,21 @@
# Custom Metrics API
The custom metrics API allows the HPA v2 to scale based on arbitrary metrics.
This directory contains an example deployment which extends the Prometheus Adapter, deployed with kube-prometheus, to serve the [Custom Metrics API](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/instrumentation/custom-metrics-api.md) by talking to Prometheus running inside the cluster.
Make sure you have the Prometheus Adapter up and running in the `monitoring` namespace.
You can deploy everything in the `monitoring` namespace using `./deploy.sh`.
When you're done, you can teardown using the `./teardown.sh` script.
### Sample App
Additionally, this directory contains a sample app that uses the [Horizontal Pod Autoscaler](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/) to scale the Deployment's replicas of Pods up and down as needed.
Deploy this app by running `kubectl apply -f sample-app.yaml`.
Make the app accessible on your system, for example by using `kubectl port-forward svc/sample-app 8080`. Next you need to put some load on its http endpoints.
A tool like [hey](https://github.com/rakyll/hey) is helpful for doing so: `hey -c 20 -n 100000000 http://localhost:8080/metrics`
There is even more detailed information on this sample app at [luxas/kubeadm-workshop](https://github.com/luxas/kubeadm-workshop#deploying-the-prometheus-operator-for-monitoring-services-in-the-cluster).

View File

@@ -0,0 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: custom-metrics-server-resources
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: custom-metrics-server-resources
subjects:
- kind: ServiceAccount
name: prometheus-adapter
namespace: monitoring

View File

@@ -0,0 +1,13 @@
# Registers the custom.metrics.k8s.io/v1beta1 API group with the aggregation
# layer and routes it to the prometheus-adapter Service in the monitoring
# namespace.
apiVersion: apiregistration.k8s.io/v1beta1
kind: APIService
metadata:
name: v1beta1.custom.metrics.k8s.io
spec:
service:
name: prometheus-adapter
namespace: monitoring
group: custom.metrics.k8s.io
version: v1beta1
# NOTE(review): the serving certificate of the backing service is not
# verified. Acceptable for an example; confirm before using elsewhere.
insecureSkipTLSVerify: true
groupPriorityMinimum: 100
versionPriority: 100

View File

@@ -0,0 +1,9 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
name: custom-metrics-server-resources
rules:
- apiGroups:
- custom.metrics.k8s.io
resources: ["*"]
verbs: ["*"]

View File

@@ -0,0 +1,98 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: adapter-config
namespace: monitoring
data:
config.yaml: |
rules:
- seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}'
seriesFilters: []
resources:
overrides:
namespace:
resource: namespace
pod_name:
resource: pod
name:
matches: ^container_(.*)_seconds_total$
as: ""
metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[1m])) by (<<.GroupBy>>)
- seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}'
seriesFilters:
- isNot: ^container_.*_seconds_total$
resources:
overrides:
namespace:
resource: namespace
pod_name:
resource: pod
name:
matches: ^container_(.*)_total$
as: ""
metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[1m])) by (<<.GroupBy>>)
- seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}'
seriesFilters:
- isNot: ^container_.*_total$
resources:
overrides:
namespace:
resource: namespace
pod_name:
resource: pod
name:
matches: ^container_(.*)$
as: ""
metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}) by (<<.GroupBy>>)
- seriesQuery: '{namespace!="",__name__!~"^container_.*"}'
seriesFilters:
- isNot: .*_total$
resources:
template: <<.Resource>>
name:
matches: ""
as: ""
metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)
- seriesQuery: '{namespace!="",__name__!~"^container_.*"}'
seriesFilters:
- isNot: .*_seconds_total
resources:
template: <<.Resource>>
name:
matches: ^(.*)_total$
as: ""
metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)
- seriesQuery: '{namespace!="",__name__!~"^container_.*"}'
seriesFilters: []
resources:
template: <<.Resource>>
name:
matches: ^(.*)_seconds_total$
as: ""
metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)
resourceRules:
cpu:
containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)
nodeQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>, id='/'}[1m])) by (<<.GroupBy>>)
resources:
overrides:
node:
resource: node
namespace:
resource: namespace
pod_name:
resource: pod
containerLabel: container_name
memory:
containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>}) by (<<.GroupBy>>)
nodeQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,id='/'}) by (<<.GroupBy>>)
resources:
overrides:
node:
resource: node
namespace:
resource: namespace
pod_name:
resource: pod
containerLabel: container_name
window: 1m

View File

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Applies the custom-metrics example manifests.
# exit immediately when a command fails, so a failed apply is not masked by
# the ones that follow it
set -e
# only exit with zero if all commands of the pipeline exit successfully
set -o pipefail
# error on unset variables
set -u

kubectl apply -n monitoring -f custom-metrics-apiserver-resource-reader-cluster-role-binding.yaml
kubectl apply -n monitoring -f custom-metrics-apiservice.yaml
kubectl apply -n monitoring -f custom-metrics-cluster-role.yaml
kubectl apply -n monitoring -f custom-metrics-configmap.yaml
kubectl apply -n monitoring -f hpa-custom-metrics-cluster-role-binding.yaml

View File

@@ -0,0 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: hpa-controller-custom-metrics
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: custom-metrics-server-resources
subjects:
- kind: ServiceAccount
name: horizontal-pod-autoscaler
namespace: kube-system

View File

@@ -0,0 +1,67 @@
kind: ServiceMonitor
apiVersion: monitoring.coreos.com/v1
metadata:
name: sample-app
labels:
app: sample-app
spec:
selector:
matchLabels:
app: sample-app
endpoints:
- port: http
interval: 5s
---
apiVersion: v1
kind: Service
metadata:
name: sample-app
labels:
app: sample-app
spec:
ports:
- name: http
port: 8080
targetPort: 8080
selector:
app: sample-app
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: sample-app
labels:
app: sample-app
spec:
replicas: 1
selector:
matchLabels:
app: sample-app
template:
metadata:
labels:
app: sample-app
spec:
containers:
- image: luxas/autoscale-demo:v0.1.2
name: metrics-provider
ports:
- name: http
containerPort: 8080
---
kind: HorizontalPodAutoscaler
apiVersion: autoscaling/v2beta1
metadata:
name: sample-app
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: sample-app
minReplicas: 1
maxReplicas: 10
metrics:
- type: Pods
pods:
metricName: http_requests
targetAverageValue: 500m

View File

@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Deletes the custom-metrics example manifests, one kubectl call per file.
# A failed delete does not stop the remaining ones.
manifests=(
  custom-metrics-apiserver-resource-reader-cluster-role-binding.yaml
  custom-metrics-apiservice.yaml
  custom-metrics-cluster-role.yaml
  custom-metrics-configmap.yaml
  hpa-custom-metrics-cluster-role-binding.yaml
)

for manifest in "${manifests[@]}"; do
  kubectl delete -n monitoring -f "${manifest}"
done

View File

@@ -0,0 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: metrics-server:system:auth-delegator
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:auth-delegator
subjects:
- kind: ServiceAccount
name: metrics-server
namespace: kube-system

View File

@@ -0,0 +1,13 @@
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
name: metrics-server-auth-reader
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
name: metrics-server
namespace: kube-system

View File

@@ -0,0 +1,13 @@
# Registers the metrics.k8s.io/v1beta1 API group with the aggregation layer
# and routes it to the metrics-server Service in kube-system.
apiVersion: apiregistration.k8s.io/v1beta1
kind: APIService
metadata:
name: v1beta1.metrics.k8s.io
spec:
service:
name: metrics-server
namespace: kube-system
group: metrics.k8s.io
version: v1beta1
# NOTE(review): the serving certificate of the backing service is not
# verified. Acceptable for an example; confirm before using elsewhere.
insecureSkipTLSVerify: true
groupPriorityMinimum: 100
versionPriority: 100

View File

@@ -0,0 +1,12 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: system:metrics-server
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: system:metrics-server
subjects:
- kind: ServiceAccount
name: metrics-server
namespace: kube-system

View File

@@ -0,0 +1,24 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: system:metrics-server
rules:
- apiGroups:
- ""
resources:
- pods
- nodes
- nodes/stats
- namespaces
verbs:
- get
- list
- watch
- apiGroups:
- "extensions"
resources:
- deployments
verbs:
- get
- list
- watch

View File

@@ -0,0 +1,25 @@
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
name: metrics-server
namespace: kube-system
labels:
k8s-app: metrics-server
spec:
selector:
matchLabels:
k8s-app: metrics-server
template:
metadata:
name: metrics-server
labels:
k8s-app: metrics-server
spec:
serviceAccountName: metrics-server
containers:
- name: metrics-server
image: gcr.io/google_containers/metrics-server-amd64:v0.2.0
imagePullPolicy: Always
command:
- /metrics-server
- --source=kubernetes.summary_api:''

View File

@@ -0,0 +1,5 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: metrics-server
namespace: kube-system

View File

@@ -0,0 +1,14 @@
apiVersion: v1
kind: Service
metadata:
name: metrics-server
namespace: kube-system
labels:
kubernetes.io/name: "Metrics-server"
spec:
selector:
k8s-app: metrics-server
ports:
- port: 443
protocol: TCP
targetPort: 443

17
grafana-image/Dockerfile Normal file
View File

@@ -0,0 +1,17 @@
# Grafana image: downloads the release tarball selected by GRAFANA_VERSION and
# runs grafana-server as an unprivileged user with its state under /data.
FROM debian:9.3-slim

# Grafana release to download, passed with --build-arg.
ARG GRAFANA_VERSION

# Install, download and unpack in a single layer, then drop the tarball and the
# apt package lists so neither ends up in the image.
RUN apt-get update && apt-get install -qq -y wget tar sqlite && \
    wget -O /tmp/grafana.tar.gz https://s3-us-west-2.amazonaws.com/grafana-releases/release/grafana-$GRAFANA_VERSION.linux-x64.tar.gz && \
    tar -zxvf /tmp/grafana.tar.gz -C /tmp && mv /tmp/grafana-$GRAFANA_VERSION /grafana && \
    rm -rf /tmp/grafana.tar.gz /var/lib/apt/lists/*

# COPY rather than ADD: this is a plain local file, so ADD's archive
# extraction and URL handling are not wanted.
COPY config.ini /grafana/conf/config.ini

USER nobody
EXPOSE 3000
VOLUME [ "/data" ]
WORKDIR /grafana
ENTRYPOINT [ "/grafana/bin/grafana-server" ]
CMD [ "-config=/grafana/conf/config.ini" ]

5
grafana-image/Makefile Normal file
View File

@@ -0,0 +1,5 @@
# Grafana release baked into the image; also used as the image tag.
VERSION=5.0.3
IMAGE_TAG=$(VERSION)

# `container` is an action, not a file: without .PHONY a file or directory
# named `container` would make the target look up to date and skip the build.
.PHONY: container
container:
	docker build --build-arg GRAFANA_VERSION=$(VERSION) -t quay.io/coreos/monitoring-grafana:$(IMAGE_TAG) .

16
grafana-image/config.ini Normal file
View File

@@ -0,0 +1,16 @@
# Grafana configuration shipped in the image: all state lives under /data.
[database]
path = /data/grafana.db
[paths]
data = /data
logs = /data/log
plugins = /data/plugins
[session]
provider = memory
# Basic auth is switched off and anonymous access is switched on.
# NOTE(review): confirm this is intended wherever the image is exposed.
[auth.basic]
enabled = false
[auth.anonymous]
enabled = true

View File

@@ -0,0 +1,9 @@
#!/usr/bin/env bash
# Strict mode:
#   -e           stop at the first failing command
#   -u           treat unset variables as errors
#   -o pipefail  a pipeline fails if any command in it fails
set -euo pipefail

# Apply every manifest in the example-app directory.
kubectl apply -f examples/example-app

View File

@@ -0,0 +1,9 @@
#!/usr/bin/env bash
# Strict mode:
#   -e           stop at the first failing command
#   -u           treat unset variables as errors
#   -o pipefail  a pipeline fails if any command in it fails
set -euo pipefail

# Delete every object defined by the manifests in the example-app directory.
kubectl delete -f examples/example-app

2
jsonnet/kube-prometheus/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
jsonnetfile.lock.json
vendor/

View File

@@ -0,0 +1,125 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
{
_config+:: {
  // Namespace the Alertmanager objects are created in.
  namespace: 'default',

  versions+:: {
    alertmanager: 'v0.16.1',
  },

  imageRepos+:: {
    alertmanager: 'quay.io/prometheus/alertmanager',
  },

  alertmanager+:: {
    // Must be a literal. It used to be `$._config.alertmanager.name`, i.e. a
    // reference to this very field, which recurses forever as soon as any
    // object name is evaluated, unless a consumer happened to override it.
    // Consumers can still override the name through `_config+::`.
    name: 'main',

    // Default Alertmanager configuration: everything is routed to the
    // receiver called 'null', which has no integrations configured.
    config: {
      global: {
        resolve_timeout: '5m',
      },
      route: {
        group_by: ['job'],
        group_wait: '30s',
        group_interval: '5m',
        repeat_interval: '12h',
        receiver: 'null',
        routes: [
          {
            receiver: 'null',
            match: {
              alertname: 'Watchdog',
            },
          },
        ],
      },
      receivers: [
        {
          name: 'null',
        },
      ],
    },

    replicas: 3,
  },
},
alertmanager+:: {
  // Values shared by every object below.
  local amName = $._config.alertmanager.name,
  local ns = $._config.namespace,
  local objectName = 'alertmanager-' + amName,

  // Secret carrying alertmanager.yaml. An object-typed config is rendered to
  // YAML first; any other value is base64-encoded as given.
  secret:
    local secret = k.core.v1.secret;
    local cfg = $._config.alertmanager.config;
    local payload = if std.type(cfg) == 'object' then std.manifestYamlDoc(cfg) else cfg;
    secret.new(objectName, { 'alertmanager.yaml': std.base64(payload) }) +
    secret.mixin.metadata.withNamespace(ns),

  // ServiceAccount referenced by the Alertmanager spec below.
  serviceAccount:
    local serviceAccount = k.core.v1.serviceAccount;
    serviceAccount.new(objectName) +
    serviceAccount.mixin.metadata.withNamespace(ns),

  // Service exposing the 'web' port (9093) with ClientIP session affinity.
  service:
    local service = k.core.v1.service;
    local webPort = service.mixin.spec.portsType.newNamed('web', 9093, 'web');
    local podSelector = { app: 'alertmanager', alertmanager: amName };
    service.new(objectName, podSelector, webPort) +
    service.mixin.spec.withSessionAffinity('ClientIP') +
    service.mixin.metadata.withNamespace(ns) +
    service.mixin.metadata.withLabels({ alertmanager: amName }),

  // ServiceMonitor selecting the Service above through its 'alertmanager'
  // label and scraping its 'web' port every 30s.
  serviceMonitor: {
    apiVersion: 'monitoring.coreos.com/v1',
    kind: 'ServiceMonitor',
    metadata: {
      name: 'alertmanager',
      namespace: ns,
      labels: { 'k8s-app': 'alertmanager' },
    },
    spec: {
      selector: {
        matchLabels: { alertmanager: amName },
      },
      endpoints: [
        { port: 'web', interval: '30s' },
      ],
    },
  },

  // The Alertmanager custom resource itself.
  alertmanager: {
    apiVersion: 'monitoring.coreos.com/v1',
    kind: 'Alertmanager',
    metadata: {
      name: amName,
      namespace: ns,
      labels: { alertmanager: amName },
    },
    spec: {
      replicas: $._config.alertmanager.replicas,
      version: $._config.versions.alertmanager,
      baseImage: $._config.imageRepos.alertmanager,
      nodeSelector: { 'beta.kubernetes.io/os': 'linux' },
      serviceAccountName: objectName,
      securityContext: {
        runAsUser: 1000,
        runAsNonRoot: true,
        fsGroup: 2000,
      },
    },
  },
},
}

View File

@@ -0,0 +1,52 @@
{
  // Alerting rules about the Alertmanager cluster itself. Each rule is bound
  // to a local and the group is assembled at the bottom.

  // Critical after 5m: the count-by-config-hash / replica-count ratio of a
  // service is not 1.
  local configInconsistent = {
    alert: 'AlertmanagerConfigInconsistent',
    expr: |||
      count_values("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_spec_replicas{%(prometheusOperatorSelector)s,controller="alertmanager"}, "service", "alertmanager-$1", "name", "(.*)") != 1
    ||| % $._config,
    'for': '5m',
    labels: { severity: 'critical' },
    annotations: {
      message: 'The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.',
    },
  },

  // Warning after 10m: the last configuration reload was not successful.
  local failedReload = {
    alert: 'AlertmanagerFailedReload',
    expr: |||
      alertmanager_config_last_reload_successful{%(alertmanagerSelector)s} == 0
    ||| % $._config,
    'for': '10m',
    labels: { severity: 'warning' },
    annotations: {
      message: "Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod}}.",
    },
  },

  // Critical after 5m: an instance reports a member count different from the
  // number of instances of its service.
  local membersInconsistent = {
    alert: 'AlertmanagerMembersInconsistent',
    expr: |||
      alertmanager_cluster_members{%(alertmanagerSelector)s}
      != on (service) GROUP_LEFT()
      count by (service) (alertmanager_cluster_members{%(alertmanagerSelector)s})
    ||| % $._config,
    'for': '5m',
    labels: { severity: 'critical' },
    annotations: {
      message: 'Alertmanager has not found all other members of the cluster.',
    },
  },

  prometheusAlerts+:: {
    groups+: [
      {
        name: 'alertmanager.rules',
        rules: [configInconsistent, failedReload, membersInconsistent],
      },
    ],
  },
}

View File

@@ -0,0 +1,5 @@
(import 'alertmanager.libsonnet') +
(import 'general.libsonnet') +
(import 'node.libsonnet') +
(import 'prometheus.libsonnet') +
(import 'prometheus-operator.libsonnet')

View File

@@ -0,0 +1,38 @@
{
  // General alerting rules. Each rule is bound to a local and the group is
  // assembled at the bottom.

  // Warning after 10m: more than 10% of a job's targets have up == 0.
  local targetDown = {
    alert: 'TargetDown',
    expr: '100 * (count(up == 0) BY (job) / count(up) BY (job)) > 10',
    'for': '10m',
    labels: { severity: 'warning' },
    annotations: {
      message: '{{ $value }}% of the {{ $labels.job }} targets are down.',
    },
  },

  // Always firing (vector(1)); see the message for its purpose.
  local watchdog = {
    alert: 'Watchdog',
    expr: 'vector(1)',
    labels: { severity: 'none' },
    annotations: {
      message: |||
        This is an alert meant to ensure that the entire alerting pipeline is functional.
        This alert is always firing, therefore it should always be firing in Alertmanager
        and always fire against a receiver. There are integrations with various notification
        mechanisms that send a notification when this alert is not firing. For example the
        "DeadMansSnitch" integration in PagerDuty.
      |||,
    },
  },

  prometheusAlerts+:: {
    groups+: [
      {
        name: 'general.rules',
        rules: [targetDown, watchdog],
      },
    ],
  },
}

View File

@@ -0,0 +1,112 @@
// Node-level alerting rules: disk fill prediction, clock skew and network
// interface health. Selectors are substituted from $._config.
{
  prometheusAlerts+:: {
    groups+: [
      {
        name: 'kube-prometheus-node-alerting.rules',
        rules: [
          {
            // Usage above 85% and a 6h linear prediction reaching zero
            // available space within 24h.
            alert: 'NodeDiskRunningFull',
            annotations: {
              message: 'Device {{ $labels.device }} of node-exporter {{ $labels.namespace }}/{{ $labels.pod }} will be full within the next 24 hours.',
            },
            expr: |||
              (node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[6h], 3600 * 24) < 0)
            ||| % $._config,
            'for': '30m',
            labels: {
              severity: 'warning',
            },
          },
          {
            // Same alert name, tighter horizon: 30m of data predicting zero
            // available space within 2h.
            alert: 'NodeDiskRunningFull',
            annotations: {
              message: 'Device {{ $labels.device }} of node-exporter {{ $labels.namespace }}/{{ $labels.pod }} will be full within the next 2 hours.',
            },
            expr: |||
              (node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[30m], 3600 * 2) < 0)
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'critical',
            },
          },
        ],
      },
      {
        name: 'node-time',
        rules: [
          {
            // NTP offset outside +/- 0.03 seconds.
            alert: 'ClockSkewDetected',
            annotations: {
              message: 'Clock skew detected on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}. Ensure NTP is configured correctly on this host.',
            },
            expr: |||
              node_ntp_offset_seconds{%(nodeExporterSelector)s} < -0.03 or node_ntp_offset_seconds{%(nodeExporterSelector)s} > 0.03
            ||| % $._config,
            'for': '2m',
            labels: {
              severity: 'warning',
            },
          },
        ],
      },
      {
        // The messages in this group used to end in a stray, unbalanced
        // double quote; it has been removed.
        name: 'node-network',
        rules: [
          {
            alert: 'NetworkReceiveErrors',
            annotations: {
              message: 'Network interface "{{ $labels.device }}" showing receive errors on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}',
            },
            expr: |||
              rate(node_network_receive_errs_total{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s}[2m]) > 0
            ||| % $._config,
            'for': '2m',
            labels: {
              severity: 'warning',
            },
          },
          {
            alert: 'NetworkTransmitErrors',
            annotations: {
              message: 'Network interface "{{ $labels.device }}" showing transmit errors on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}',
            },
            expr: |||
              rate(node_network_transmit_errs_total{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s}[2m]) > 0
            ||| % $._config,
            'for': '2m',
            labels: {
              severity: 'warning',
            },
          },
          {
            alert: 'NodeNetworkInterfaceDown',
            annotations: {
              message: 'Network interface "{{ $labels.device }}" down on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}',
            },
            expr: |||
              node_network_up{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s} == 0
            ||| % $._config,
            'for': '2m',
            labels: {
              severity: 'warning',
            },
          },
          {
            // More than two up/down changes within 2 minutes.
            alert: 'NodeNetworkInterfaceFlapping',
            annotations: {
              message: 'Network interface "{{ $labels.device }}" changing its up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}',
            },
            expr: |||
              changes(node_network_up{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s}[2m]) > 2
            ||| % $._config,
            'for': '2m',
            labels: {
              severity: 'warning',
            },
          },
        ],
      },
    ],
  },
}

View File

@@ -0,0 +1,37 @@
{
  // Alerting rules about the Prometheus Operator. Each rule is bound to a
  // local and the group is assembled at the bottom.

  // Warning after 10m: reconcile error rate above 0.1/s over 5m.
  local reconcileErrors = {
    alert: 'PrometheusOperatorReconcileErrors',
    expr: |||
      rate(prometheus_operator_reconcile_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1
    ||| % $._config,
    'for': '10m',
    labels: { severity: 'warning' },
    annotations: {
      message: 'Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace }} Namespace.',
    },
  },

  // Warning after 10m: node address lookup error rate above 0.1/s over 5m.
  local nodeLookupErrors = {
    alert: 'PrometheusOperatorNodeLookupErrors',
    expr: |||
      rate(prometheus_operator_node_address_lookup_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1
    ||| % $._config,
    'for': '10m',
    labels: { severity: 'warning' },
    annotations: {
      message: 'Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.',
    },
  },

  prometheusAlerts+:: {
    groups+: [
      {
        name: 'prometheus-operator',
        rules: [reconcileErrors, nodeLookupErrors],
      },
    ],
  },
}

View File

@@ -0,0 +1,151 @@
// Alerting rules about Prometheus itself. The job selector is substituted
// from $._config.prometheusSelector.
{
  prometheusAlerts+:: {
    groups+: [
      {
        name: 'prometheus.rules',
        rules: [
          {
            alert: 'PrometheusConfigReloadFailed',
            annotations: {
              description: "Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}}",
              summary: "Reloading Prometheus' configuration failed",
            },
            expr: |||
              prometheus_config_last_reload_successful{%(prometheusSelector)s} == 0
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'warning',
            },
          },
          {
            // A 5m linear prediction of the queue length exceeds the queue
            // capacity within 30 minutes.
            alert: 'PrometheusNotificationQueueRunningFull',
            annotations: {
              description: "Prometheus' alert notification queue is running full for {{$labels.namespace}}/{{ $labels.pod}}",
              summary: "Prometheus' alert notification queue is running full",
            },
            expr: |||
              predict_linear(prometheus_notifications_queue_length{%(prometheusSelector)s}[5m], 60 * 30) > prometheus_notifications_queue_capacity{%(prometheusSelector)s}
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'warning',
            },
          },
          {
            // More than 1% of notifications fail. The label is spelled
            // `alertmanager`; template label lookups are case-sensitive, so
            // `$labels.Alertmanager` never matched it.
            alert: 'PrometheusErrorSendingAlerts',
            annotations: {
              description: 'Errors while sending alerts from Prometheus {{$labels.namespace}}/{{ $labels.pod}} to Alertmanager {{$labels.alertmanager}}',
              summary: 'Errors while sending alerts from Prometheus',
            },
            expr: |||
              rate(prometheus_notifications_errors_total{%(prometheusSelector)s}[5m]) / rate(prometheus_notifications_sent_total{%(prometheusSelector)s}[5m]) > 0.01
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'warning',
            },
          },
          {
            // Same condition at 3%, escalated to critical.
            alert: 'PrometheusErrorSendingAlerts',
            annotations: {
              description: 'Errors while sending alerts from Prometheus {{$labels.namespace}}/{{ $labels.pod}} to Alertmanager {{$labels.alertmanager}}',
              summary: 'Errors while sending alerts from Prometheus',
            },
            expr: |||
              rate(prometheus_notifications_errors_total{%(prometheusSelector)s}[5m]) / rate(prometheus_notifications_sent_total{%(prometheusSelector)s}[5m]) > 0.03
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'critical',
            },
          },
          {
            alert: 'PrometheusNotConnectedToAlertmanagers',
            annotations: {
              description: 'Prometheus {{ $labels.namespace }}/{{ $labels.pod}} is not connected to any Alertmanagers',
              summary: 'Prometheus is not connected to any Alertmanagers',
            },
            expr: |||
              prometheus_notifications_alertmanagers_discovered{%(prometheusSelector)s} < 1
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'warning',
            },
          },
          {
            // The description states the window the expression actually
            // uses ([2h]); it used to say "four hours".
            alert: 'PrometheusTSDBReloadsFailing',
            annotations: {
              description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}} reload failures over the last two hours.',
              summary: 'Prometheus has issues reloading data blocks from disk',
            },
            expr: |||
              increase(prometheus_tsdb_reloads_failures_total{%(prometheusSelector)s}[2h]) > 0
            ||| % $._config,
            'for': '12h',
            labels: {
              severity: 'warning',
            },
          },
          {
            // The description states the window the expression actually
            // uses ([2h]); it used to say "four hours".
            alert: 'PrometheusTSDBCompactionsFailing',
            annotations: {
              description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}} compaction failures over the last two hours.',
              summary: 'Prometheus has issues compacting sample blocks',
            },
            expr: |||
              increase(prometheus_tsdb_compactions_failed_total{%(prometheusSelector)s}[2h]) > 0
            ||| % $._config,
            'for': '12h',
            labels: {
              severity: 'warning',
            },
          },
          {
            alert: 'PrometheusTSDBWALCorruptions',
            annotations: {
              description: '{{$labels.job}} at {{$labels.instance}} has a corrupted write-ahead log (WAL).',
              summary: 'Prometheus write-ahead log is corrupted',
            },
            expr: |||
              prometheus_tsdb_wal_corruptions_total{%(prometheusSelector)s} > 0
            ||| % $._config,
            'for': '4h',
            labels: {
              severity: 'warning',
            },
          },
          {
            alert: 'PrometheusNotIngestingSamples',
            annotations: {
              description: "Prometheus {{ $labels.namespace }}/{{ $labels.pod}} isn't ingesting samples.",
              summary: "Prometheus isn't ingesting samples",
            },
            expr: |||
              rate(prometheus_tsdb_head_samples_appended_total{%(prometheusSelector)s}[5m]) <= 0
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'warning',
            },
          },
          {
            alert: 'PrometheusTargetScrapesDuplicate',
            annotations: {
              description: '{{$labels.namespace}}/{{$labels.pod}} has many samples rejected due to duplicate timestamps but different values',
              summary: 'Prometheus has many samples rejected',
            },
            expr: |||
              increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{%(prometheusSelector)s}[5m]) > 0
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'warning',
            },
          },
        ],
      },
    ],
  },
}

View File

@@ -0,0 +1,157 @@
# TODO(metalmatze): This file is temporarily saved here for later reference
# until we find out how to integrate the tests into our jsonnet stack.
rule_files:
- rules.yaml
evaluation_interval: 1m
tests:
- interval: 1m
input_series:
- series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.0",namespace="monitoring",pod="alertmanager-main-0",service="alertmanager-main"}'
values: '3 3 3 3 3 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0'
- series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.1",namespace="monitoring",pod="alertmanager-main-1",service="alertmanager-main"}'
values: '3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3'
- series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.2",namespace="monitoring",pod="alertmanager-main-2",service="alertmanager-main"}'
values: '3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3'
alert_rule_test:
- eval_time: 5m
alertname: AlertmanagerMembersInconsistent
- eval_time: 11m
alertname: AlertmanagerMembersInconsistent
exp_alerts:
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.0
namespace: monitoring
pod: alertmanager-main-0
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- eval_time: 17m
alertname: AlertmanagerMembersInconsistent
exp_alerts:
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.0
namespace: monitoring
pod: alertmanager-main-0
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- eval_time: 23m
alertname: AlertmanagerMembersInconsistent
exp_alerts:
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.0
namespace: monitoring
pod: alertmanager-main-0
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- interval: 1m
input_series:
- series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.0",namespace="monitoring",pod="alertmanager-main-0",service="alertmanager-main"}'
values: '3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
- series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.1",namespace="monitoring",pod="alertmanager-main-1",service="alertmanager-main"}'
values: '3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2'
- series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.2",namespace="monitoring",pod="alertmanager-main-2",service="alertmanager-main"}'
values: '3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2'
alert_rule_test:
- eval_time: 5m
alertname: AlertmanagerMembersInconsistent
- eval_time: 11m
alertname: AlertmanagerMembersInconsistent
exp_alerts:
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.0
namespace: monitoring
pod: alertmanager-main-0
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.1
namespace: monitoring
pod: alertmanager-main-1
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.2
namespace: monitoring
pod: alertmanager-main-2
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- eval_time: 17m
alertname: AlertmanagerMembersInconsistent
exp_alerts:
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.0
namespace: monitoring
pod: alertmanager-main-0
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.1
namespace: monitoring
pod: alertmanager-main-1
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.2
namespace: monitoring
pod: alertmanager-main-2
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- eval_time: 23m
alertname: AlertmanagerMembersInconsistent
exp_alerts:
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.0
namespace: monitoring
pod: alertmanager-main-0
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.1
namespace: monitoring
pod: alertmanager-main-1
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.2
namespace: monitoring
pod: alertmanager-main-2
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'

View File

@@ -0,0 +1,54 @@
{
"dependencies": [
{
"name": "ksonnet",
"source": {
"git": {
"remote": "https://github.com/ksonnet/ksonnet-lib",
"subdir": ""
}
},
"version": "master"
},
{
"name": "kubernetes-mixin",
"source": {
"git": {
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
"subdir": ""
}
},
"version": "master"
},
{
"name": "grafana",
"source": {
"git": {
"remote": "https://github.com/brancz/kubernetes-grafana",
"subdir": "grafana"
}
},
"version": "master"
},
{
"name": "prometheus-operator",
"source": {
"git": {
"remote": "https://github.com/coreos/prometheus-operator",
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "v0.29.0"
},
{
"name": "etcd-mixin",
"source": {
"git": {
"remote": "https://github.com/coreos/etcd",
"subdir": "Documentation/etcd-mixin"
}
},
"version": "master"
}
]
}

View File

@@ -0,0 +1,39 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local statefulSet = k.apps.v1beta2.statefulSet;
local affinity = statefulSet.mixin.spec.template.spec.affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecutionType;
local matchExpression = affinity.mixin.podAffinityTerm.labelSelector.matchExpressionsType;
{
  // Builds the `affinity` fragment merged into a spec below: one preferred
  // (preferredDuringSchedulingIgnoredDuringExecution) pod anti-affinity term
  // with weight 100 and topology key 'kubernetes.io/hostname', restricted to
  // $._config.namespace, whose label selector requires label `key` to be
  // `In` the list `values`.
  local antiaffinity(key, values) = {
    affinity: {
      podAntiAffinity: {
        preferredDuringSchedulingIgnoredDuringExecution: [
          affinity.new() +
          affinity.withWeight(100) +
          affinity.mixin.podAffinityTerm.withNamespaces($._config.namespace) +
          affinity.mixin.podAffinityTerm.withTopologyKey('kubernetes.io/hostname') +
          affinity.mixin.podAffinityTerm.labelSelector.withMatchExpressions([
            matchExpression.new() +
            matchExpression.withKey(key) +
            matchExpression.withOperator('In') +
            matchExpression.withValues(values),
          ]),
        ],
      },
    },
  },

  // Adds the anti-affinity to the Alertmanager object's spec, keyed on the
  // label alertmanager=<$._config.alertmanager.name>.
  alertmanager+:: {
    alertmanager+: {
      spec+:
        antiaffinity('alertmanager', [$._config.alertmanager.name]),
    },
  },

  // Adds the anti-affinity to the Prometheus object's spec, keyed on the
  // label prometheus=<$._config.prometheus.name>.
  // Declared hidden (`+::`) exactly like the alertmanager group: with the
  // default `+:` visibility this group would only be hidden when the object
  // it is merged onto already hides it, and would otherwise be manifested.
  prometheus+:: {
    prometheus+: {
      spec+:
        antiaffinity('prometheus', [$._config.prometheus.name]),
    },
  },
}

View File

@@ -0,0 +1,23 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
  // Assembles one discovery Service: created through service.new(name, labels, ports),
  // placed in the kube-system namespace, labelled with the same `labels`
  // object, and with its clusterIP set to 'None'.
  local discoveryService(name, labels, ports) =
    service.new(name, labels, ports) +
    service.mixin.metadata.withNamespace('kube-system') +
    service.mixin.metadata.withLabels(labels) +
    service.mixin.spec.withClusterIp('None'),

  // A port named 'http-metrics' using `number` for both numeric arguments
  // of servicePort.newNamed.
  local httpMetrics(number) = servicePort.newNamed('http-metrics', number, number),

  // The two ports exposed for kube-dns (skydns and dnsmasq metrics).
  local kubeDnsPorts = [
    servicePort.newNamed('http-metrics-skydns', 10055, 10055),
    servicePort.newNamed('http-metrics-dnsmasq', 10054, 10054),
  ],

  prometheus+:: {
    kubeControllerManagerPrometheusDiscoveryService:
      discoveryService(
        'kube-controller-manager-prometheus-discovery',
        { 'k8s-app': 'kube-controller-manager' },
        httpMetrics(10252)
      ),

    kubeSchedulerPrometheusDiscoveryService:
      discoveryService(
        'kube-scheduler-prometheus-discovery',
        { 'k8s-app': 'kube-scheduler' },
        httpMetrics(10251)
      ),

    kubeDnsPrometheusDiscoveryService:
      discoveryService(
        'kube-dns-prometheus-discovery',
        { 'k8s-app': 'kube-dns' },
        kubeDnsPorts
      ),
  },
}

View File

@@ -0,0 +1,20 @@
local l = import 'lib/lib.libsonnet';
// withImageRepository(repository) returns a mixin that rewrites every entry
// of _config.imageRepos to `repository` + '/' + l.imageName(<old value>), e.g.
//   quay.io/coreos/addon-resizer -> $repository/addon-resizer
//   grafana/grafana              -> $repository/grafana
// When `repository` is null every entry keeps its old value.
local withImageRepository(repository) = {
  // Image repos as defined by the object this mixin is added onto.
  local currentRepos = super._config.imageRepos,

  // New location for a single image; a null repository means "leave as is".
  local relocate(image) =
    if repository != null then repository + '/' + l.imageName(image) else image,

  _config+:: {
    // Replaces imageRepos wholesale with one rewritten entry per visible
    // field of the previous imageRepos.
    imageRepos:: {
      [name]: relocate(currentRepos[name])
      for name in std.objectFields(currentRepos)
    },
  },
};

// Public surface of this file: the mixin constructor, exposed as a hidden field.
{
  withImageRepository:: withImageRepository,
}

View File

@@ -0,0 +1,25 @@
{
  // Settings common to both kubelet endpoints: the 'http-metrics' port over
  // plain http, a 30s interval, and the service account token file as
  // bearer token.
  local kubeletEndpoint = {
    port: 'http-metrics',
    scheme: 'http',
    interval: '30s',
    bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
  },

  prometheus+:: {
    serviceMonitorKubelet+: {
      spec+: {
        // Sets the endpoint list to exactly these two entries.
        endpoints: [
          // Endpoint without an explicit path.
          kubeletEndpoint,
          // cAdvisor metrics under /metrics/cadvisor, with honorLabels enabled.
          kubeletEndpoint {
            path: '/metrics/cadvisor',
            honorLabels: true,
          },
        ],
      },
    },
  },
}

View File

@@ -0,0 +1,13 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
  // Label set handed to service.new and also applied as the Service's labels.
  local dnsLabels = { 'k8s-app': 'kube-dns' },
  // Single port named 'metrics' on 9153.
  local dnsPorts = [servicePort.newNamed('metrics', 9153, 9153)],

  prometheus+:: {
    // Discovery Service for kube-dns in kube-system, clusterIP 'None'.
    kubeDnsPrometheusDiscoveryService:
      service.new('kube-dns-prometheus-discovery', dnsLabels, dnsPorts) +
      service.mixin.metadata.withNamespace('kube-system') +
      service.mixin.metadata.withLabels(dnsLabels) +
      service.mixin.spec.withClusterIp('None'),
  },
}

View File

@@ -0,0 +1,23 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
  // Wraps a service.new(...) result with what all three Services share:
  // the kube-system namespace, the given metadata labels and clusterIP 'None'.
  local inKubeSystem(svc, labels) =
    svc +
    service.mixin.metadata.withNamespace('kube-system') +
    service.mixin.metadata.withLabels(labels) +
    service.mixin.spec.withClusterIp('None'),

  local controllerManagerLabels = { 'k8s-app': 'kube-controller-manager' },
  local schedulerLabels = { 'k8s-app': 'kube-scheduler' },
  local dnsLabels = { 'k8s-app': 'kube-dns' },

  prometheus+:: {
    kubeControllerManagerPrometheusDiscoveryService:
      inKubeSystem(
        service.new(
          'kube-controller-manager-prometheus-discovery',
          controllerManagerLabels,
          servicePort.newNamed('http-metrics', 10252, 10252)
        ),
        controllerManagerLabels
      ),

    kubeSchedulerPrometheusDiscoveryService:
      inKubeSystem(
        service.new(
          'kube-scheduler-prometheus-discovery',
          schedulerLabels,
          servicePort.newNamed('http-metrics', 10251, 10251)
        ),
        schedulerLabels
      ),

    // kube-dns exposes two ports: 'metrics' (10055) and 'http-metrics-dnsmasq' (10054).
    kubeDnsPrometheusDiscoveryService:
      inKubeSystem(
        service.new(
          'kube-dns-prometheus-discovery',
          dnsLabels,
          [
            servicePort.newNamed('metrics', 10055, 10055),
            servicePort.newNamed('http-metrics-dnsmasq', 10054, 10054),
          ]
        ),
        dnsLabels
      ),
  },
}

View File

@@ -0,0 +1,8 @@
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet');

// Returns a copy of `component` containing its visible fields only
// (std.objectFields), each re-keyed as `prefix` + <field name>.
local prefixed(prefix, component) = {
  [prefix + field]: component[field]
  for field in std.objectFields(component)
};

// Single flat object holding every visible field of each kube-prometheus
// component, namespaced by a per-component key prefix.
prefixed('0prometheus-operator-', kp.prometheusOperator) +
prefixed('node-exporter-', kp.nodeExporter) +
prefixed('kube-state-metrics-', kp.kubeStateMetrics) +
prefixed('alertmanager-', kp.alertmanager) +
prefixed('prometheus-', kp.prometheus) +
prefixed('grafana-', kp.grafana)

View File

@@ -0,0 +1,18 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
  // Discovery Service in kube-system with clusterIP 'None'. `labels` is
  // given to service.new and is also set as the Service's metadata labels;
  // the single port is named 'http-metrics' and uses `port` for both
  // numeric arguments of servicePort.newNamed.
  local discoveryService(name, labels, port) =
    service.new(name, labels, servicePort.newNamed('http-metrics', port, port)) +
    service.mixin.metadata.withNamespace('kube-system') +
    service.mixin.metadata.withLabels(labels) +
    service.mixin.spec.withClusterIp('None'),

  prometheus+: {
    kubeControllerManagerPrometheusDiscoveryService:
      discoveryService(
        'kube-controller-manager-prometheus-discovery',
        { 'k8s-app': 'kube-controller-manager' },
        10252
      ),

    kubeSchedulerPrometheusDiscoveryService:
      discoveryService(
        'kube-scheduler-prometheus-discovery',
        { 'k8s-app': 'kube-scheduler' },
        10251
      ),
  },
}

View File

@@ -0,0 +1,18 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
  // Discovery Service in kube-system with clusterIP 'None' for one control
  // plane component. Unlike the Service's own metadata labels
  // ('k8s-app': <component>), the object handed to service.new uses the
  // `component` key. The single port is named 'http-metrics'.
  local discoveryService(name, component, port) =
    service.new(name, { component: component }, servicePort.newNamed('http-metrics', port, port)) +
    service.mixin.metadata.withNamespace('kube-system') +
    service.mixin.metadata.withLabels({ 'k8s-app': component }) +
    service.mixin.spec.withClusterIp('None'),

  prometheus+: {
    kubeControllerManagerPrometheusDiscoveryService:
      discoveryService('kube-controller-manager-prometheus-discovery', 'kube-controller-manager', 10252),

    kubeSchedulerPrometheusDiscoveryService:
      discoveryService('kube-scheduler-prometheus-discovery', 'kube-scheduler', 10251),
  },
}

View File

@@ -0,0 +1,18 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
  // Overlay shared by both Services below: kube-system namespace and
  // clusterIP 'None'.
  local kubeSystemNoClusterIp =
    service.mixin.metadata.withNamespace('kube-system') +
    service.mixin.spec.withClusterIp('None'),

  local controllerManagerLabels = { 'k8s-app': 'kube-controller-manager' },
  local schedulerLabels = { 'k8s-app': 'kube-scheduler' },

  prometheus+: {
    // kube-controller-manager: port 'http-metrics' on 10252.
    kubeControllerManagerPrometheusDiscoveryService:
      service.new(
        'kube-controller-manager-prometheus-discovery',
        controllerManagerLabels,
        servicePort.newNamed('http-metrics', 10252, 10252)
      ) +
      service.mixin.metadata.withLabels(controllerManagerLabels) +
      kubeSystemNoClusterIp,

    // kube-scheduler: port 'http-metrics' on 10251.
    kubeSchedulerPrometheusDiscoveryService:
      service.new(
        'kube-scheduler-prometheus-discovery',
        schedulerLabels,
        servicePort.newNamed('http-metrics', 10251, 10251)
      ) +
      service.mixin.metadata.withLabels(schedulerLabels) +
      kubeSystemNoClusterIp,
  },
}

View File

@@ -0,0 +1,28 @@
// On managed Kubernetes clusters some of the control plane components are not exposed to customers.
// Disable scrape jobs and service monitors for these components by overwriting 'kube-prometheus.libsonnet' defaults
// Note this doesn't disable generation of associated alerting rules but the rules don't trigger
{
  // Returns a new object holding every visible field of `obj`
  // (std.objectFields) whose name is not listed in `excluded`.
  // Membership is tested with std.setMember, which per the Jsonnet stdlib
  // docs expects `excluded` to be a sorted array without duplicates.
  local omit(obj, excluded) = {
    [name]: obj[name]
    for name in std.objectFields(obj)
    if !std.setMember(name, excluded)
  },

  _config+:: {
    // Replace the inherited jobs object with a copy that lacks the
    // controller-manager and scheduler entries.
    jobs: omit(super.jobs, ['KubeControllerManager', 'KubeScheduler']),
  },

  // Same treatment for the ServiceMonitors in the inherited prometheus object.
  prometheus: omit(super.prometheus, ['serviceMonitorKubeControllerManager', 'serviceMonitorKubeScheduler']),

  // TODO: disable generation of alerting rules
  // manifests/prometheus-rules.yaml:52: - name: kube-scheduler.rules
}

View File

@@ -0,0 +1,21 @@
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local service = k.core.v1.service;
local servicePort = k.core.v1.service.mixin.spec.portsType;
{
  // Service overlay: sets the ports to a single servicePort.newNamed entry
  // (with `portName` passed as both its first and third argument and `port`
  // as its second), assigns it the node port `nodePort`, and sets the
  // Service type to 'NodePort'.
  local viaNodePort(portName, port, nodePort) =
    service.mixin.spec.withPorts(
      servicePort.newNamed(portName, port, portName) + servicePort.withNodePort(nodePort)
    ) +
    service.mixin.spec.withType('NodePort'),

  // Prometheus: 'web' port 9090 on node port 30900.
  prometheus+: {
    service+: viaNodePort('web', 9090, 30900),
  },

  // Alertmanager: 'web' port 9093 on node port 30903.
  alertmanager+: {
    service+: viaNodePort('web', 9093, 30903),
  },

  // Grafana: 'http' port 3000 on node port 30902.
  grafana+: {
    service+: viaNodePort('http', 3000, 30902),
  },
}

Some files were not shown because too many files have changed in this diff Show More