Merge branch 'prometheus-operator'
4
.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
tmp/
|
||||
minikube-manifests/
|
||||
vendor/
|
||||
./auth
|
65
Makefile
Normal file
@@ -0,0 +1,65 @@
|
||||
JSONNET_FMT := jsonnet fmt -n 2 --max-blank-lines 2 --string-style s --comment-style s
|
||||
|
||||
JB_BINARY:=$(GOPATH)/bin/jb
|
||||
EMBEDMD_BINARY:=$(GOPATH)/bin/embedmd
|
||||
|
||||
all: generate fmt test
|
||||
|
||||
../../hack/jsonnet-docker-image: ../../scripts/jsonnet/Dockerfile
|
||||
# Create empty target file, for the sole purpose of recording when this target
|
||||
# was last executed via the last-modification timestamp on the file. See
|
||||
# https://www.gnu.org/software/make/manual/make.html#Empty-Targets
|
||||
docker build -f - -t po-jsonnet . < ../../scripts/jsonnet/Dockerfile
|
||||
touch $@
|
||||
|
||||
generate-in-docker: ../../hack/jsonnet-docker-image
|
||||
@echo ">> Compiling assets and generating Kubernetes manifests"
|
||||
docker run \
|
||||
--rm \
|
||||
-u=$(shell id -u $(USER)):$(shell id -g $(USER)) \
|
||||
-v $(shell dirname $(dir $(abspath $(dir $$PWD)))):/go/src/github.com/coreos/prometheus-operator/ \
|
||||
-v $(shell go env GOCACHE):/.cache/go-build \
|
||||
--workdir /go/src/github.com/coreos/prometheus-operator/contrib/kube-prometheus \
|
||||
po-jsonnet make generate
|
||||
|
||||
generate: manifests **.md
|
||||
|
||||
**.md: $(EMBEDMD_BINARY) $(shell find examples) build.sh example.jsonnet
|
||||
$(EMBEDMD_BINARY) -w `find . -name "*.md" | grep -v vendor`
|
||||
|
||||
manifests: vendor example.jsonnet build.sh
|
||||
rm -rf manifests
|
||||
./build.sh ./examples/kustomize.jsonnet
|
||||
|
||||
vendor: $(JB_BINARY) jsonnetfile.json jsonnetfile.lock.json
|
||||
rm -rf vendor
|
||||
$(JB_BINARY) install
|
||||
|
||||
fmt:
|
||||
find . -name 'vendor' -prune -o -name '*.libsonnet' -o -name '*.jsonnet' -print | \
|
||||
xargs -n 1 -- $(JSONNET_FMT) -i
|
||||
|
||||
test: $(JB_BINARY)
|
||||
$(JB_BINARY) install
|
||||
./test.sh
|
||||
|
||||
test-e2e:
|
||||
go test -timeout 55m -v ./tests/e2e -count=1
|
||||
|
||||
test-in-docker: ../../hack/jsonnet-docker-image
|
||||
@echo ">> Compiling assets and generating Kubernetes manifests"
|
||||
docker run \
|
||||
--rm \
|
||||
-u=$(shell id -u $(USER)):$(shell id -g $(USER)) \
|
||||
-v $(shell dirname $(dir $(abspath $(dir $$PWD)))):/go/src/github.com/coreos/prometheus-operator/ \
|
||||
-v $(shell go env GOCACHE):/.cache/go-build \
|
||||
--workdir /go/src/github.com/coreos/prometheus-operator/contrib/kube-prometheus \
|
||||
po-jsonnet make test
|
||||
|
||||
$(JB_BINARY):
|
||||
go get -u github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb
|
||||
|
||||
$(EMBEDMD_BINARY):
|
||||
go get github.com/campoy/embedmd
|
||||
|
||||
.PHONY: generate generate-in-docker test test-in-docker fmt
|
634
README.md
@@ -1,9 +1,633 @@
|
||||
# kube-prometheus
|
||||
|
||||
This repository collects Kubernetes manifests, dashboards, and alerting rules
|
||||
combined with documentation and scripts to provide single-command deployments
|
||||
of end-to-end Kubernetes cluster monitoring.
|
||||
> Note that everything in the `contrib/kube-prometheus/` directory is experimental and may change significantly at any time.
|
||||
|
||||
# This repository has moved
|
||||
This repository collects Kubernetes manifests, [Grafana](http://grafana.com/) dashboards, and [Prometheus rules](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/) combined with documentation and scripts to provide easy-to-operate end-to-end Kubernetes cluster monitoring with [Prometheus](https://prometheus.io/) using the Prometheus Operator.
|
||||
|
||||
This repository has been merged with the [Prometheus Operator](https://github.com/coreos/prometheus-operator). It can now be found under [`contrib/kube-prometheus`](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus).
|
||||
The content of this project is written in [jsonnet](http://jsonnet.org/). This project could be described both as a package and as a library.
|
||||
|
||||
Components included in this package:
|
||||
|
||||
* The [Prometheus Operator](https://github.com/coreos/prometheus-operator)
|
||||
* Highly available [Prometheus](https://prometheus.io/)
|
||||
* Highly available [Alertmanager](https://github.com/prometheus/alertmanager)
|
||||
* [Prometheus node-exporter](https://github.com/prometheus/node_exporter)
|
||||
* [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics)
|
||||
* [Grafana](https://grafana.com/)
|
||||
|
||||
This stack is meant for cluster monitoring, so it is pre-configured to collect metrics from all Kubernetes components. In addition, it delivers a default set of dashboards and alerting rules. Many of the useful dashboards and alerts come from the [kubernetes-mixin project](https://github.com/kubernetes-monitoring/kubernetes-mixin), which, similar to this project, provides composable jsonnet as a library for users to customize to their needs.
|
||||
|
||||
## Table of contents
|
||||
|
||||
* [Prerequisites](#prerequisites)
|
||||
* [minikube](#minikube)
|
||||
* [Quickstart](#quickstart)
|
||||
* [Customizing Kube-Prometheus](#customizing-kube-prometheus)
|
||||
* [Installing](#installing)
|
||||
* [Compiling](#compiling)
|
||||
* [Containerized Installing and Compiling](#containerized-installing-and-compiling)
|
||||
* [Configuration](#configuration)
|
||||
* [Customization Examples](#customization-examples)
|
||||
* [Cluster Creation Tools](#cluster-creation-tools)
|
||||
    * [Internal Registry](#internal-registry)
|
||||
* [NodePorts](#nodeports)
|
||||
* [Prometheus Object Name](#prometheus-object-name)
|
||||
* [node-exporter DaemonSet namespace](#node-exporter-daemonset-namespace)
|
||||
* [Alertmanager configuration](#alertmanager-configuration)
|
||||
* [Static etcd configuration](#static-etcd-configuration)
|
||||
* [Pod Anti-Affinity](#pod-anti-affinity)
|
||||
* [Customizing Prometheus alerting/recording rules and Grafana dashboards](#customizing-prometheus-alertingrecording-rules-and-grafana-dashboards)
|
||||
 * [Exposing Prometheus/Alertmanager/Grafana via Ingress](#exposing-prometheusalertmanagergrafana-via-ingress)
|
||||
* [Minikube Example](#minikube-example)
|
||||
* [Troubleshooting](#troubleshooting)
|
||||
* [Error retrieving kubelet metrics](#error-retrieving-kubelet-metrics)
|
||||
* [kube-state-metrics resource usage](#kube-state-metrics-resource-usage)
|
||||
* [Contributing](#contributing)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
You will need a Kubernetes cluster, that's it! By default it is assumed that the kubelet uses token authentication and authorization; otherwise Prometheus would need a client certificate, which gives it full access to the kubelet rather than just the metrics. Token authentication and authorization allow more fine-grained and easier access control.
|
||||
|
||||
This means the kubelet configuration must contain these flags:
|
||||
|
||||
* `--authentication-token-webhook=true` This flag enables a `ServiceAccount` token to be used to authenticate against the kubelet(s).
|
||||
* `--authorization-mode=Webhook` This flag causes the kubelet to perform an RBAC request against the API server to determine whether the requesting entity (Prometheus in this case) is allowed to access a resource, which for this project is specifically the `/metrics` endpoint.
|
||||
|
||||
This stack provides [resource metrics](https://github.com/kubernetes/metrics#resource-metrics-api) by deploying the [Prometheus Adapter](https://github.com/DirectXMan12/k8s-prometheus-adapter/).
|
||||
This adapter is an Extension API Server, and Kubernetes needs to have this feature enabled; otherwise the adapter has no effect, but is still deployed.
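
Once the stack is deployed, a quick way to check whether the adapter is registered and serving the resource metrics API is with standard kubectl commands (shown here only as a hint; exact output varies by cluster):

```
kubectl get apiservices v1beta1.metrics.k8s.io
kubectl top nodes
```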
|
||||
|
||||
### minikube
|
||||
|
||||
In order to just try out this stack, start minikube with the following command:
|
||||
|
||||
```
|
||||
$ minikube delete && minikube start --kubernetes-version=v1.13.2 --memory=4096 --bootstrapper=kubeadm --extra-config=kubelet.authentication-token-webhook=true --extra-config=kubelet.authorization-mode=Webhook --extra-config=scheduler.address=0.0.0.0 --extra-config=controller-manager.address=0.0.0.0
|
||||
```
|
||||
|
||||
> The kube-prometheus stack includes a resource metrics API server, like the metrics-server does. So ensure the metrics-server plugin is disabled on minikube:
|
||||
>
|
||||
> ```
|
||||
> minikube addons disable metrics-server
|
||||
> ```
|
||||
|
||||
## Quickstart
|
||||
|
||||
This project is intended to be used as a library (i.e. the intent is not for you to create your own modified copy of this repository).
|
||||
|
||||
However, for a quickstart, a compiled version of the Kubernetes [manifests](manifests) generated with this library (specifically with `example.jsonnet`) is checked into this repository so the content can be tried out quickly. To try out the stack un-customized, run:
|
||||
* Simply create the stack:
|
||||
```
|
||||
$ kubectl create -f manifests/
|
||||
|
||||
# It can take a few seconds for the above 'create manifests' command to fully create the following resources, so verify the resources are ready before proceeding.
|
||||
$ until kubectl get customresourcedefinitions servicemonitors.monitoring.coreos.com ; do date; sleep 1; echo ""; done
|
||||
$ until kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done
|
||||
|
||||
$ kubectl apply -f manifests/ # This command sometimes may need to be done twice (to workaround a race condition).
|
||||
```
|
||||
|
||||
* And to teardown the stack:
|
||||
```
|
||||
$ kubectl delete -f manifests/
|
||||
```
|
||||
|
||||
### Access the dashboards
|
||||
|
||||
Prometheus, Grafana, and Alertmanager dashboards can be accessed quickly using `kubectl port-forward` after running the quickstart via the commands below. Kubernetes 1.10 or later is required.
|
||||
|
||||
> Note: There are instructions on how to route to these pods behind an ingress controller in the [Exposing Prometheus/Alertmanager/Grafana via Ingress](#exposing-prometheusalertmanagergrafana-via-ingress) section.
|
||||
|
||||
Prometheus
|
||||
|
||||
```shell
|
||||
kubectl --namespace monitoring port-forward svc/prometheus-k8s 9090
|
||||
```
|
||||
|
||||
Then access via [http://localhost:9090](http://localhost:9090)
|
||||
|
||||
Grafana
|
||||
|
||||
```shell
|
||||
kubectl --namespace monitoring port-forward svc/grafana 3000
|
||||
```
|
||||
|
||||
Then access via [http://localhost:3000](http://localhost:3000) and use the default grafana user:password of `admin:admin`.
|
||||
|
||||
Alert Manager
|
||||
|
||||
```shell
|
||||
kubectl --namespace monitoring port-forward svc/alertmanager-main 9093
|
||||
```
|
||||
|
||||
Then access via [http://localhost:9093](http://localhost:9093)
|
||||
|
||||
## Customizing Kube-Prometheus
|
||||
|
||||
This section:
|
||||
* describes how to customize the kube-prometheus library by compiling the kube-prometheus manifests yourself (as an alternative to the [Quickstart section](#quickstart)).
|
||||
* still doesn't require you to make a copy of this entire repository, but rather only a copy of a few select files.
|
||||
|
||||
### Installing
|
||||
|
||||
The content of this project consists of a set of [jsonnet](http://jsonnet.org/) files making up a library to be consumed.
|
||||
|
||||
Install this library in your own project with [jsonnet-bundler](https://github.com/jsonnet-bundler/jsonnet-bundler#install) (the jsonnet package manager):
|
||||
```
|
||||
$ mkdir my-kube-prometheus; cd my-kube-prometheus
|
||||
$ jb init # Creates the initial/empty `jsonnetfile.json`
|
||||
# Install the kube-prometheus dependency
|
||||
$ jb install github.com/coreos/prometheus-operator/contrib/kube-prometheus/jsonnet/kube-prometheus # Creates `vendor/` & `jsonnetfile.lock.json`, and fills in `jsonnetfile.json`
|
||||
```
|
||||
|
||||
> `jb` can be installed with `go get github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb`
|
||||
|
||||
> An example of how to install a given version of this library: `jb install github.com/coreos/prometheus-operator/contrib/kube-prometheus/jsonnet/kube-prometheus/@v0.22.0`
|
||||
|
||||
In order to update the kube-prometheus dependency, simply use the jsonnet-bundler update functionality:
|
||||
`$ jb update`
|
||||
|
||||
### Compiling
|
||||
|
||||
For example, to compile the manifests: `./build.sh example.jsonnet`
|
||||
|
||||
> Before compiling, install the `gojsontoyaml` tool with `go get github.com/brancz/gojsontoyaml`.
|
||||
|
||||
Here's [example.jsonnet](example.jsonnet):
|
||||
|
||||
[embedmd]:# (example.jsonnet)
|
||||
```jsonnet
|
||||
local kp =
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
},
|
||||
};
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
||||
|
||||
```
|
||||
|
||||
And here's the [build.sh](build.sh) script (which uses `vendor/` to render all manifests in a json structure of `{filename: manifest-content}`):
|
||||
|
||||
[embedmd]:# (build.sh)
|
||||
```sh
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# This script uses arg $1 (name of *.jsonnet file to use) to generate the manifests/*.yaml files.
|
||||
|
||||
set -e
|
||||
set -x
|
||||
# only exit with zero if all commands of the pipeline exit successfully
|
||||
set -o pipefail
|
||||
|
||||
# Make sure to start with a clean 'manifests' dir
|
||||
rm -rf manifests
|
||||
mkdir manifests
|
||||
|
||||
# optional, but we would like to generate yaml, not json
|
||||
jsonnet -J vendor -m manifests "${1-example.jsonnet}" | xargs -I{} sh -c 'cat {} | gojsontoyaml > {}.yaml; rm -f {}' -- {}
|
||||
|
||||
```
|
||||
|
||||
> Note you need `jsonnet` (`go get github.com/google/go-jsonnet/cmd/jsonnet`) and `gojsontoyaml` (`go get github.com/brancz/gojsontoyaml`) installed to run `build.sh`. If you just want json output, not yaml, then you can skip the pipe and everything afterwards.
|
||||
|
||||
This script runs the jsonnet code, then reads each key of the generated json, uses that key as the file name, writes the value of that key to that file, and converts each json manifest to yaml.
|
||||
|
||||
### Apply the kube-prometheus stack
|
||||
The previous step (compilation) has created a set of manifest files in the `manifests/` folder.
|
||||
Now simply use kubectl to install Prometheus and Grafana as per your configuration:
|
||||
|
||||
`kubectl apply -f manifests/`
|
||||
|
||||
Check the `monitoring` namespace (or the namespace you have specified in `namespace:`) and make sure the pods are running. Prometheus and Grafana should be up and running soon.
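
For example, with the default `monitoring` namespace:

```
kubectl --namespace monitoring get pods
```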
|
||||
|
||||
### Containerized Installing and Compiling
|
||||
|
||||
If you prefer not to install `jb`, `jsonnet`, or `gojsontoyaml` locally, you can build the `po-jsonnet` Docker image instead (you will need a copy of this repository for this). Do the following from this `kube-prometheus` directory:
|
||||
```
|
||||
$ make ../../hack/jsonnet-docker-image
|
||||
```
|
||||
|
||||
Then you can do commands such as the following:
|
||||
```
|
||||
docker run \
|
||||
--rm \
|
||||
-v `pwd`:`pwd` \
|
||||
--workdir `pwd` \
|
||||
po-jsonnet jb init
|
||||
|
||||
docker run \
|
||||
--rm \
|
||||
-v `pwd`:`pwd` \
|
||||
--workdir `pwd` \
|
||||
po-jsonnet jb install github.com/coreos/prometheus-operator/contrib/kube-prometheus/jsonnet/kube-prometheus
|
||||
|
||||
docker run \
|
||||
--rm \
|
||||
-v `pwd`:`pwd` \
|
||||
--workdir `pwd` \
|
||||
po-jsonnet ./build.sh example.jsonnet
|
||||
```
|
||||
|
||||
## Update from upstream project
|
||||
You may wish to fetch changes made on this project so they are available to you.
|
||||
|
||||
### Update jb
|
||||
`jb` may have been updated, so it's a good idea to get the latest version of this binary:
|
||||
|
||||
```
|
||||
go get -u github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb
|
||||
```
|
||||
|
||||
### Update kube-prometheus
|
||||
The command below will sync with the upstream project:
|
||||
```
|
||||
jb update
|
||||
```
|
||||
|
||||
### Compile the manifests and apply
|
||||
Once updated, just follow the instructions under "Compiling" and "Apply the kube-prometheus stack" to apply the changes to your cluster.
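
Put together, an update cycle can look like the following (a sketch, assuming your project uses the `example.jsonnet` and `build.sh` layout described above):

```
jb update                   # pull in the latest kube-prometheus library
./build.sh example.jsonnet  # regenerate the manifests
kubectl apply -f manifests/ # apply the changes to the cluster
```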
|
||||
|
||||
|
||||
## Configuration
|
||||
|
||||
Jsonnet has the concept of hidden fields: fields that are not rendered in the result. These are used to configure the kube-prometheus components in jsonnet. In the example jsonnet code of the [Compiling section](#compiling) above, you can see an example of this, where the `namespace` is configured to be `monitoring`. To avoid overriding the whole object, use jsonnet's `+::` construct to merge objects; this way you can override individual settings while retaining all other settings and defaults, as illustrated below.
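
The following stand-alone snippet (an illustration only, not part of the library) shows the effect of `+::` on a hidden field:

```jsonnet
// '+::' merges into the hidden '_config' field instead of replacing it,
// so defaults that are not overridden are kept.
local defaults = { _config:: { namespace: 'default', replicas: 2 } };
local override = { _config+:: { namespace: 'monitoring' } };

// Evaluates to { namespace: 'monitoring', replicas: 2 }
(defaults + override)._config
```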
|
||||
|
||||
These are the available fields with their respective default values:
|
||||
```
|
||||
{
|
||||
_config+:: {
|
||||
namespace: "default",
|
||||
|
||||
versions+:: {
|
||||
alertmanager: "v0.16.1",
|
||||
nodeExporter: "v0.17.0",
|
||||
kubeStateMetrics: "v1.5.0",
|
||||
kubeRbacProxy: "v0.4.1",
|
||||
addonResizer: "1.8.4",
|
||||
prometheusOperator: "v0.29.0",
|
||||
prometheus: "v2.5.0",
|
||||
},
|
||||
|
||||
imageRepos+:: {
|
||||
prometheus: "quay.io/prometheus/prometheus",
|
||||
alertmanager: "quay.io/prometheus/alertmanager",
|
||||
kubeStateMetrics: "quay.io/coreos/kube-state-metrics",
|
||||
kubeRbacProxy: "quay.io/coreos/kube-rbac-proxy",
|
||||
addonResizer: "k8s.gcr.io/addon-resizer",
|
||||
nodeExporter: "quay.io/prometheus/node-exporter",
|
||||
prometheusOperator: "quay.io/coreos/prometheus-operator",
|
||||
},
|
||||
|
||||
prometheus+:: {
|
||||
names: 'k8s',
|
||||
replicas: 2,
|
||||
rules: {},
|
||||
},
|
||||
|
||||
alertmanager+:: {
|
||||
name: 'main',
|
||||
config: |||
|
||||
global:
|
||||
resolve_timeout: 5m
|
||||
route:
|
||||
group_by: ['job']
|
||||
group_wait: 30s
|
||||
group_interval: 5m
|
||||
repeat_interval: 12h
|
||||
receiver: 'null'
|
||||
routes:
|
||||
- match:
|
||||
alertname: Watchdog
|
||||
receiver: 'null'
|
||||
receivers:
|
||||
- name: 'null'
|
||||
|||,
|
||||
replicas: 3,
|
||||
},
|
||||
|
||||
kubeStateMetrics+:: {
|
||||
collectors: '', // empty string gets a default set
|
||||
scrapeInterval: '30s',
|
||||
scrapeTimeout: '30s',
|
||||
|
||||
baseCPU: '100m',
|
||||
baseMemory: '150Mi',
|
||||
cpuPerNode: '2m',
|
||||
memoryPerNode: '30Mi',
|
||||
},
|
||||
|
||||
nodeExporter+:: {
|
||||
port: 9100,
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
The Grafana definition is located in a different project (https://github.com/brancz/kubernetes-grafana), but the needed configuration can be customized from the same top-level `_config` field. For example, to allow anonymous access to Grafana, add the following `_config` section:
|
||||
```
|
||||
grafana+:: {
|
||||
config: { // http://docs.grafana.org/installation/configuration/
|
||||
sections: {
|
||||
"auth.anonymous": {enabled: true},
|
||||
},
|
||||
},
|
||||
},
|
||||
```
|
||||
|
||||
## Customization Examples
|
||||
|
||||
Jsonnet is a Turing-complete language, so any logic can be expressed in it. It also has powerful merge functionality, allowing sophisticated customizations of any kind simply by merging them into the object the library provides.
|
||||
|
||||
### Cluster Creation Tools
|
||||
|
||||
A common example is that not all Kubernetes clusters are created in exactly the same way, meaning the configuration to monitor them may differ slightly. For [kubeadm](examples/jsonnet-snippets/kubeadm.jsonnet), [bootkube](examples/jsonnet-snippets/bootkube.jsonnet), [kops](examples/jsonnet-snippets/kops.jsonnet), [kubespray](examples/jsonnet-snippets/kubespray.jsonnet) and [kube-aws](examples/jsonnet-snippets/kube-aws.jsonnet) clusters there are mixins available to configure these easily:
|
||||
|
||||
kubeadm:
|
||||
|
||||
[embedmd]:# (examples/jsonnet-snippets/kubeadm.jsonnet)
|
||||
```jsonnet
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-kubeadm.libsonnet')
|
||||
```
|
||||
|
||||
bootkube:
|
||||
|
||||
[embedmd]:# (examples/jsonnet-snippets/bootkube.jsonnet)
|
||||
```jsonnet
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-bootkube.libsonnet')
|
||||
```
|
||||
|
||||
kops:
|
||||
|
||||
[embedmd]:# (examples/jsonnet-snippets/kops.jsonnet)
|
||||
```jsonnet
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-kops.libsonnet')
|
||||
```
|
||||
|
||||
kops with CoreDNS:
|
||||
|
||||
If your kops cluster is using CoreDNS, there is an additional mixin to import.
|
||||
|
||||
[embedmd]:# (examples/jsonnet-snippets/kops-coredns.jsonnet)
|
||||
```jsonnet
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-kops.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-kops-coredns.libsonnet')
|
||||
```
|
||||
|
||||
kubespray:
|
||||
|
||||
[embedmd]:# (examples/jsonnet-snippets/kubespray.jsonnet)
|
||||
```jsonnet
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-kubespray.libsonnet')
|
||||
```
|
||||
|
||||
kube-aws:
|
||||
|
||||
[embedmd]:# (examples/jsonnet-snippets/kube-aws.jsonnet)
|
||||
```jsonnet
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-kube-aws.libsonnet')
|
||||
```
|
||||
|
||||
### Internal Registry
|
||||
|
||||
Some Kubernetes installations source all their images from an internal registry. kube-prometheus supports this use case and helps the user synchronize every image it uses to the internal registry and generate manifests pointing at the internal registry.
|
||||
|
||||
To produce the `docker pull/tag/push` commands that will synchronize upstream images to `internal-registry.com/organization` (after having run the `jb` command to populate the vendor directory):
|
||||
|
||||
```shell
|
||||
$ jsonnet -J vendor -S --tla-str repository=internal-registry.com/organization sync-to-internal-registry.jsonnet
|
||||
docker pull k8s.gcr.io/addon-resizer:1.8.4
|
||||
docker tag k8s.gcr.io/addon-resizer:1.8.4 internal-registry.com/organization/addon-resizer:1.8.4
|
||||
docker push internal-registry.com/organization/addon-resizer:1.8.4
|
||||
docker pull quay.io/prometheus/alertmanager:v0.16.1
|
||||
docker tag quay.io/prometheus/alertmanager:v0.16.1 internal-registry.com/organization/alertmanager:v0.16.1
|
||||
docker push internal-registry.com/organization/alertmanager:v0.16.1
|
||||
...
|
||||
```
|
||||
|
||||
The output of this command can be piped to a shell to be executed by appending `| sh`.
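
For example, the same invocation as above, executed directly:

```shell
$ jsonnet -J vendor -S --tla-str repository=internal-registry.com/organization sync-to-internal-registry.jsonnet | sh
```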
|
||||
|
||||
Then to generate manifests with `internal-registry.com/organization`, use the `withImageRepository` mixin:
|
||||
|
||||
[embedmd]:# (examples/internal-registry.jsonnet)
|
||||
```jsonnet
|
||||
local mixin = import 'kube-prometheus/kube-prometheus-config-mixins.libsonnet';
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
},
|
||||
} + mixin.withImageRepository('internal-registry.com/organization');
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
||||
```
|
||||
|
||||
### NodePorts
|
||||
|
||||
Another mixin that may be useful for exploring the stack is to expose the UIs of Prometheus, Alertmanager and Grafana on NodePorts:
|
||||
|
||||
[embedmd]:# (examples/jsonnet-snippets/node-ports.jsonnet)
|
||||
```jsonnet
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-node-ports.libsonnet')
|
||||
```
|
||||
|
||||
### Prometheus Object Name
|
||||
|
||||
To give another customization example, the name of the `Prometheus` object provided by this library can be overridden:
|
||||
|
||||
[embedmd]:# (examples/prometheus-name-override.jsonnet)
|
||||
```jsonnet
|
||||
((import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
prometheus+: {
|
||||
prometheus+: {
|
||||
metadata+: {
|
||||
name: 'my-name',
|
||||
},
|
||||
},
|
||||
},
|
||||
}).prometheus.prometheus
|
||||
```
|
||||
|
||||
### node-exporter DaemonSet namespace
|
||||
|
||||
Standard Kubernetes manifests are all written using [ksonnet-lib](https://github.com/ksonnet/ksonnet-lib/), so they can be modified with the mixins supplied by ksonnet-lib. For example to override the namespace of the node-exporter DaemonSet:
|
||||
|
||||
[embedmd]:# (examples/ksonnet-example.jsonnet)
|
||||
```jsonnet
|
||||
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
local daemonset = k.apps.v1beta2.daemonSet;
|
||||
|
||||
((import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
nodeExporter+: {
|
||||
daemonset+:
|
||||
daemonset.mixin.metadata.withNamespace('my-custom-namespace'),
|
||||
},
|
||||
}).nodeExporter.daemonset
|
||||
```
|
||||
|
||||
### Alertmanager configuration
|
||||
|
||||
The Alertmanager configuration is located in the `_config.alertmanager.config` configuration field. In order to set a custom Alertmanager configuration simply set this field.
|
||||
|
||||
[embedmd]:# (examples/alertmanager-config.jsonnet)
|
||||
```jsonnet
|
||||
((import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
alertmanager+: {
|
||||
config: |||
|
||||
global:
|
||||
resolve_timeout: 10m
|
||||
route:
|
||||
group_by: ['job']
|
||||
group_wait: 30s
|
||||
group_interval: 5m
|
||||
repeat_interval: 12h
|
||||
receiver: 'null'
|
||||
routes:
|
||||
- match:
|
||||
alertname: Watchdog
|
||||
receiver: 'null'
|
||||
receivers:
|
||||
- name: 'null'
|
||||
|||,
|
||||
},
|
||||
},
|
||||
}).alertmanager.secret
|
||||
```
|
||||
|
||||
In the above example the configuration has been inlined, but can just as well be an external file imported in jsonnet via the `importstr` function.
|
||||
|
||||
[embedmd]:# (examples/alertmanager-config-external.jsonnet)
|
||||
```jsonnet
|
||||
((import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
alertmanager+: {
|
||||
config: importstr 'alertmanager-config.yaml',
|
||||
},
|
||||
},
|
||||
}).alertmanager.secret
|
||||
```
|
||||
|
||||
### Adding additional namespaces to monitor
|
||||
|
||||
In order to monitor additional namespaces, the Prometheus server requires the appropriate `Role` and `RoleBinding` to be able to discover targets from those namespaces. By default the Prometheus server is limited to the three namespaces it requires: `default`, `kube-system`, and the namespace you configure the stack to run in via `$._config.namespace`. This is specified in `$._config.prometheus.namespaces`; to add new namespaces to monitor, simply append them:
|
||||
|
||||
[embedmd]:# (examples/additional-namespaces.jsonnet)
|
||||
```jsonnet
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
|
||||
prometheus+:: {
|
||||
namespaces+: ['my-namespace', 'my-second-namespace'],
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
||||
```
|
||||
|
||||
### Static etcd configuration
|
||||
|
||||
In order to configure a static etcd cluster to scrape, there is a simple [kube-prometheus-static-etcd.libsonnet](jsonnet/kube-prometheus/kube-prometheus-static-etcd.libsonnet) mixin prepared. See [etcd.jsonnet](examples/etcd.jsonnet) for an example of how to use that mixin, and [Monitoring external etcd](docs/monitoring-external-etcd.md) for more information.
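
For orientation, a minimal sketch of what using that mixin can look like is shown below. The configuration fields (etcd member IPs and client TLS material under `etcd+::`) are an assumption here, so consult [etcd.jsonnet](examples/etcd.jsonnet) for the authoritative field names and values:

```jsonnet
// Sketch only: the field names under etcd+:: are assumed, verify against examples/etcd.jsonnet.
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') +
           (import 'kube-prometheus/kube-prometheus-static-etcd.libsonnet') + {
  _config+:: {
    namespace: 'monitoring',
    etcd+:: {
      ips: ['10.0.0.10', '10.0.0.11'],           // placeholder etcd member IPs
      clientCA: importstr 'etcd-client-ca.crt',  // client TLS material used to scrape etcd
      clientKey: importstr 'etcd-client.key',
      clientCert: importstr 'etcd-client.crt',
      serverName: 'etcd.my-cluster.local',       // placeholder server name
    },
  },
};

// Render as in example.jsonnet above; only the Prometheus-related objects are shown here for brevity.
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) }
```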
|
||||
|
||||
> Note that monitoring etcd in minikube is currently not possible because of how etcd is set up (minikube's etcd binds to 127.0.0.1:2379 only, and within the host networking namespace).
|
||||
|
||||
### Pod Anti-Affinity
|
||||
|
||||
To prevent `Prometheus` and `Alertmanager` instances from being deployed onto the same node when
|
||||
possible, one can include the [kube-prometheus-anti-affinity.libsonnet](jsonnet/kube-prometheus/kube-prometheus-anti-affinity.libsonnet) mixin:
|
||||
|
||||
```jsonnet
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-anti-affinity.libsonnet')
|
||||
```
|
||||
|
||||
### Customizing Prometheus alerting/recording rules and Grafana dashboards
|
||||
|
||||
See [developing Prometheus rules and Grafana dashboards](docs/developing-prometheus-rules-and-grafana-dashboards.md) guide.
|
||||
|
||||
### Exposing Prometheus/Alertmanager/Grafana via Ingress
|
||||
|
||||
See [exposing Prometheus/Alertmanager/Grafana](docs/exposing-prometheus-alertmanager-grafana-ingress.md) guide.
|
||||
|
||||
## Minikube Example
|
||||
|
||||
For an easy-to-reproduce example, see [minikube.jsonnet](examples/minikube.jsonnet), which uses the minikube setup as demonstrated in [Prerequisites](#prerequisites). Because we would like easy access to our Prometheus, Alertmanager and Grafana UIs, `minikube.jsonnet` exposes the services as NodePort type services.
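
One way to compile and apply it, using the build script from this repository (this assumes the `vendor/` directory has been populated, e.g. via `jb install`):

```
./build.sh examples/minikube.jsonnet
kubectl apply -f manifests/
```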
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Error retrieving kubelet metrics
|
||||
|
||||
Should the Prometheus `/targets` page show kubelet targets but fail to scrape their metrics successfully, it is most likely a problem with the authentication and authorization setup of the kubelets.
|
||||
|
||||
As described in the [Prerequisites](#prerequisites) section, in order to retrieve metrics from the kubelet, token authentication and authorization must be enabled. Some Kubernetes setup tools do not enable this by default.
|
||||
|
||||
If you are using Google's GKE product, see [cAdvisor support](docs/GKE-cadvisor-support.md).
|
||||
|
||||
#### Authentication problem
|
||||
|
||||
The Prometheus `/targets` page will show the kubelet job with the error `403 Unauthorized` when token authentication is not enabled. Ensure that the `--authentication-token-webhook=true` flag is enabled on all kubelet configurations.
|
||||
|
||||
#### Authorization problem
|
||||
|
||||
The Prometheus `/targets` page will show the kubelet job with the error `401 Unauthorized` when token authorization is not enabled. Ensure that the `--authorization-mode=Webhook` flag is enabled on all kubelet configurations.
|
||||
|
||||
### kube-state-metrics resource usage
|
||||
|
||||
In some environments, kube-state-metrics may need additional
|
||||
resources. One driver for more resource needs is a high number of
|
||||
namespaces. There may be others.
|
||||
|
||||
kube-state-metrics resource allocation is managed by
|
||||
[addon-resizer](https://github.com/kubernetes/autoscaler/tree/master/addon-resizer/nanny).
|
||||
You can control its parameters by setting variables in the
|
||||
config. They default to:
|
||||
|
||||
``` jsonnet
|
||||
kubeStateMetrics+:: {
|
||||
baseCPU: '100m',
|
||||
cpuPerNode: '2m',
|
||||
baseMemory: '150Mi',
|
||||
memoryPerNode: '30Mi',
|
||||
}
|
||||
```
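
To override any of these, add the following to your `_config+::` section, mirroring the Grafana example above (a sketch; the values are illustrative only):

``` jsonnet
kubeStateMetrics+:: {
  baseCPU: '200m',      // illustrative values only
  baseMemory: '300Mi',
},
```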
|
||||
|
||||
## Contributing
|
||||
|
||||
All `.yaml` files in the `/manifests` folder are generated via
|
||||
[Jsonnet](https://jsonnet.org/). Contributing changes will most likely include
|
||||
the following process:
|
||||
|
||||
1. Make your changes in the respective `*.jsonnet` file.
|
||||
2. Commit your changes (This is currently necessary due to our vendoring
|
||||
process. This is likely to change in the future).
|
||||
3. Update the pinned kube-prometheus dependency in `jsonnetfile.lock.json`: `jb
|
||||
update`.
|
||||
4. Generate dependent `*.yaml` files: `make generate-in-docker`.
|
||||
5. Commit the generated changes.
|
||||
|
16
build.sh
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# This script uses arg $1 (name of *.jsonnet file to use) to generate the manifests/*.yaml files.
|
||||
|
||||
set -e
|
||||
set -x
|
||||
# only exit with zero if all commands of the pipeline exit successfully
|
||||
set -o pipefail
|
||||
|
||||
# Make sure to start with a clean 'manifests' dir
|
||||
rm -rf manifests
|
||||
mkdir manifests
|
||||
|
||||
# optional, but we would like to generate yaml, not json
|
||||
jsonnet -J vendor -m manifests "${1-example.jsonnet}" | xargs -I{} sh -c 'cat {} | gojsontoyaml > {}.yaml; rm -f {}' -- {}
|
||||
|
36
docs/GKE-cadvisor-support.md
Normal file
@@ -0,0 +1,36 @@
|
||||
# Kubelet / cAdvisor special configuration updates for GKE
|
||||
|
||||
Prior to GKE 1.11, the kubelet does not support token
|
||||
authentication. Until it does, Prometheus must use HTTP (not HTTPS)
|
||||
for scraping.
|
||||
|
||||
You can configure this behavior through kube-prometheus with:
|
||||
```
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-insecure-kubelet.libsonnet') +
|
||||
{
|
||||
_config+:: {
|
||||
# ... config here
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
Or, you can patch and re-apply your existing manifests with:
|
||||
|
||||
On linux:
|
||||
|
||||
```
|
||||
sed -i -e 's/https/http/g' manifests/prometheus-serviceMonitorKubelet.yaml
|
||||
```
|
||||
|
||||
On macOS:
|
||||
|
||||
```
|
||||
sed -i '' -e 's/https/http/g' manifests/prometheus-serviceMonitorKubelet.yaml
|
||||
```
|
||||
|
||||
After you have modified the yaml file, please run:
|
||||
|
||||
```
|
||||
kubectl apply -f manifests/prometheus-serviceMonitorKubelet.yaml
|
||||
```
|
293
docs/developing-prometheus-rules-and-grafana-dashboards.md
Normal file
@@ -0,0 +1,293 @@
|
||||
# Developing Prometheus Rules and Grafana Dashboards
|
||||
|
||||
`kube-prometheus` ships with a set of default [Prometheus rules](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/) and [Grafana](http://grafana.com/) dashboards. At some point one might like to extend them; the purpose of this document is to explain how to do this.
|
||||
|
||||
All manifests of kube-prometheus are generated using [jsonnet](https://jsonnet.org/), and the Prometheus rules and Grafana dashboards in particular follow the [Prometheus Monitoring Mixins proposal](https://docs.google.com/document/d/1A9xvzwqnFVSOZ5fD3blKODXfsat5fg6ZhnKu9LK3lB4/).
|
||||
|
||||
For both the Prometheus rules and the Grafana dashboards, Kubernetes `ConfigMap`s are generated within kube-prometheus. To add additional rules and dashboards, simply merge them onto the existing json objects. This document illustrates examples for rules as well as dashboards.
|
||||
|
||||
All examples in this guide build on the base example from the kube-prometheus [readme](../README.md):
|
||||
|
||||
[embedmd]:# (../example.jsonnet)
|
||||
```jsonnet
|
||||
local kp =
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
},
|
||||
};
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
||||
|
||||
```
|
||||
|
||||
## Prometheus rules
|
||||
|
||||
### Alerting rules
|
||||
|
||||
According to the [Prometheus Monitoring Mixins proposal](https://docs.google.com/document/d/1A9xvzwqnFVSOZ5fD3blKODXfsat5fg6ZhnKu9LK3lB4/) Prometheus alerting rules are under the key `prometheusAlerts` in the top level object, so in order to add an additional alerting rule, we can simply merge an extra rule into the existing object.
|
||||
|
||||
The format is exactly the Prometheus format, so there should be no changes necessary should you have existing rules that you want to include.
|
||||
|
||||
> Note that alerts can just as well be included into this file, using the jsonnet `import` function. In this example it is just inlined in order to demonstrate their use in a single file.
|
||||
|
||||
[embedmd]:# (../examples/prometheus-additional-alert-rule-example.jsonnet)
|
||||
```jsonnet
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
},
|
||||
prometheusAlerts+:: {
|
||||
groups+: [
|
||||
{
|
||||
name: 'example-group',
|
||||
rules: [
|
||||
{
|
||||
alert: 'Watchdog',
|
||||
expr: 'vector(1)',
|
||||
labels: {
|
||||
severity: 'none',
|
||||
},
|
||||
annotations: {
|
||||
description: 'This is a Watchdog meant to ensure that the entire alerting pipeline is functional.',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
};
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
||||
```
|
||||
|
||||
### Recording rules
|
||||
|
||||
In order to add a recording rule, simply do the same with the `prometheusRules` field.
|
||||
|
||||
> Note that rules can just as well be included into this file, using the jsonnet `import` function. In this example it is just inlined in order to demonstrate their use in a single file.
|
||||
|
||||
[embedmd]:# (../examples/prometheus-additional-recording-rule-example.jsonnet)
|
||||
```jsonnet
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
},
|
||||
prometheusRules+:: {
|
||||
groups+: [
|
||||
{
|
||||
name: 'example-group',
|
||||
rules: [
|
||||
{
|
||||
record: 'some_recording_rule_name',
|
||||
expr: 'vector(1)',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
};
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
||||
```
|
||||
|
||||
### Pre-rendered rules
|
||||
|
||||
We acknowledge that users may need to transition existing rules, and therefore provide an option to add additional pre-rendered rules. Luckily the yaml and json formats are very close, so the yaml rules just need to be converted to json without any manual interaction. All that is needed is a tool to convert yaml to json:
|
||||
|
||||
```
|
||||
go get -u -v github.com/brancz/gojsontoyaml
|
||||
```
|
||||
|
||||
And convert the existing rule file:
|
||||
|
||||
```
|
||||
cat existingrule.yaml | gojsontoyaml -yamltojson > existingrule.json
|
||||
```
|
||||
|
||||
Then import it in jsonnet:
|
||||
|
||||
[embedmd]:# (../examples/prometheus-additional-rendered-rule-example.jsonnet)
|
||||
```jsonnet
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
prometheusAlerts+:: (import 'existingrule.json'),
|
||||
};
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
||||
```
|
||||
### Changing default rules
|
||||
|
||||
Along with adding additional rules, we give the user the option to filter or adjust the existing rules imported by `kube-prometheus/kube-prometheus.libsonnet`. The recording rules can be found in [kube-prometheus/rules](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus/jsonnet/kube-prometheus/rules) and [kubernetes-mixin/rules](https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/rules) while the alerting rules can be found in [kube-prometheus/alerts](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus/jsonnet/kube-prometheus/alerts) and [kubernetes-mixin/alerts](https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/alerts).
|
||||
|
||||
Knowing which rules to change, the user can now use functions from the [Jsonnet standard library](https://jsonnet.org/ref/stdlib.html) to make these changes. Below are examples of both a filter and an adjustment being made to the default rules. These changes can be assigned to a local variable and then added to the `local kp` object as seen in the examples above.
|
||||
|
||||
#### Filter
|
||||
Here the alert `KubeStatefulSetReplicasMismatch` is being filtered out of the group `kubernetes-apps`. The default rule can be seen [here](https://github.com/kubernetes-monitoring/kubernetes-mixin/blob/master/alerts/apps_alerts.libsonnet).
|
||||
```jsonnet
|
||||
local filter = {
|
||||
prometheusAlerts+:: {
|
||||
groups: std.map(
|
||||
function(group)
|
||||
if group.name == 'kubernetes-apps' then
|
||||
group {
|
||||
rules: std.filter(function(rule)
|
||||
rule.alert != "KubeStatefulSetReplicasMismatch",
|
||||
group.rules
|
||||
)
|
||||
}
|
||||
else
|
||||
group,
|
||||
super.groups
|
||||
),
|
||||
},
|
||||
};
|
||||
```
|
||||
#### Adjustment
|
||||
Here the expression for the alert used above is updated from its previous value. The default rule can be seen [here](https://github.com/kubernetes-monitoring/kubernetes-mixin/blob/master/alerts/apps_alerts.libsonnet).
|
||||
```jsonnet
|
||||
local update = {
|
||||
prometheusAlerts+:: {
|
||||
groups: std.map(
|
||||
function(group)
|
||||
if group.name == 'kubernetes-apps' then
|
||||
group {
|
||||
rules: std.map(
|
||||
function(rule)
|
||||
if rule.alert == "KubeStatefulSetReplicasMismatch" then
|
||||
rule {
|
||||
expr: "kube_statefulset_status_replicas_ready{job=\"kube-state-metrics\",statefulset!=\"vault\"} != kube_statefulset_status_replicas{job=\"kube-state-metrics\",statefulset!=\"vault\"}"
|
||||
}
|
||||
else
|
||||
rule,
|
||||
group.rules
|
||||
)
|
||||
}
|
||||
else
|
||||
group,
|
||||
super.groups
|
||||
),
|
||||
},
|
||||
};
|
||||
```
|
||||
Using the example from above about adding in pre-rendered rules, the new local variables can be added as follows:
|
||||
```jsonnet
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + filter + update + {
|
||||
prometheusAlerts+:: (import 'existingrule.json'),
|
||||
};
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
||||
```
|
||||
## Dashboards
|
||||
|
||||
Dashboards can either be added using jsonnet or simply a pre-rendered json dashboard.
|
||||
|
||||
### Jsonnet dashboard
|
||||
|
||||
We recommend using the [grafonnet](https://github.com/grafana/grafonnet-lib/) library for jsonnet, which gives you a simple DSL to generate Grafana dashboards. Following the [Prometheus Monitoring Mixins proposal](https://docs.google.com/document/d/1A9xvzwqnFVSOZ5fD3blKODXfsat5fg6ZhnKu9LK3lB4/), additional dashboards are added to the `grafanaDashboards` key in the top-level object. To add a new jsonnet dashboard, simply add it there.
|
||||
|
||||
> Note that dashboards can just as well be included into this file, using the jsonnet `import` function. In this example it is just inlined in order to demonstrate their use in a single file.
|
||||
|
||||
[embedmd]:# (../examples/grafana-additional-jsonnet-dashboard-example.jsonnet)
|
||||
```jsonnet
|
||||
local grafana = import 'grafonnet/grafana.libsonnet';
|
||||
local dashboard = grafana.dashboard;
|
||||
local row = grafana.row;
|
||||
local prometheus = grafana.prometheus;
|
||||
local template = grafana.template;
|
||||
local graphPanel = grafana.graphPanel;
|
||||
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
},
|
||||
grafanaDashboards+:: {
|
||||
'my-dashboard.json':
|
||||
dashboard.new('My Dashboard')
|
||||
.addTemplate(
|
||||
{
|
||||
current: {
|
||||
text: 'Prometheus',
|
||||
value: 'Prometheus',
|
||||
},
|
||||
hide: 0,
|
||||
label: null,
|
||||
name: 'datasource',
|
||||
options: [],
|
||||
query: 'prometheus',
|
||||
refresh: 1,
|
||||
regex: '',
|
||||
type: 'datasource',
|
||||
},
|
||||
)
|
||||
.addRow(
|
||||
row.new()
|
||||
.addPanel(graphPanel.new('My Panel', span=6, datasource='$datasource')
|
||||
.addTarget(prometheus.target('vector(1)')))
|
||||
),
|
||||
},
|
||||
};
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
||||
```
|
||||
|
||||
### Pre-rendered Grafana dashboards
|
||||
|
||||
As jsonnet is a superset of json, the jsonnet `import` function can be used to include Grafana dashboard json blobs. In this example we are importing a [provided example dashboard](../examples/example-grafana-dashboard.json).
|
||||
|
||||
[embedmd]:# (../examples/grafana-additional-rendered-dashboard-example.jsonnet)
|
||||
```jsonnet
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
},
|
||||
grafanaDashboards+:: {
|
||||
'my-dashboard.json': (import 'example-grafana-dashboard.json'),
|
||||
},
|
||||
};
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
||||
```
|
101
docs/exposing-prometheus-alertmanager-grafana-ingress.md
Normal file
@@ -0,0 +1,101 @@
|
||||
# Exposing Prometheus, Alertmanager and Grafana UIs via Ingress
|
||||
|
||||
In order to access the web interfaces via the Internet, [Kubernetes Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) is a popular option. This guide explains how Kubernetes Ingress can be set up in order to expose the Prometheus, Alertmanager and Grafana UIs that are included in the [kube-prometheus](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus) project.
|
||||
|
||||
Note: before continuing, it is recommended to first get familiar with the [kube-prometheus](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus) stack by itself.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Apart from a running Kubernetes cluster with a running [kube-prometheus](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus) stack, a Kubernetes Ingress controller must be installed and functional. This guide was tested with the [nginx-ingress-controller](https://github.com/kubernetes/ingress-nginx). If you wish to reproduce the exact result depicted in this guide, we recommend using the nginx-ingress-controller.
|
||||
|
||||
## Setting up Ingress
|
||||
|
||||
The setup of Ingress objects is the same for Prometheus, Alertmanager and Grafana. Therefore this guide demonstrates it in detail for Prometheus only; it can easily be adapted for the other applications.
|
||||
|
||||
As monitoring data may contain sensitive information, this guide describes how to set up Ingress with basic auth as an example of minimal security. Of course this should be adapted to the preferred authentication means of any particular organization, but we feel it is important to at least provide an example with a minimum of security.
|
||||
|
||||
In order to setup basic auth, a secret with the `htpasswd` formatted file needs to be created. To do this, first install the [`htpasswd`](https://httpd.apache.org/docs/2.4/programs/htpasswd.html) tool.
|
||||
|
||||
To create the `htpasswd` formatted file called `auth` run:
|
||||
|
||||
```
|
||||
htpasswd -c auth <username>
|
||||
```
|
||||
|
||||
In order to use this, a secret needs to be created containing the content of the `htpasswd` file, and basic auth can then be configured via annotations on the Ingress object.
|
||||
|
||||
Also, the applications provide external links to themselves in alerts and various places. When an ingress is used in front of the applications, these links need to be based on the external URLs. This can be configured for each application in jsonnet.
|
||||
|
||||
```jsonnet
|
||||
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
local secret = k.core.v1.secret;
|
||||
local ingress = k.extensions.v1beta1.ingress;
|
||||
local ingressTls = ingress.mixin.spec.tlsType;
|
||||
local ingressRule = ingress.mixin.spec.rulesType;
|
||||
local httpIngressPath = ingressRule.mixin.http.pathsType;
|
||||
|
||||
local kp =
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
{
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
},
|
||||
prometheus+:: {
|
||||
prometheus+: {
|
||||
spec+: {
|
||||
externalUrl: 'http://prometheus.example.com',
|
||||
},
|
||||
},
|
||||
},
|
||||
ingress+:: {
|
||||
'prometheus-k8s':
|
||||
ingress.new() +
|
||||
ingress.mixin.metadata.withName($.prometheus.prometheus.metadata.name) +
|
||||
ingress.mixin.metadata.withNamespace($.prometheus.prometheus.metadata.namespace) +
|
||||
ingress.mixin.metadata.withAnnotations({
|
||||
'nginx.ingress.kubernetes.io/auth-type': 'basic',
|
||||
'nginx.ingress.kubernetes.io/auth-secret': 'basic-auth',
|
||||
'nginx.ingress.kubernetes.io/auth-realm': 'Authentication Required',
|
||||
}) +
|
||||
ingress.mixin.spec.withRules(
|
||||
ingressRule.new() +
|
||||
ingressRule.withHost('prometheus.example.com') +
|
||||
ingressRule.mixin.http.withPaths(
|
||||
httpIngressPath.new() +
|
||||
httpIngressPath.mixin.backend.withServiceName($.prometheus.service.metadata.name) +
|
||||
httpIngressPath.mixin.backend.withServicePort('web')
|
||||
),
|
||||
),
|
||||
},
|
||||
} + {
|
||||
ingress+:: {
|
||||
'basic-auth-secret':
|
||||
secret.new('basic-auth', { auth: std.base64(importstr 'auth') }) +
|
||||
secret.mixin.metadata.withNamespace($._config.namespace),
|
||||
},
|
||||
};
|
||||
|
||||
k.core.v1.list.new([
|
||||
kp.ingress['prometheus-k8s'],
|
||||
kp.ingress['basic-auth-secret'],
|
||||
])
|
||||
```
|
||||
|
||||
In order to expose Alertmanager and Grafana, create additional fields containing an ingress object, pointing at the `alertmanager` or `grafana` Service instead of `prometheus-k8s`. Make sure to also use the correct port for each: for Alertmanager it is also `web`, for Grafana it is `http`. Be sure to also specify the appropriate external URL. A sketch for Alertmanager follows below.
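
As a sketch (not a drop-in file), an analogous entry could be added to the `ingress+::` object above for Alertmanager. The host name is a placeholder, and `$.alertmanager.service` is assumed here to be the Alertmanager Service object provided by the library:

```jsonnet
'alertmanager-main':
  ingress.new() +
  ingress.mixin.metadata.withName('alertmanager-main') +
  ingress.mixin.metadata.withNamespace($._config.namespace) +
  ingress.mixin.spec.withRules(
    ingressRule.new() +
    ingressRule.withHost('alertmanager.example.com') +  // placeholder host
    ingressRule.mixin.http.withPaths(
      httpIngressPath.new() +
      httpIngressPath.mixin.backend.withServiceName($.alertmanager.service.metadata.name) +
      httpIngressPath.mixin.backend.withServicePort('web')  // Alertmanager serves on the 'web' port
    ),
  ),
```

The basic-auth annotations and the `externalUrl` setting would be added the same way as shown for Prometheus above.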
|
||||
|
||||
In order to render the Ingress objects in the same way as the other objects, use the pattern demonstrated in the [main readme](../README.md#usage):
|
||||
|
||||
```
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } +
|
||||
{ ['ingress-' + name]: kp.ingress[name] for name in std.objectFields(kp.ingress) }
|
||||
```
|
||||
|
||||
Note that, compared to the original, only the last line was added; the rest is identical.
|
||||
|
||||
See [ingress.jsonnet](../examples/ingress.jsonnet) for an example implementation.
|
159
docs/kube-prometheus-on-kubeadm.md
Normal file
159
docs/kube-prometheus-on-kubeadm.md
Normal file
@@ -0,0 +1,159 @@
|
||||
<br>
|
||||
<div class="alert alert-info" role="alert">
|
||||
<i class="fa fa-exclamation-triangle"></i><b> Note:</b> Starting with v0.12.0, Prometheus Operator requires use of Kubernetes v1.7.x and up.
|
||||
</div>
|
||||
|
||||
# Kube Prometheus on Kubeadm
|
||||
|
||||
The [kubeadm](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/) tool is linked by Kubernetes as the official way to deploy and manage self-hosted clusters. Kubeadm does a lot of heavy lifting by automatically configuring your Kubernetes cluster with some common options. This guide is intended to show you how to deploy Prometheus, Prometheus Operator, and kube-prometheus to get you started monitoring a cluster that was deployed with kubeadm.
|
||||
|
||||
This guide assumes you have a basic understanding of how to use the functionality the Prometheus Operator implements. If you haven't yet, we recommend reading through the [getting started guide](../../../Documentation/user-guides/getting-started.md) as well as the [alerting guide](../../../Documentation/user-guides/alerting.md).
|
||||
|
||||
## Kubeadm Prerequisites
|
||||
|
||||
This guide assumes you have some familiarity with `kubeadm` or at least have deployed a cluster using `kubeadm`. By default, `kubeadm` does not expose two of the services that we will be monitoring. Therefore, in order to get the most out of the `kube-prometheus` package, we need to make some quick tweaks to the Kubernetes cluster. Since we will be monitoring the `kube-controller-manager` and `kube-scheduler`, we must expose them to the cluster.
|
||||
|
||||
By default, `kubeadm` runs these components on your master, bound to `127.0.0.1`. There are a couple of ways to change this. The recommended way is to use the [kubeadm config file](https://kubernetes.io/docs/reference/generated/kubeadm/#config-file). An example configuration file can be used:
|
||||
|
||||
```yaml
|
||||
apiVersion: kubeadm.k8s.io/v1alpha1
|
||||
kind: MasterConfiguration
|
||||
api:
|
||||
advertiseAddress: 192.168.1.173
|
||||
bindPort: 6443
|
||||
authorizationModes:
|
||||
- Node
|
||||
- RBAC
|
||||
certificatesDir: /etc/kubernetes/pki
|
||||
cloudProvider:
|
||||
etcd:
|
||||
dataDir: /var/lib/etcd
|
||||
endpoints: null
|
||||
imageRepository: gcr.io/google_containers
|
||||
kubernetesVersion: v1.8.3
|
||||
networking:
|
||||
dnsDomain: cluster.local
|
||||
serviceSubnet: 10.96.0.0/12
|
||||
nodeName: your-dev
|
||||
tokenTTL: 24h0m0s
|
||||
controllerManagerExtraArgs:
|
||||
address: 0.0.0.0
|
||||
schedulerExtraArgs:
|
||||
address: 0.0.0.0
|
||||
```
|
||||
|
||||
Notice the `schedulerExtraArgs` and `controllerManagerExtraArgs`. This exposes the `kube-controller-manager` and `kube-scheduler` services to the rest of the cluster. If you run the Kubernetes core components as pods in the `kube-system` namespace, ensure that the `kube-prometheus-exporter-kube-scheduler` and `kube-prometheus-exporter-kube-controller-manager` services' `spec.selector` values match the labels of those pods.
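A quick way to compare the two is to inspect the pod labels and the service selectors directly. This is only a sketch: the `component=` labels are what kubeadm sets on its static pods by default, and the service names are the ones mentioned above; adjust both if your cluster or manifests differ.

```bash
# Labels on the kubeadm-managed control plane pods
kubectl -n kube-system get pods -l component=kube-scheduler --show-labels
kubectl -n kube-system get pods -l component=kube-controller-manager --show-labels

# Selectors used by the exporter services
kubectl -n kube-system get svc kube-prometheus-exporter-kube-scheduler -o jsonpath='{.spec.selector}'; echo
kubectl -n kube-system get svc kube-prometheus-exporter-kube-controller-manager -o jsonpath='{.spec.selector}'; echo
```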
|
||||
|
||||
In addition to the `node-exporter`, we will be scraping the `cAdvisor` metrics embedded in the `kubelet` on all the nodes. This, however, requires a change to the `kubelet` service on the master as well as on all the nodes. According to the Kubernetes documentation:
|
||||
|
||||
> The kubeadm deb package ships with configuration for how the kubelet should be run. Note that the `kubeadm` CLI command will never touch this drop-in file. This drop-in file belongs to the kubeadm deb/rpm package.
|
||||
|
||||
Again, we need to expose the `cAdvisor` that is installed and managed by the `kubelet` daemon, and allow webhook token authentication. To do so, we do the following on all the masters and nodes:
|
||||
|
||||
```bash
|
||||
KUBEADM_SYSTEMD_CONF=/etc/systemd/system/kubelet.service.d/10-kubeadm.conf
|
||||
sed -e "/cadvisor-port=0/d" -i "$KUBEADM_SYSTEMD_CONF"
|
||||
if ! grep -q "authentication-token-webhook=true" "$KUBEADM_SYSTEMD_CONF"; then
|
||||
sed -e "s/--authorization-mode=Webhook/--authentication-token-webhook=true --authorization-mode=Webhook/" -i "$KUBEADM_SYSTEMD_CONF"
|
||||
fi
|
||||
systemctl daemon-reload
|
||||
systemctl restart kubelet
|
||||
```
|
||||
|
||||
In case you already have a Kubernetes cluster deployed with kubeadm, change the address that `kube-controller-manager` and `kube-scheduler` listen on, in addition to the previous kubelet change:
|
||||
|
||||
```
|
||||
sed -e "s/- --address=127.0.0.1/- --address=0.0.0.0/" -i /etc/kubernetes/manifests/kube-controller-manager.yaml
|
||||
sed -e "s/- --address=127.0.0.1/- --address=0.0.0.0/" -i /etc/kubernetes/manifests/kube-scheduler.yaml
|
||||
```
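To verify that the components are now reachable from other machines, you can curl their metrics endpoints from a worker node. Ports 10251 (kube-scheduler) and 10252 (kube-controller-manager) were the default insecure metrics ports for clusters of this vintage; this is a sketch, so adjust the ports and address if your cluster differs.

```bash
MASTER_IP=192.168.1.173   # the advertiseAddress used in the kubeadm config above

# kube-scheduler metrics
curl -s "http://${MASTER_IP}:10251/metrics" | head

# kube-controller-manager metrics
curl -s "http://${MASTER_IP}:10252/metrics" | head
```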
|
||||
|
||||
With these changes, your Kubernetes cluster is ready.
|
||||
|
||||
## Metric Sources
|
||||
|
||||
Monitoring a Kubernetes cluster with Prometheus is a natural choice, as Kubernetes components themselves are instrumented with Prometheus metrics. Those components therefore only have to be discovered by Prometheus, and most of the cluster is monitored.
|
||||
|
||||
Metrics that are about cluster state, rather than about a single component, are exposed by the add-on component [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics).
|
||||
|
||||
Additionally, to have an overview of cluster nodes' resources, the Prometheus [node_exporter](https://github.com/prometheus/node_exporter) is used. The node_exporter allows monitoring of a node's resources: CPU, memory, disk utilization and more.
|
||||
|
||||
Once you complete this guide you will monitor the following:
|
||||
|
||||
* cluster state via kube-state-metrics
|
||||
* nodes via the node_exporter
|
||||
* kubelets
|
||||
* apiserver
|
||||
* kube-scheduler
|
||||
* kube-controller-manager
|
||||
|
||||
|
||||
## Getting Up and Running Fast with Kube-Prometheus
|
||||
|
||||
To help get started more quickly with monitoring Kubernetes clusters, [kube-prometheus](https://github.com/coreos/prometheus-operator/tree/master/contrib/kube-prometheus) was created. It is a collection of manifests including dashboards and alerting rules that can easily be deployed. It utilizes the Prometheus Operator and all the manifests demonstrated in [this guide](../../../Documentation/user-guides/cluster-monitoring.md).
|
||||
|
||||
This section represents a quick installation and is not intended to teach you about all the components. The easiest way to get started is to clone this repository and use the `kube-prometheus` section of the code.
|
||||
|
||||
```
|
||||
git clone https://github.com/coreos/prometheus-operator
|
||||
cd prometheus-operator/contrib/kube-prometheus/
|
||||
```
|
||||
|
||||
First, create the namespace in which you want the monitoring tool suite to be running.
|
||||
|
||||
```
|
||||
export NAMESPACE='monitoring'
|
||||
kubectl create namespace "$NAMESPACE"
|
||||
```
|
||||
|
||||
Now we will create the components for the Prometheus Operator:
|
||||
|
||||
```
|
||||
kubectl --namespace="$NAMESPACE" apply -f manifests/prometheus-operator
|
||||
```
|
||||
|
||||
This will create all the Prometheus Operator components. You might need to wait a short amount of time before the Custom Resource Definitions are available in the cluster. You can wait for them:
|
||||
|
||||
```
|
||||
until kubectl --namespace="$NAMESPACE" get alertmanagers.monitoring.coreos.com > /dev/null 2>&1; do sleep 1; printf "."; done
|
||||
```
|
||||
|
||||
Next, we will install the node exporter and then kube-state-metrics:
|
||||
|
||||
```
|
||||
kubectl --namespace="$NAMESPACE" apply -f manifests/node-exporter
|
||||
kubectl --namespace="$NAMESPACE" apply -f manifests/kube-state-metrics
|
||||
```
|
||||
|
||||
Then, we can deploy the Grafana credentials. By default, the username/password will be `admin`/`admin`; you should change these for your production clusters.
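A hedged sketch of overriding the defaults yourself, assuming the secret created by that manifest is named `grafana-credentials` and uses the keys `user` and `password` (verify this against the manifest in your checkout before relying on it):

```bash
# Create or update the Grafana credentials secret with your own values.
kubectl --namespace="$NAMESPACE" create secret generic grafana-credentials \
  --from-literal=user=admin \
  --from-literal=password='use-a-strong-password-here' \
  --dry-run -o yaml | kubectl --namespace="$NAMESPACE" apply -f -
```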
|
||||
|
||||
```
|
||||
kubectl --namespace="$NAMESPACE" apply -f manifests/grafana/grafana-credentials.yaml
|
||||
```
|
||||
|
||||
Then install Grafana itself:
|
||||
|
||||
```
|
||||
kubectl --namespace="$NAMESPACE" apply -f manifests/grafana
|
||||
```
|
||||
|
||||
Next up is the `Prometheus` object itself. We will deploy the application, and then the roles/role-bindings.
|
||||
|
||||
```
|
||||
find manifests/prometheus -type f ! -name prometheus-k8s-roles.yaml ! -name prometheus-k8s-role-bindings.yaml -exec kubectl --namespace "$NAMESPACE" apply -f {} \;
|
||||
kubectl apply -f manifests/prometheus/prometheus-k8s-roles.yaml
|
||||
kubectl apply -f manifests/prometheus/prometheus-k8s-role-bindings.yaml
|
||||
```
|
||||
|
||||
Finally, install the [Alertmanager](../../../Documentation/user-guides/alerting.md):
|
||||
|
||||
```
|
||||
kubectl --namespace="$NAMESPACE" apply -f manifests/alertmanager
|
||||
```
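At this point you can watch the pods in the monitoring namespace come up; once everything is `Running`, the UIs listed below should be reachable:

```bash
# Watch until all monitoring pods report Running/Ready
kubectl --namespace="$NAMESPACE" get pods -w
```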
|
||||
|
||||
Now you should have a working cluster. After all the pods are ready, you should be able to reach:
|
||||
|
||||
* Prometheus UI on node port `30900`
|
||||
* Alertmanager UI on node port `30903`
|
||||
* Grafana on node port `30902`
|
||||
|
||||
These can of course be changed via the Service definitions. It is recommended to look at the [Exposing Prometheus and Alert Manager](../../../Documentation/user-guides/exposing-prometheus-and-alertmanager.md) documentation for more detailed information on how to expose these services.
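To find where to point your browser, look up a node address and confirm the NodePorts on the Services. The service names shown here (`prometheus-k8s`, `alertmanager-main`, `grafana`) are the defaults used by these manifests; adjust if yours differ.

```bash
# Any node's address will do for NodePort services
kubectl get nodes -o wide

# Confirm the NodePorts assigned to the monitoring services
kubectl --namespace="$NAMESPACE" get svc prometheus-k8s alertmanager-main grafana
```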
|
64
docs/monitoring-external-etcd.md
Normal file
64
docs/monitoring-external-etcd.md
Normal file
@@ -0,0 +1,64 @@
|
||||
# Monitoring external etcd
|
||||
This guide will help you monitor an external etcd cluster, i.e. one that is not hosted inside Kubernetes.
|
||||
This is often the case with Kubernetes setups. This approach has been tested with kube-aws, but the same principles apply to other tools.
|
||||
|
||||
Note that [etcd.jsonnet](../examples/etcd.jsonnet) & [kube-prometheus-static-etcd.libsonnet](../jsonnet/kube-prometheus/kube-prometheus-static-etcd.libsonnet) (which are described by a section of the [Readme](../README.md#static-etcd-configuration)) do the following:
|
||||
* Put the three etcd TLS client files (CA & cert & key) into a secret in the namespace, and have Prometheus Operator load the secret.
|
||||
* Create the following (to expose etcd metrics on port 2379): a Service, an Endpoints object, and a ServiceMonitor.
|
||||
|
||||
# Step 1: Open the port
|
||||
|
||||
You now need to allow the nodes Prometheus is running on to talk to etcd on port 2379 (or whichever port etcd uses to expose its metrics).
|
||||
|
||||
If using kube-aws, you will need to edit the inbound rules of the etcd security group, specifying the security group of your Kubernetes (worker) nodes as the source.
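Once the firewall allows it, you can check from one of the worker nodes that the metrics endpoint is reachable, using the same client certificate files referenced by [etcd.jsonnet](../examples/etcd.jsonnet). The file names and IP below are only placeholders from that example; substitute your own.

```bash
ETCD_IP=10.0.0.10   # one of your etcd nodes

# Fetch the first few lines of the etcd metrics over TLS with client authentication
curl --cacert etcd-client-ca.crt \
     --cert etcd-client.crt \
     --key etcd-client.key \
     -s "https://${ETCD_IP}:2379/metrics" | head
```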
|
||||
|
||||
## kube-aws and EIP or ENI inconsistency
|
||||
With kube-aws, each etcd node has two IP addresses:
|
||||
|
||||
* EC2 instance IP
|
||||
* EIP or ENI (depending on the method chosen in your cluster.yaml)
|
||||
|
||||
For some reason, some etcd nodes answer to :2379/metrics on the instance IP (eth0), while others answer on the EIP/ENI address (eth1). See issue https://github.com/kubernetes-incubator/kube-aws/issues/923
|
||||
It would of course be much better if we could hit the EIP/ENI all the time, as they don't change even if the underlying EC2 instance goes down.
|
||||
If the instance IP (eth0) is specified in the Prometheus Operator ServiceMonitor and the EC2 instance goes down, one would have to update the ServiceMonitor.
|
||||
|
||||
Another idea would be to use the DNS entries of etcd, but those are not currently supported for Endpoints objects in Kubernetes.
|
||||
|
||||
# Step 2: verify
|
||||
|
||||
Go to the Prometheus UI on :9090/config and check that you have an etcd job entry:
|
||||
```
|
||||
- job_name: monitoring/etcd-k8s/0
|
||||
scrape_interval: 30s
|
||||
scrape_timeout: 10s
|
||||
...
|
||||
```
|
||||
|
||||
On the :9090/targets page:
|
||||
* You should see "etcd" with the UP state. If not, check the Error column for more information.
|
||||
* If no "etcd" targets are shown on this page at all, Prometheus isn't attempting to scrape it; see the API check below for a scriptable way to inspect this.
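For a scriptable check, the same information is available from the Prometheus HTTP API. This is a sketch: it assumes the default `prometheus-k8s-0` pod name in the `monitoring` namespace, that `jq` is installed locally, and that the etcd job label contains "etcd" (it depends on the Service name used), so adjust as needed.

```bash
# Forward the Prometheus API to localhost
kubectl --namespace monitoring port-forward prometheus-k8s-0 9090 &

# List the etcd targets with their health and last scrape error
curl -s http://localhost:9090/api/v1/targets \
  | jq '.data.activeTargets[]
        | select(.labels.job | test("etcd"))
        | {instance: .labels.instance, health: .health, lastError: .lastError}'
```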
|
||||
|
||||
# Step 3: Grafana dashboard
|
||||
|
||||
## Find a dashboard you like
|
||||
|
||||
Try to load this dashboard:
|
||||
https://grafana.com/dashboards/3070
|
||||
|
||||
## Save the dashboard in the configmap
|
||||
|
||||
As documented in [Developing Alerts and Dashboards](developing-prometheus-rules-and-grafana-dashboards.md), the Grafana instances are stateless. Dashboards are automatically re-loaded from the ConfigMap.
|
||||
So if you load a dashboard through the Grafana UI, it won't be kept unless it is saved in the ConfigMap.
|
||||
|
||||
Read [the document](developing-prometheus-rules-and-grafana-dashboards.md), but in summary:
|
||||
|
||||
### Copy your dashboard:
|
||||
Once you are happy with the dashboard, export it and move it to `prometheus-operator/contrib/kube-prometheus/assets/grafana/` (with a filename ending in `-dashboard.json`).
|
||||
|
||||
### Regenerate the Grafana dashboards manifest:
|
||||
`hack/scripts/generate-dashboards-configmap.sh > manifests/grafana/grafana-dashboards.yaml`
|
||||
|
||||
### Reload the manifest in Kubernetes:
|
||||
`kubectl -n monitoring replace -f manifests/grafana/grafana-dashboards.yaml`
|
||||
|
||||
After a few minutes, your dashboard will be available permanently to all Grafana instances.
|
28
docs/monitoring-other-namespaces.md
Normal file
28
docs/monitoring-other-namespaces.md
Normal file
@@ -0,0 +1,28 @@
|
||||
# Monitoring other Kubernetes Namespaces
|
||||
This guide will help you monitor applications in other namespaces. By default, the RBAC rules are only enabled for the `default` and `kube-system` namespaces during installation.
|
||||
|
||||
# Setup
|
||||
You have to provide the list of namespaces that you want to be able to monitor.
|
||||
This is done via the `namespaces` list under `prometheus+::` in `_config`, as shown in the example below. You usually set this in your `.jsonnet` file when building the manifests.
|
||||
|
||||
Example to create the needed `Role` and `RoleBinding` for the namespace `foo`:
|
||||
```
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
|
||||
prometheus+:: {
|
||||
namespaces: ["default", "kube-system","foo"],
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
||||
|
||||
```
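After applying the generated manifests, you can confirm that the namespace-scoped RBAC objects exist. The names shown in the comment are the ones these manifests typically generate (`prometheus-k8s`); treat this as a sketch and adjust if your build names them differently.

```bash
# List the Role and RoleBinding created for the additional namespace
kubectl -n foo get role,rolebinding
# Expect to see a Role and RoleBinding named prometheus-k8s (or similar) in the output.
```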
|
16
example.jsonnet
Normal file
16
example.jsonnet
Normal file
@@ -0,0 +1,16 @@
|
||||
local kp =
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
},
|
||||
};
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
||||
|
17
examples/additional-namespaces.jsonnet
Normal file
17
examples/additional-namespaces.jsonnet
Normal file
@@ -0,0 +1,17 @@
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
|
||||
prometheus+:: {
|
||||
namespaces+: ['my-namespace', 'my-second-namespace'],
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
7
examples/alertmanager-config-external.jsonnet
Normal file
7
examples/alertmanager-config-external.jsonnet
Normal file
@@ -0,0 +1,7 @@
|
||||
((import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
alertmanager+: {
|
||||
config: importstr 'alertmanager-config.yaml',
|
||||
},
|
||||
},
|
||||
}).alertmanager.secret
|
22
examples/alertmanager-config.jsonnet
Normal file
22
examples/alertmanager-config.jsonnet
Normal file
@@ -0,0 +1,22 @@
|
||||
((import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
alertmanager+: {
|
||||
config: |||
|
||||
global:
|
||||
resolve_timeout: 10m
|
||||
route:
|
||||
group_by: ['job']
|
||||
group_wait: 30s
|
||||
group_interval: 5m
|
||||
repeat_interval: 12h
|
||||
receiver: 'null'
|
||||
routes:
|
||||
- match:
|
||||
alertname: Watchdog
|
||||
receiver: 'null'
|
||||
receivers:
|
||||
- name: 'null'
|
||||
|||,
|
||||
},
|
||||
},
|
||||
}).alertmanager.secret
|
15
examples/alertmanager-config.yaml
Normal file
15
examples/alertmanager-config.yaml
Normal file
@@ -0,0 +1,15 @@
|
||||
# external alertmanager yaml
|
||||
global:
|
||||
resolve_timeout: 10m
|
||||
route:
|
||||
group_by: ['job']
|
||||
group_wait: 30s
|
||||
group_interval: 5m
|
||||
repeat_interval: 12h
|
||||
receiver: 'null'
|
||||
routes:
|
||||
- match:
|
||||
alertname: Watchdog
|
||||
receiver: 'null'
|
||||
receivers:
|
||||
- name: 'null'
|
2
examples/auth
Normal file
2
examples/auth
Normal file
@@ -0,0 +1,2 @@
|
||||
# This file should not ever be used, it's just a mock.
|
||||
dontusethis:$apr1$heg6VIp7$1PSzJ/Z6fYboQ5pYrbgSy.
|
8
examples/basic-auth/secrets.yaml
Normal file
8
examples/basic-auth/secrets.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: basic-auth
|
||||
data:
|
||||
password: dG9vcg== # toor
|
||||
user: YWRtaW4= # admin
|
||||
type: Opaque
|
22
examples/basic-auth/service-monitor.yaml
Normal file
22
examples/basic-auth/service-monitor.yaml
Normal file
@@ -0,0 +1,22 @@
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
labels:
|
||||
k8s-apps: basic-auth-example
|
||||
name: basic-auth-example
|
||||
spec:
|
||||
endpoints:
|
||||
- basicAuth:
|
||||
password:
|
||||
name: basic-auth
|
||||
key: password
|
||||
username:
|
||||
name: basic-auth
|
||||
key: user
|
||||
port: metrics
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- logging
|
||||
selector:
|
||||
matchLabels:
|
||||
app: myapp
|
0
examples/etcd-client-ca.crt
Normal file
0
examples/etcd-client-ca.crt
Normal file
0
examples/etcd-client.crt
Normal file
0
examples/etcd-client.crt
Normal file
0
examples/etcd-client.key
Normal file
0
examples/etcd-client.key
Normal file
22
examples/etcd-skip-verify.jsonnet
Normal file
22
examples/etcd-skip-verify.jsonnet
Normal file
@@ -0,0 +1,22 @@
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-static-etcd.libsonnet') + {
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
|
||||
etcd+:: {
|
||||
ips: ['127.0.0.1'],
|
||||
clientCA: importstr 'etcd-client-ca.crt',
|
||||
clientKey: importstr 'etcd-client.key',
|
||||
clientCert: importstr 'etcd-client.crt',
|
||||
insecureSkipVerify: true,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
53
examples/etcd.jsonnet
Normal file
53
examples/etcd.jsonnet
Normal file
@@ -0,0 +1,53 @@
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-static-etcd.libsonnet') + {
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
|
||||
// Reference info: https://github.com/coreos/prometheus-operator/blob/master/contrib/kube-prometheus/README.md#static-etcd-configuration
|
||||
etcd+:: {
|
||||
// Configure this to be the IP(s) to scrape - i.e. your etcd node(s) (use commas to separate multiple values).
|
||||
ips: ['127.0.0.1'],
|
||||
|
||||
// Reference info:
|
||||
// * https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#servicemonitorspec (has endpoints)
|
||||
// * https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#endpoint (has tlsConfig)
|
||||
// * https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#tlsconfig (has: caFile, certFile, keyFile, serverName, & insecureSkipVerify)
|
||||
|
||||
// Set these three variables to the fully qualified directory path on your work machine to the certificate files that are valid to scrape etcd metrics with (check the apiserver container).
|
||||
// Most likely these certificates are generated somewhere in an infrastructure repository, so using the jsonnet `importstr` function can
|
||||
// be useful here. (Kube-aws stores these three files inside the credential folder.)
|
||||
// All the sensitive information on the certificates will end up in a Kubernetes Secret.
|
||||
clientCA: importstr 'etcd-client-ca.crt',
|
||||
clientKey: importstr 'etcd-client.key',
|
||||
clientCert: importstr 'etcd-client.crt',
|
||||
|
||||
// Note that you should specify a value EITHER for 'serverName' OR for 'insecureSkipVerify'. (Don't specify a value for both of them, and don't specify a value for neither of them.)
|
||||
// * Specifying serverName: Ideally you should provide a valid value for serverName (and then insecureSkipVerify should be left as false - so that serverName gets used).
|
||||
// * Specifying insecureSkipVerify: insecureSkipVerify is only to be used (i.e. set to true) if you cannot (based on how your etcd certificates were created) use a Subject Alternative Name.
|
||||
// * If you specify a value:
|
||||
// ** for both of these variables: When 'insecureSkipVerify: true' is specified, then also specifying a value for serverName won't hurt anything but it will be ignored.
|
||||
// ** for neither of these variables: then you'll get authentication errors on the prom '/targets' page with your etcd targets.
|
||||
|
||||
// A valid name (DNS or Subject Alternative Name) that the client (i.e. prometheus) will use to verify the etcd TLS certificate.
|
||||
// * Note that doing `nslookup etcd.kube-system.svc.cluster.local` (on a pod in a K8s cluster where kube-prometheus has been installed) shows that kube-prometheus sets up this hostname.
|
||||
// * `openssl x509 -noout -text -in etcd-client.pem` will print the Subject Alternative Names.
|
||||
serverName: 'etcd.kube-system.svc.cluster.local',
|
||||
|
||||
// When insecureSkipVerify isn't specified, the default value is "false".
|
||||
//insecureSkipVerify: true,
|
||||
|
||||
// In case you have generated the etcd certificate with kube-aws:
|
||||
// * If you only have one etcd node, you can use the value from 'etcd.internalDomainName' (specified in your kube-aws cluster.yaml) as the value for 'serverName'.
|
||||
// * But if you have multiple etcd nodes, you will need to use 'insecureSkipVerify: true' (if using default certificate generators method), as the valid certificate domain
|
||||
// will be different for each etcd node. (kube-aws default certificates are not valid against the IP - they were created for the DNS.)
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
36
examples/example-app/example-app.yaml
Normal file
36
examples/example-app/example-app.yaml
Normal file
@@ -0,0 +1,36 @@
|
||||
kind: Service
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: example-app
|
||||
labels:
|
||||
tier: frontend
|
||||
namespace: default
|
||||
spec:
|
||||
selector:
|
||||
app: example-app
|
||||
ports:
|
||||
- name: web
|
||||
protocol: TCP
|
||||
port: 8080
|
||||
targetPort: web
|
||||
---
|
||||
apiVersion: extensions/v1beta1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: example-app
|
||||
namespace: default
|
||||
spec:
|
||||
replicas: 4
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: example-app
|
||||
version: 1.1.3
|
||||
spec:
|
||||
containers:
|
||||
- name: example-app
|
||||
image: quay.io/fabxc/prometheus_demo_service
|
||||
ports:
|
||||
- name: web
|
||||
containerPort: 8080
|
||||
protocol: TCP
|
@@ -0,0 +1,13 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
name: prometheus-frontend
|
||||
namespace: monitoring
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: alertmanager-discovery
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: prometheus-frontend
|
||||
namespace: default
|
@@ -0,0 +1,12 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
||||
kind: Role
|
||||
metadata:
|
||||
name: alertmanager-discovery
|
||||
namespace: monitoring
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- services
|
||||
- endpoints
|
||||
- pods
|
||||
verbs: ["list", "watch"]
|
13
examples/example-app/prometheus-frontend-role-binding.yaml
Normal file
13
examples/example-app/prometheus-frontend-role-binding.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
name: prometheus-frontend
|
||||
namespace: default
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: prometheus-frontend
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: prometheus-frontend
|
||||
namespace: default
|
17
examples/example-app/prometheus-frontend-role.yaml
Normal file
17
examples/example-app/prometheus-frontend-role.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
||||
kind: Role
|
||||
metadata:
|
||||
name: prometheus-frontend
|
||||
namespace: default
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- nodes
|
||||
- services
|
||||
- endpoints
|
||||
- pods
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- configmaps
|
||||
verbs: ["get"]
|
@@ -0,0 +1,5 @@
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: prometheus-frontend
|
||||
namespace: default
|
15
examples/example-app/prometheus-frontend-svc.yaml
Normal file
15
examples/example-app/prometheus-frontend-svc.yaml
Normal file
@@ -0,0 +1,15 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: prometheus-frontend
|
||||
namespace: default
|
||||
spec:
|
||||
type: NodePort
|
||||
ports:
|
||||
- name: web
|
||||
nodePort: 30100
|
||||
port: 9090
|
||||
protocol: TCP
|
||||
targetPort: web
|
||||
selector:
|
||||
prometheus: frontend
|
25
examples/example-app/prometheus-frontend.yaml
Normal file
25
examples/example-app/prometheus-frontend.yaml
Normal file
@@ -0,0 +1,25 @@
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: Prometheus
|
||||
metadata:
|
||||
name: frontend
|
||||
namespace: default
|
||||
labels:
|
||||
prometheus: frontend
|
||||
spec:
|
||||
serviceAccountName: prometheus-frontend
|
||||
version: v1.7.1
|
||||
serviceMonitorSelector:
|
||||
matchLabels:
|
||||
tier: frontend
|
||||
resources:
|
||||
requests:
|
||||
# 2Gi is default, but won't schedule if you don't have a node with >2Gi
|
||||
# memory. Modify based on your target and time-series count for
|
||||
# production use. This value is mainly meant for demonstration/testing
|
||||
# purposes.
|
||||
memory: 400Mi
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- namespace: monitoring
|
||||
name: alertmanager-main
|
||||
port: web
|
19
examples/example-app/servicemonitor-frontend.yaml
Normal file
19
examples/example-app/servicemonitor-frontend.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: frontend
|
||||
namespace: default
|
||||
labels:
|
||||
tier: frontend
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
tier: frontend
|
||||
targetLabels:
|
||||
- tier
|
||||
endpoints:
|
||||
- port: web
|
||||
interval: 10s
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- default
|
177
examples/example-grafana-dashboard.json
Normal file
177
examples/example-grafana-dashboard.json
Normal file
@@ -0,0 +1,177 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
|
||||
]
|
||||
},
|
||||
"editable": false,
|
||||
"gnetid": null,
|
||||
"graphtooltip": 0,
|
||||
"hidecontrols": false,
|
||||
"id": null,
|
||||
"links": [
|
||||
|
||||
],
|
||||
"refresh": "",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
"collapsed": false,
|
||||
"height": "250px",
|
||||
"panels": [
|
||||
{
|
||||
"aliascolors": {
|
||||
|
||||
},
|
||||
"bars": false,
|
||||
"dashlength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "$datasource",
|
||||
"fill": 1,
|
||||
"gridpos": {
|
||||
|
||||
},
|
||||
"id": 2,
|
||||
"legend": {
|
||||
"alignastable": false,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"rightside": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"nullpointmode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesoverrides": [
|
||||
|
||||
],
|
||||
"spacelength": 10,
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedline": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "vector(1)",
|
||||
"format": "time_series",
|
||||
"intervalfactor": 2,
|
||||
"legendformat": "",
|
||||
"refid": "a"
|
||||
}
|
||||
],
|
||||
"thresholds": [
|
||||
|
||||
],
|
||||
"timefrom": null,
|
||||
"timeshift": null,
|
||||
"title": "my panel",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": [
|
||||
|
||||
]
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logbase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logbase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatiteration": null,
|
||||
"repeatrowid": null,
|
||||
"showtitle": false,
|
||||
"title": "dashboard row",
|
||||
"titlesize": "h6",
|
||||
"type": "row"
|
||||
}
|
||||
],
|
||||
"schemaversion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "prometheus",
|
||||
"value": "prometheus"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": null,
|
||||
"name": "datasource",
|
||||
"options": [
|
||||
|
||||
],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"type": "datasource"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "browser",
|
||||
"title": "my dashboard",
|
||||
"version": 0
|
||||
}
|
1
examples/existingrule.json
Normal file
1
examples/existingrule.json
Normal file
@@ -0,0 +1 @@
|
||||
{"groups":[{"name":"example-group","rules":[{"alert":"Watchdog","annotations":{"description":"This is a Watchdog meant to ensure that the entire alerting pipeline is functional."},"expr":"vector(1)","labels":{"severity":"none"}}]}]}
|
9
examples/existingrule.yaml
Normal file
9
examples/existingrule.yaml
Normal file
@@ -0,0 +1,9 @@
|
||||
groups:
|
||||
- name: example-group
|
||||
rules:
|
||||
- alert: Watchdog
|
||||
expr: vector(1)
|
||||
labels:
|
||||
severity: "none"
|
||||
annotations:
|
||||
description: This is a Watchdog meant to ensure that the entire alerting pipeline is functional.
|
@@ -0,0 +1,45 @@
|
||||
local grafana = import 'grafonnet/grafana.libsonnet';
|
||||
local dashboard = grafana.dashboard;
|
||||
local row = grafana.row;
|
||||
local prometheus = grafana.prometheus;
|
||||
local template = grafana.template;
|
||||
local graphPanel = grafana.graphPanel;
|
||||
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
},
|
||||
grafanaDashboards+:: {
|
||||
'my-dashboard.json':
|
||||
dashboard.new('My Dashboard')
|
||||
.addTemplate(
|
||||
{
|
||||
current: {
|
||||
text: 'Prometheus',
|
||||
value: 'Prometheus',
|
||||
},
|
||||
hide: 0,
|
||||
label: null,
|
||||
name: 'datasource',
|
||||
options: [],
|
||||
query: 'prometheus',
|
||||
refresh: 1,
|
||||
regex: '',
|
||||
type: 'datasource',
|
||||
},
|
||||
)
|
||||
.addRow(
|
||||
row.new()
|
||||
.addPanel(graphPanel.new('My Panel', span=6, datasource='$datasource')
|
||||
.addTarget(prometheus.target('vector(1)')))
|
||||
),
|
||||
},
|
||||
};
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
@@ -0,0 +1,16 @@
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
},
|
||||
grafanaDashboards+:: {
|
||||
'my-dashboard.json': (import 'example-grafana-dashboard.json'),
|
||||
},
|
||||
};
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
104
examples/ingress.jsonnet
Normal file
104
examples/ingress.jsonnet
Normal file
@@ -0,0 +1,104 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
local secret = k.core.v1.secret;
|
||||
local ingress = k.extensions.v1beta1.ingress;
|
||||
local ingressTls = ingress.mixin.spec.tlsType;
|
||||
local ingressRule = ingress.mixin.spec.rulesType;
|
||||
local httpIngressPath = ingressRule.mixin.http.pathsType;
|
||||
|
||||
local kp =
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
{
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
},
|
||||
// Configure External URL's per application
|
||||
alertmanager+:: {
|
||||
alertmanager+: {
|
||||
spec+: {
|
||||
externalUrl: 'http://alertmanager.example.com',
|
||||
},
|
||||
},
|
||||
},
|
||||
grafana+:: {
|
||||
config+: {
|
||||
sections+: {
|
||||
server+: {
|
||||
root_url: 'http://grafana.example.com/',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
prometheus+:: {
|
||||
prometheus+: {
|
||||
spec+: {
|
||||
externalUrl: 'http://prometheus.example.com',
|
||||
},
|
||||
},
|
||||
},
|
||||
// Create ingress objects per application
|
||||
ingress+:: {
|
||||
'alertmanager-main':
|
||||
ingress.new() +
|
||||
ingress.mixin.metadata.withName('alertmanager-main') +
|
||||
ingress.mixin.metadata.withNamespace($._config.namespace) +
|
||||
ingress.mixin.metadata.withAnnotations({
|
||||
'nginx.ingress.kubernetes.io/auth-type': 'basic',
|
||||
'nginx.ingress.kubernetes.io/auth-secret': 'basic-auth',
|
||||
'nginx.ingress.kubernetes.io/auth-realm': 'Authentication Required',
|
||||
}) +
|
||||
ingress.mixin.spec.withRules(
|
||||
ingressRule.new() +
|
||||
ingressRule.withHost('alertmanager.example.com') +
|
||||
ingressRule.mixin.http.withPaths(
|
||||
httpIngressPath.new() +
|
||||
httpIngressPath.mixin.backend.withServiceName('alertmanager-main') +
|
||||
httpIngressPath.mixin.backend.withServicePort('web')
|
||||
),
|
||||
),
|
||||
grafana:
|
||||
ingress.new() +
|
||||
ingress.mixin.metadata.withName('grafana') +
|
||||
ingress.mixin.metadata.withNamespace($._config.namespace) +
|
||||
ingress.mixin.metadata.withAnnotations({
|
||||
'nginx.ingress.kubernetes.io/auth-type': 'basic',
|
||||
'nginx.ingress.kubernetes.io/auth-secret': 'basic-auth',
|
||||
'nginx.ingress.kubernetes.io/auth-realm': 'Authentication Required',
|
||||
}) +
|
||||
ingress.mixin.spec.withRules(
|
||||
ingressRule.new() +
|
||||
ingressRule.withHost('grafana.example.com') +
|
||||
ingressRule.mixin.http.withPaths(
|
||||
httpIngressPath.new() +
|
||||
httpIngressPath.mixin.backend.withServiceName('grafana') +
|
||||
httpIngressPath.mixin.backend.withServicePort('http')
|
||||
),
|
||||
),
|
||||
'prometheus-k8s':
|
||||
ingress.new() +
|
||||
ingress.mixin.metadata.withName('prometheus-k8s') +
|
||||
ingress.mixin.metadata.withNamespace($._config.namespace) +
|
||||
ingress.mixin.metadata.withAnnotations({
|
||||
'nginx.ingress.kubernetes.io/auth-type': 'basic',
|
||||
'nginx.ingress.kubernetes.io/auth-secret': 'basic-auth',
|
||||
'nginx.ingress.kubernetes.io/auth-realm': 'Authentication Required',
|
||||
}) +
|
||||
ingress.mixin.spec.withRules(
|
||||
ingressRule.new() +
|
||||
ingressRule.withHost('prometheus.example.com') +
|
||||
ingressRule.mixin.http.withPaths(
|
||||
httpIngressPath.new() +
|
||||
httpIngressPath.mixin.backend.withServiceName('prometheus-k8s') +
|
||||
httpIngressPath.mixin.backend.withServicePort('web')
|
||||
),
|
||||
),
|
||||
},
|
||||
} + {
|
||||
// Create basic auth secret - replace 'auth' file with your own
|
||||
ingress+:: {
|
||||
'basic-auth-secret':
|
||||
secret.new('basic-auth', { auth: std.base64(importstr 'auth') }) +
|
||||
secret.mixin.metadata.withNamespace($._config.namespace),
|
||||
},
|
||||
};
|
||||
|
||||
{ [name + '-ingress']: kp.ingress[name] for name in std.objectFields(kp.ingress) }
|
14
examples/internal-registry.jsonnet
Normal file
14
examples/internal-registry.jsonnet
Normal file
@@ -0,0 +1,14 @@
|
||||
local mixin = import 'kube-prometheus/kube-prometheus-config-mixins.libsonnet';
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
},
|
||||
} + mixin.withImageRepository('internal-registry.com/organization');
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
7
examples/jsonnet-build-snippet/build-snippet.jsonnet
Normal file
7
examples/jsonnet-build-snippet/build-snippet.jsonnet
Normal file
@@ -0,0 +1,7 @@
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
2
examples/jsonnet-snippets/bootkube.jsonnet
Normal file
2
examples/jsonnet-snippets/bootkube.jsonnet
Normal file
@@ -0,0 +1,2 @@
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-bootkube.libsonnet')
|
3
examples/jsonnet-snippets/kops-coredns.jsonnet
Normal file
3
examples/jsonnet-snippets/kops-coredns.jsonnet
Normal file
@@ -0,0 +1,3 @@
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-kops.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-kops-coredns.libsonnet')
|
2
examples/jsonnet-snippets/kops.jsonnet
Normal file
2
examples/jsonnet-snippets/kops.jsonnet
Normal file
@@ -0,0 +1,2 @@
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-kops.libsonnet')
|
2
examples/jsonnet-snippets/kube-aws.jsonnet
Normal file
2
examples/jsonnet-snippets/kube-aws.jsonnet
Normal file
@@ -0,0 +1,2 @@
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-kube-aws.libsonnet')
|
2
examples/jsonnet-snippets/kubeadm.jsonnet
Normal file
2
examples/jsonnet-snippets/kubeadm.jsonnet
Normal file
@@ -0,0 +1,2 @@
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-kubeadm.libsonnet')
|
2
examples/jsonnet-snippets/kubespray.jsonnet
Normal file
2
examples/jsonnet-snippets/kubespray.jsonnet
Normal file
@@ -0,0 +1,2 @@
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-kubespray.libsonnet')
|
2
examples/jsonnet-snippets/node-ports.jsonnet
Normal file
2
examples/jsonnet-snippets/node-ports.jsonnet
Normal file
@@ -0,0 +1,2 @@
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-node-ports.libsonnet')
|
9
examples/ksonnet-example.jsonnet
Normal file
9
examples/ksonnet-example.jsonnet
Normal file
@@ -0,0 +1,9 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
local daemonset = k.apps.v1beta2.daemonSet;
|
||||
|
||||
((import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
nodeExporter+: {
|
||||
daemonset+:
|
||||
daemonset.mixin.metadata.withNamespace('my-custom-namespace'),
|
||||
},
|
||||
}).nodeExporter.daemonset
|
28
examples/kustomize.jsonnet
Normal file
28
examples/kustomize.jsonnet
Normal file
@@ -0,0 +1,28 @@
|
||||
local kp =
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
},
|
||||
};
|
||||
|
||||
local manifests =
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['prometheus-adapter-' + name]: kp.prometheusAdapter[name] for name in std.objectFields(kp.prometheusAdapter) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) };
|
||||
|
||||
local kustomizationResourceFile(name) = './manifests/' + name + '.yaml';
|
||||
local kustomization = {
|
||||
apiVersion: 'kustomize.config.k8s.io/v1beta1',
|
||||
kind: 'Kustomization',
|
||||
resources: std.map(kustomizationResourceFile, std.objectFields(manifests)),
|
||||
};
|
||||
|
||||
manifests {
|
||||
'../kustomization': kustomization,
|
||||
}
|
||||
|
59
examples/minikube.jsonnet
Normal file
59
examples/minikube.jsonnet
Normal file
@@ -0,0 +1,59 @@
|
||||
local kp =
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-kubeadm.libsonnet') +
|
||||
// Note that NodePort type services is likely not a good idea for your production use case, it is only used for demonstration purposes here.
|
||||
(import 'kube-prometheus/kube-prometheus-node-ports.libsonnet') +
|
||||
{
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
alertmanager+:: {
|
||||
config: importstr 'alertmanager-config.yaml',
|
||||
},
|
||||
grafana+:: {
|
||||
config: { // http://docs.grafana.org/installation/configuration/
|
||||
sections: {
|
||||
// Do not require grafana users to login/authenticate
|
||||
'auth.anonymous': { enabled: true },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
// For simplicity, each of the following values for 'externalUrl':
|
||||
// * assume that `minikube ip` prints "192.168.99.100"
|
||||
// * hard-code the NodePort for each app
|
||||
prometheus+:: {
|
||||
prometheus+: {
|
||||
// Reference info: https://coreos.com/operators/prometheus/docs/latest/api.html#prometheusspec
|
||||
spec+: {
|
||||
// An e.g. of the purpose of this is so the "Source" links on http://<alert-manager>/#/alerts are valid.
|
||||
externalUrl: 'http://192.168.99.100:30900',
|
||||
|
||||
// Reference info: "external_labels" on https://prometheus.io/docs/prometheus/latest/configuration/configuration/
|
||||
externalLabels: {
|
||||
// This 'cluster' label will be included on every firing prometheus alert. (This is more useful
|
||||
// when running multiple clusters in a shared environment (e.g. AWS) with other users.)
|
||||
cluster: 'minikube-<INSERT YOUR USERNAME HERE>',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
alertmanager+:: {
|
||||
alertmanager+: {
|
||||
// Reference info: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#alertmanagerspec
|
||||
spec+: {
|
||||
externalUrl: 'http://192.168.99.100:30903',
|
||||
|
||||
logLevel: 'debug', // So firing alerts show up in log
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
32
examples/prometheus-additional-alert-rule-example.jsonnet
Normal file
32
examples/prometheus-additional-alert-rule-example.jsonnet
Normal file
@@ -0,0 +1,32 @@
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
},
|
||||
prometheusAlerts+:: {
|
||||
groups+: [
|
||||
{
|
||||
name: 'example-group',
|
||||
rules: [
|
||||
{
|
||||
alert: 'Watchdog',
|
||||
expr: 'vector(1)',
|
||||
labels: {
|
||||
severity: 'none',
|
||||
},
|
||||
annotations: {
|
||||
description: 'This is a Watchdog meant to ensure that the entire alerting pipeline is functional.',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
};
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
@@ -0,0 +1,26 @@
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
},
|
||||
prometheusRules+:: {
|
||||
groups+: [
|
||||
{
|
||||
name: 'example-group',
|
||||
rules: [
|
||||
{
|
||||
record: 'some_recording_rule_name',
|
||||
expr: 'vector(1)',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
};
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
11
examples/prometheus-additional-rendered-rule-example.jsonnet
Normal file
11
examples/prometheus-additional-rendered-rule-example.jsonnet
Normal file
@@ -0,0 +1,11 @@
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
prometheusAlerts+:: (import 'existingrule.json'),
|
||||
};
|
||||
|
||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
9
examples/prometheus-name-override.jsonnet
Normal file
9
examples/prometheus-name-override.jsonnet
Normal file
@@ -0,0 +1,9 @@
|
||||
((import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
prometheus+: {
|
||||
prometheus+: {
|
||||
metadata+: {
|
||||
name: 'my-name',
|
||||
},
|
||||
},
|
||||
},
|
||||
}).prometheus.prometheus
|
58
examples/prometheus-pvc.jsonnet
Normal file
58
examples/prometheus-pvc.jsonnet
Normal file
@@ -0,0 +1,58 @@
|
||||
// Reference info: documentation for https://github.com/ksonnet/ksonnet-lib can be found at http://g.bryan.dev.hepti.center
|
||||
//
|
||||
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet'; // https://github.com/ksonnet/ksonnet-lib/blob/master/ksonnet.beta.3/k.libsonnet - imports k8s.libsonnet
|
||||
// * https://github.com/ksonnet/ksonnet-lib/blob/master/ksonnet.beta.3/k8s.libsonnet defines things such as "persistentVolumeClaim:: {"
|
||||
//
|
||||
local pvc = k.core.v1.persistentVolumeClaim; // https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.11/#persistentvolumeclaim-v1-core (defines variable named 'spec' of type 'PersistentVolumeClaimSpec')
|
||||
|
||||
local kp =
|
||||
(import 'kube-prometheus/kube-prometheus.libsonnet') +
|
||||
(import 'kube-prometheus/kube-prometheus-bootkube.libsonnet') +
|
||||
{
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
},
|
||||
|
||||
prometheus+:: {
|
||||
prometheus+: {
|
||||
spec+: { // https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#prometheusspec
|
||||
// If a value isn't specified for 'retention', then by default the '--storage.tsdb.retention=24h' arg will be passed to prometheus by prometheus-operator.
|
||||
// The possible values for a prometheus <duration> are:
|
||||
// * https://github.com/prometheus/common/blob/c7de230/model/time.go#L178 specifies "^([0-9]+)(y|w|d|h|m|s|ms)$" (years weeks days hours minutes seconds milliseconds)
|
||||
retention: '30d',
|
||||
|
||||
// Reference info: https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md
|
||||
// By default (if the following 'storage.volumeClaimTemplate' isn't created), prometheus will be created with an EmptyDir for the 'prometheus-k8s-db' volume (for the prom tsdb).
|
||||
// This 'storage.volumeClaimTemplate' causes the following to be automatically created (via dynamic provisioning) for each prometheus pod:
|
||||
// * PersistentVolumeClaim (and a corresponding PersistentVolume)
|
||||
// * the actual volume (per the StorageClassName specified below)
|
||||
storage: { // https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#storagespec
|
||||
volumeClaimTemplate: // (same link as above where the 'pvc' variable is defined)
|
||||
pvc.new() + // http://g.bryan.dev.hepti.center/core/v1/persistentVolumeClaim/#core.v1.persistentVolumeClaim.new
|
||||
|
||||
pvc.mixin.spec.withAccessModes('ReadWriteOnce') +
|
||||
|
||||
// https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.11/#resourcerequirements-v1-core (defines 'requests'),
|
||||
// and https://kubernetes.io/docs/concepts/policy/resource-quotas/#storage-resource-quota (defines 'requests.storage')
|
||||
pvc.mixin.spec.resources.withRequests({ storage: '100Gi' }) +
|
||||
|
||||
// A StorageClass of the following name (which can be seen via `kubectl get storageclass` from a node in the given K8s cluster) must exist prior to kube-prometheus being deployed.
|
||||
pvc.mixin.spec.withStorageClassName('ssd'),
|
||||
|
||||
// The following 'selector' is only needed if you're using manual storage provisioning (https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md#manual-storage-provisioning).
|
||||
// And note that this is not supported/allowed by AWS - uncommenting the following 'selector' line (when deploying kube-prometheus to a K8s cluster in AWS) will cause the pvc to be stuck in the Pending status and have the following error:
|
||||
// * 'Failed to provision volume with StorageClass "ssd": claim.Spec.Selector is not supported for dynamic provisioning on AWS'
|
||||
//pvc.mixin.spec.selector.withMatchLabels({}),
|
||||
}, // storage
|
||||
}, // spec
|
||||
}, // prometheus
|
||||
}, // prometheus
|
||||
|
||||
};
|
||||
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
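For readers who prefer not to use the ksonnet `pvc` helpers, the override above can also be written as a plain object. A rough equivalent sketch (same retention, request size, and StorageClass as above):

```jsonnet
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: { namespace: 'monitoring' },
  prometheus+:: {
    prometheus+: {
      spec+: {
        retention: '30d',
        storage: {
          volumeClaimTemplate: {
            spec: {
              accessModes: ['ReadWriteOnce'],
              resources: { requests: { storage: '100Gi' } },
              storageClassName: 'ssd',
            },
          },
        },
      },
    },
  },
};

{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) }
```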
7
experimental/custom-metrics-api/.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
apiserver-key.pem
|
||||
apiserver.csr
|
||||
apiserver.pem
|
||||
metrics-ca-config.json
|
||||
metrics-ca.crt
|
||||
metrics-ca.key
|
||||
cm-adapter-serving-certs.yaml
|
21
experimental/custom-metrics-api/README.md
Normal file
@@ -0,0 +1,21 @@
|
||||
# Custom Metrics API
|
||||
|
||||
The custom metrics API allows the HPA v2 to scale based on arbitrary metrics.
|
||||
|
||||
This directory contains an example deployment which extends the Prometheus Adapter, deployed with kube-prometheus, to serve the [Custom Metrics API](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/instrumentation/custom-metrics-api.md) by talking to the Prometheus running inside the cluster.
|
||||
|
||||
Make sure you have the Prometheus Adapter up and running in the `monitoring` namespace.
|
||||
|
||||
You can deploy everything in the `monitoring` namespace using `./deploy.sh`.
|
||||
|
||||
When you're done, you can teardown using the `./teardown.sh` script.
|
||||
|
||||
### Sample App
|
||||
|
||||
Additionally, this directory contains a sample app that uses the [Horizontal Pod Autoscaler](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/) to scale the Deployment's number of Pod replicas up and down as needed.
|
||||
Deploy this app by running `kubectl apply -f sample-app.yaml`.
|
||||
Make the app accessible on your system, for example by using `kubectl port-forward svc/sample-app 8080`. Next, put some load on its HTTP endpoints.
|
||||
|
||||
A tool like [hey](https://github.com/rakyll/hey) is helpful for doing so: `hey -c 20 -n 100000000 http://localhost:8080/metrics`
|
||||
|
||||
There is even more detailed information on this sample app at [luxas/kubeadm-workshop](https://github.com/luxas/kubeadm-workshop#deploying-the-prometheus-operator-for-monitoring-services-in-the-cluster).
|
@@ -0,0 +1,12 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: custom-metrics-server-resources
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: custom-metrics-server-resources
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: prometheus-adapter
|
||||
namespace: monitoring
|
@@ -0,0 +1,13 @@
|
||||
apiVersion: apiregistration.k8s.io/v1beta1
|
||||
kind: APIService
|
||||
metadata:
|
||||
name: v1beta1.custom.metrics.k8s.io
|
||||
spec:
|
||||
service:
|
||||
name: prometheus-adapter
|
||||
namespace: monitoring
|
||||
group: custom.metrics.k8s.io
|
||||
version: v1beta1
|
||||
insecureSkipTLSVerify: true
|
||||
groupPriorityMinimum: 100
|
||||
versionPriority: 100
|
@@ -0,0 +1,9 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: custom-metrics-server-resources
|
||||
rules:
|
||||
- apiGroups:
|
||||
- custom.metrics.k8s.io
|
||||
resources: ["*"]
|
||||
verbs: ["*"]
|
@@ -0,0 +1,98 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: adapter-config
|
||||
namespace: monitoring
|
||||
data:
|
||||
config.yaml: |
|
||||
rules:
|
||||
- seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}'
|
||||
seriesFilters: []
|
||||
resources:
|
||||
overrides:
|
||||
namespace:
|
||||
resource: namespace
|
||||
pod_name:
|
||||
resource: pod
|
||||
name:
|
||||
matches: ^container_(.*)_seconds_total$
|
||||
as: ""
|
||||
metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[1m])) by (<<.GroupBy>>)
|
||||
- seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}'
|
||||
seriesFilters:
|
||||
- isNot: ^container_.*_seconds_total$
|
||||
resources:
|
||||
overrides:
|
||||
namespace:
|
||||
resource: namespace
|
||||
pod_name:
|
||||
resource: pod
|
||||
name:
|
||||
matches: ^container_(.*)_total$
|
||||
as: ""
|
||||
metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}[1m])) by (<<.GroupBy>>)
|
||||
- seriesQuery: '{__name__=~"^container_.*",container_name!="POD",namespace!="",pod_name!=""}'
|
||||
seriesFilters:
|
||||
- isNot: ^container_.*_total$
|
||||
resources:
|
||||
overrides:
|
||||
namespace:
|
||||
resource: namespace
|
||||
pod_name:
|
||||
resource: pod
|
||||
name:
|
||||
matches: ^container_(.*)$
|
||||
as: ""
|
||||
metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>,container_name!="POD"}) by (<<.GroupBy>>)
|
||||
- seriesQuery: '{namespace!="",__name__!~"^container_.*"}'
|
||||
seriesFilters:
|
||||
- isNot: .*_total$
|
||||
resources:
|
||||
template: <<.Resource>>
|
||||
name:
|
||||
matches: ""
|
||||
as: ""
|
||||
metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)
|
||||
- seriesQuery: '{namespace!="",__name__!~"^container_.*"}'
|
||||
seriesFilters:
|
||||
- isNot: .*_seconds_total
|
||||
resources:
|
||||
template: <<.Resource>>
|
||||
name:
|
||||
matches: ^(.*)_total$
|
||||
as: ""
|
||||
metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)
|
||||
- seriesQuery: '{namespace!="",__name__!~"^container_.*"}'
|
||||
seriesFilters: []
|
||||
resources:
|
||||
template: <<.Resource>>
|
||||
name:
|
||||
matches: ^(.*)_seconds_total$
|
||||
as: ""
|
||||
metricsQuery: sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)
|
||||
resourceRules:
|
||||
cpu:
|
||||
containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)
|
||||
nodeQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>, id='/'}[1m])) by (<<.GroupBy>>)
|
||||
resources:
|
||||
overrides:
|
||||
node:
|
||||
resource: node
|
||||
namespace:
|
||||
resource: namespace
|
||||
pod_name:
|
||||
resource: pod
|
||||
containerLabel: container_name
|
||||
memory:
|
||||
containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>}) by (<<.GroupBy>>)
|
||||
nodeQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,id='/'}) by (<<.GroupBy>>)
|
||||
resources:
|
||||
overrides:
|
||||
node:
|
||||
resource: node
|
||||
namespace:
|
||||
resource: namespace
|
||||
pod_name:
|
||||
resource: pod
|
||||
containerLabel: container_name
|
||||
window: 1m
|
7
experimental/custom-metrics-api/deploy.sh
Normal file
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
kubectl apply -n monitoring -f custom-metrics-apiserver-resource-reader-cluster-role-binding.yaml
|
||||
kubectl apply -n monitoring -f custom-metrics-apiservice.yaml
|
||||
kubectl apply -n monitoring -f custom-metrics-cluster-role.yaml
|
||||
kubectl apply -n monitoring -f custom-metrics-configmap.yaml
|
||||
kubectl apply -n monitoring -f hpa-custom-metrics-cluster-role-binding.yaml
|
@@ -0,0 +1,12 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: hpa-controller-custom-metrics
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: custom-metrics-server-resources
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: horizontal-pod-autoscaler
|
||||
namespace: kube-system
|
67
experimental/custom-metrics-api/sample-app.yaml
Normal file
@@ -0,0 +1,67 @@
|
||||
kind: ServiceMonitor
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
metadata:
|
||||
name: sample-app
|
||||
labels:
|
||||
app: sample-app
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: sample-app
|
||||
endpoints:
|
||||
- port: http
|
||||
interval: 5s
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: sample-app
|
||||
labels:
|
||||
app: sample-app
|
||||
spec:
|
||||
ports:
|
||||
- name: http
|
||||
port: 8080
|
||||
targetPort: 8080
|
||||
selector:
|
||||
app: sample-app
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: sample-app
|
||||
labels:
|
||||
app: sample-app
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: sample-app
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: sample-app
|
||||
spec:
|
||||
containers:
|
||||
- image: luxas/autoscale-demo:v0.1.2
|
||||
name: metrics-provider
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 8080
|
||||
---
|
||||
kind: HorizontalPodAutoscaler
|
||||
apiVersion: autoscaling/v2beta1
|
||||
metadata:
|
||||
name: sample-app
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: sample-app
|
||||
minReplicas: 1
|
||||
maxReplicas: 10
|
||||
metrics:
|
||||
- type: Pods
|
||||
pods:
|
||||
metricName: http_requests
|
||||
targetAverageValue: 500m
|
7
experimental/custom-metrics-api/teardown.sh
Normal file
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
kubectl delete -n monitoring -f custom-metrics-apiserver-resource-reader-cluster-role-binding.yaml
|
||||
kubectl delete -n monitoring -f custom-metrics-apiservice.yaml
|
||||
kubectl delete -n monitoring -f custom-metrics-cluster-role.yaml
|
||||
kubectl delete -n monitoring -f custom-metrics-configmap.yaml
|
||||
kubectl delete -n monitoring -f hpa-custom-metrics-cluster-role-binding.yaml
|
12
experimental/metrics-server/auth-delegator.yaml
Normal file
@@ -0,0 +1,12 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: metrics-server:system:auth-delegator
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: system:auth-delegator
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: metrics-server
|
||||
namespace: kube-system
|
13
experimental/metrics-server/auth-reader.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1beta1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
name: metrics-server-auth-reader
|
||||
namespace: kube-system
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: extension-apiserver-authentication-reader
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: metrics-server
|
||||
namespace: kube-system
|
13
experimental/metrics-server/metrics-apiservice.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
apiVersion: apiregistration.k8s.io/v1beta1
|
||||
kind: APIService
|
||||
metadata:
|
||||
name: v1beta1.metrics.k8s.io
|
||||
spec:
|
||||
service:
|
||||
name: metrics-server
|
||||
namespace: kube-system
|
||||
group: metrics.k8s.io
|
||||
version: v1beta1
|
||||
insecureSkipTLSVerify: true
|
||||
groupPriorityMinimum: 100
|
||||
versionPriority: 100
|
@@ -0,0 +1,12 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: system:metrics-server
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: system:metrics-server
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: metrics-server
|
||||
namespace: kube-system
|
24
experimental/metrics-server/metrics-server-cluster-role.yaml
Normal file
@@ -0,0 +1,24 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: system:metrics-server
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- pods
|
||||
- nodes
|
||||
- nodes/stats
|
||||
- namespaces
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- "extensions"
|
||||
resources:
|
||||
- deployments
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
25
experimental/metrics-server/metrics-server-deployment.yaml
Normal file
@@ -0,0 +1,25 @@
|
||||
apiVersion: extensions/v1beta1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: metrics-server
|
||||
namespace: kube-system
|
||||
labels:
|
||||
k8s-app: metrics-server
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
k8s-app: metrics-server
|
||||
template:
|
||||
metadata:
|
||||
name: metrics-server
|
||||
labels:
|
||||
k8s-app: metrics-server
|
||||
spec:
|
||||
serviceAccountName: metrics-server
|
||||
containers:
|
||||
- name: metrics-server
|
||||
image: gcr.io/google_containers/metrics-server-amd64:v0.2.0
|
||||
imagePullPolicy: Always
|
||||
command:
|
||||
- /metrics-server
|
||||
- --source=kubernetes.summary_api:''
|
@@ -0,0 +1,5 @@
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: metrics-server
|
||||
namespace: kube-system
|
14
experimental/metrics-server/metrics-server-service.yaml
Normal file
@@ -0,0 +1,14 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: metrics-server
|
||||
namespace: kube-system
|
||||
labels:
|
||||
kubernetes.io/name: "Metrics-server"
|
||||
spec:
|
||||
selector:
|
||||
k8s-app: metrics-server
|
||||
ports:
|
||||
- port: 443
|
||||
protocol: TCP
|
||||
targetPort: 443
|
17
grafana-image/Dockerfile
Normal file
@@ -0,0 +1,17 @@
|
||||
FROM debian:9.3-slim
|
||||
|
||||
ARG GRAFANA_VERSION
|
||||
|
||||
RUN apt-get update && apt-get install -qq -y wget tar sqlite && \
|
||||
wget -O /tmp/grafana.tar.gz https://s3-us-west-2.amazonaws.com/grafana-releases/release/grafana-$GRAFANA_VERSION.linux-x64.tar.gz && \
|
||||
tar -zxvf /tmp/grafana.tar.gz -C /tmp && mv /tmp/grafana-$GRAFANA_VERSION /grafana && \
|
||||
rm -rf /tmp/grafana.tar.gz
|
||||
|
||||
ADD config.ini /grafana/conf/config.ini
|
||||
|
||||
USER nobody
|
||||
EXPOSE 3000
|
||||
VOLUME [ "/data" ]
|
||||
WORKDIR /grafana
|
||||
ENTRYPOINT [ "/grafana/bin/grafana-server" ]
|
||||
CMD [ "-config=/grafana/conf/config.ini" ]
|
5
grafana-image/Makefile
Normal file
@@ -0,0 +1,5 @@
|
||||
VERSION=5.0.3
|
||||
IMAGE_TAG=$(VERSION)
|
||||
|
||||
container:
|
||||
docker build --build-arg GRAFANA_VERSION=$(VERSION) -t quay.io/coreos/monitoring-grafana:$(IMAGE_TAG) .
|
16
grafana-image/config.ini
Normal file
@@ -0,0 +1,16 @@
|
||||
[database]
|
||||
path = /data/grafana.db
|
||||
|
||||
[paths]
|
||||
data = /data
|
||||
logs = /data/log
|
||||
plugins = /data/plugins
|
||||
|
||||
[session]
|
||||
provider = memory
|
||||
|
||||
[auth.basic]
|
||||
enabled = false
|
||||
|
||||
[auth.anonymous]
|
||||
enabled = true
|
9
hack/example-service-monitoring/deploy
Executable file
@@ -0,0 +1,9 @@
|
||||
#!/usr/bin/env bash
|
||||
# exit immediately when a command fails
|
||||
set -e
|
||||
# only exit with zero if all commands of the pipeline exit successfully
|
||||
set -o pipefail
|
||||
# error on unset variables
|
||||
set -u
|
||||
|
||||
kubectl apply -f examples/example-app
|
9
hack/example-service-monitoring/teardown
Executable file
@@ -0,0 +1,9 @@
|
||||
#!/usr/bin/env bash
|
||||
# exit immediately when a command fails
|
||||
set -e
|
||||
# only exit with zero if all commands of the pipeline exit successfully
|
||||
set -o pipefail
|
||||
# error on unset variables
|
||||
set -u
|
||||
|
||||
kubectl delete -f examples/example-app
|
2
jsonnet/kube-prometheus/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
jsonnetfile.lock.json
|
||||
vendor/
|
125
jsonnet/kube-prometheus/alertmanager/alertmanager.libsonnet
Normal file
@@ -0,0 +1,125 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
namespace: 'default',
|
||||
|
||||
versions+:: {
|
||||
alertmanager: 'v0.16.1',
|
||||
},
|
||||
|
||||
imageRepos+:: {
|
||||
alertmanager: 'quay.io/prometheus/alertmanager',
|
||||
},
|
||||
|
||||
alertmanager+:: {
|
||||
name: $._config.alertmanager.name,
|
||||
config: {
|
||||
global: {
|
||||
resolve_timeout: '5m',
|
||||
},
|
||||
route: {
|
||||
group_by: ['job'],
|
||||
group_wait: '30s',
|
||||
group_interval: '5m',
|
||||
repeat_interval: '12h',
|
||||
receiver: 'null',
|
||||
routes: [
|
||||
{
|
||||
receiver: 'null',
|
||||
match: {
|
||||
alertname: 'Watchdog',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
receivers: [
|
||||
{
|
||||
name: 'null',
|
||||
},
|
||||
],
|
||||
},
|
||||
replicas: 3,
|
||||
},
|
||||
},
|
||||
|
||||
alertmanager+:: {
|
||||
secret:
|
||||
local secret = k.core.v1.secret;
|
||||
|
||||
if std.type($._config.alertmanager.config) == 'object' then
|
||||
secret.new('alertmanager-' + $._config.alertmanager.name, { 'alertmanager.yaml': std.base64(std.manifestYamlDoc($._config.alertmanager.config)) }) +
|
||||
secret.mixin.metadata.withNamespace($._config.namespace)
|
||||
else
|
||||
secret.new('alertmanager-' + $._config.alertmanager.name, { 'alertmanager.yaml': std.base64($._config.alertmanager.config) }) +
|
||||
secret.mixin.metadata.withNamespace($._config.namespace),
|
||||
|
||||
serviceAccount:
|
||||
local serviceAccount = k.core.v1.serviceAccount;
|
||||
|
||||
serviceAccount.new('alertmanager-' + $._config.alertmanager.name) +
|
||||
serviceAccount.mixin.metadata.withNamespace($._config.namespace),
|
||||
|
||||
service:
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
local alertmanagerPort = servicePort.newNamed('web', 9093, 'web');
|
||||
|
||||
service.new('alertmanager-' + $._config.alertmanager.name, { app: 'alertmanager', alertmanager: $._config.alertmanager.name }, alertmanagerPort) +
|
||||
service.mixin.spec.withSessionAffinity('ClientIP') +
|
||||
service.mixin.metadata.withNamespace($._config.namespace) +
|
||||
service.mixin.metadata.withLabels({ alertmanager: $._config.alertmanager.name }),
|
||||
|
||||
serviceMonitor:
|
||||
{
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'alertmanager',
|
||||
namespace: $._config.namespace,
|
||||
labels: {
|
||||
'k8s-app': 'alertmanager',
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
selector: {
|
||||
matchLabels: {
|
||||
alertmanager: $._config.alertmanager.name,
|
||||
},
|
||||
},
|
||||
endpoints: [
|
||||
{
|
||||
port: 'web',
|
||||
interval: '30s',
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
|
||||
alertmanager:
|
||||
{
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'Alertmanager',
|
||||
metadata: {
|
||||
name: $._config.alertmanager.name,
|
||||
namespace: $._config.namespace,
|
||||
labels: {
|
||||
alertmanager: $._config.alertmanager.name,
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
replicas: $._config.alertmanager.replicas,
|
||||
version: $._config.versions.alertmanager,
|
||||
baseImage: $._config.imageRepos.alertmanager,
|
||||
nodeSelector: { 'beta.kubernetes.io/os': 'linux' },
|
||||
serviceAccountName: 'alertmanager-' + $._config.alertmanager.name,
|
||||
securityContext: {
|
||||
runAsUser: 1000,
|
||||
runAsNonRoot: true,
|
||||
fsGroup: 2000,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
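Since `$._config.alertmanager.config` above is a plain object that gets serialized into the `alertmanager.yaml` secret, a real notification receiver can be wired in by merging over it. A sketch (the receiver name and webhook URL are hypothetical):

```jsonnet
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: {
    alertmanager+: {
      config+: {
        route+: { receiver: 'team-x' },
        receivers: [
          { name: 'null' },  // keep the 'null' receiver referenced by the Watchdog route
          {
            name: 'team-x',
            webhook_configs: [{ url: 'http://alert-gateway.example.com/hook' }],
          },
        ],
      },
    },
  },
};

kp.alertmanager.secret
```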
52
jsonnet/kube-prometheus/alerts/alertmanager.libsonnet
Normal file
@@ -0,0 +1,52 @@
|
||||
{
|
||||
prometheusAlerts+:: {
|
||||
groups+: [
|
||||
{
|
||||
name: 'alertmanager.rules',
|
||||
rules: [
|
||||
{
|
||||
alert: 'AlertmanagerConfigInconsistent',
|
||||
annotations: {
|
||||
message: 'The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.',
|
||||
},
|
||||
expr: |||
|
||||
count_values("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_spec_replicas{%(prometheusOperatorSelector)s,controller="alertmanager"}, "service", "alertmanager-$1", "name", "(.*)") != 1
|
||||
||| % $._config,
|
||||
'for': '5m',
|
||||
labels: {
|
||||
severity: 'critical',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'AlertmanagerFailedReload',
|
||||
annotations: {
|
||||
message: "Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod}}.",
|
||||
},
|
||||
expr: |||
|
||||
alertmanager_config_last_reload_successful{%(alertmanagerSelector)s} == 0
|
||||
||| % $._config,
|
||||
'for': '10m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert:'AlertmanagerMembersInconsistent',
|
||||
annotations:{
|
||||
message: 'Alertmanager has not found all other members of the cluster.',
|
||||
},
|
||||
expr: |||
|
||||
alertmanager_cluster_members{%(alertmanagerSelector)s}
|
||||
!= on (service) GROUP_LEFT()
|
||||
count by (service) (alertmanager_cluster_members{%(alertmanagerSelector)s})
|
||||
||| % $._config,
|
||||
'for': '5m',
|
||||
labels: {
|
||||
severity: 'critical',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
}
|
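The `%(alertmanagerSelector)s`-style placeholders in these rules are filled in from `$._config` via the `||| ... ||| % $._config` idiom; the selectors themselves are defined centrally in `kube-prometheus.libsonnet`. Their assumed shape is roughly the following (values shown are indicative, not part of this commit):

```jsonnet
{
  _config+:: {
    // Label selectors interpolated into the alert expressions above.
    alertmanagerSelector: 'job="alertmanager-main"',
    prometheusSelector: 'job="prometheus-k8s"',
    prometheusOperatorSelector: 'job="prometheus-operator"',
    nodeExporterSelector: 'job="node-exporter"',
    hostNetworkInterfaceSelector: 'device!~"veth.+"',
  },
}
```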
5
jsonnet/kube-prometheus/alerts/alerts.libsonnet
Normal file
@@ -0,0 +1,5 @@
|
||||
(import 'alertmanager.libsonnet') +
|
||||
(import 'general.libsonnet') +
|
||||
(import 'node.libsonnet') +
|
||||
(import 'prometheus.libsonnet') +
|
||||
(import 'prometheus-operator.libsonnet')
|
38
jsonnet/kube-prometheus/alerts/general.libsonnet
Normal file
@@ -0,0 +1,38 @@
|
||||
{
|
||||
prometheusAlerts+:: {
|
||||
groups+: [
|
||||
{
|
||||
name: 'general.rules',
|
||||
rules: [
|
||||
{
|
||||
alert: 'TargetDown',
|
||||
annotations: {
|
||||
message: '{{ $value }}% of the {{ $labels.job }} targets are down.',
|
||||
},
|
||||
expr: '100 * (count(up == 0) BY (job) / count(up) BY (job)) > 10',
|
||||
'for': '10m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'Watchdog',
|
||||
annotations: {
|
||||
message: |||
|
||||
This is an alert meant to ensure that the entire alerting pipeline is functional.
|
||||
This alert is always firing, therefore it should always be firing in Alertmanager
|
||||
and always fire against a receiver. There are integrations with various notification
|
||||
mechanisms that send a notification when this alert is not firing. For example the
|
||||
"DeadMansSnitch" integration in PagerDuty.
|
||||
|||,
|
||||
},
|
||||
expr: 'vector(1)',
|
||||
labels: {
|
||||
severity: 'none',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
}
|
112
jsonnet/kube-prometheus/alerts/node.libsonnet
Normal file
@@ -0,0 +1,112 @@
|
||||
{
|
||||
prometheusAlerts+:: {
|
||||
groups+: [
|
||||
{
|
||||
name: 'kube-prometheus-node-alerting.rules',
|
||||
rules: [
|
||||
{
|
||||
alert: 'NodeDiskRunningFull',
|
||||
annotations: {
|
||||
message: 'Device {{ $labels.device }} of node-exporter {{ $labels.namespace }}/{{ $labels.pod }} will be full within the next 24 hours.',
|
||||
},
|
||||
expr: |||
|
||||
(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[6h], 3600 * 24) < 0)
|
||||
||| % $._config,
|
||||
'for': '30m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'NodeDiskRunningFull',
|
||||
annotations: {
|
||||
message: 'Device {{ $labels.device }} of node-exporter {{ $labels.namespace }}/{{ $labels.pod }} will be full within the next 2 hours.',
|
||||
},
|
||||
expr: |||
|
||||
(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[30m], 3600 * 2) < 0)
|
||||
||| % $._config,
|
||||
'for': '10m',
|
||||
labels: {
|
||||
severity: 'critical',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'node-time',
|
||||
rules: [
|
||||
{
|
||||
alert: 'ClockSkewDetected',
|
||||
annotations: {
|
||||
message: 'Clock skew detected on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}. Ensure NTP is configured correctly on this host.',
|
||||
},
|
||||
expr: |||
|
||||
node_ntp_offset_seconds{%(nodeExporterSelector)s} < -0.03 or node_ntp_offset_seconds{%(nodeExporterSelector)s} > 0.03
|
||||
||| % $._config,
|
||||
'for': '2m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'node-network',
|
||||
rules: [
|
||||
{
|
||||
alert: 'NetworkReceiveErrors',
|
||||
annotations: {
|
||||
message: 'Network interface "{{ $labels.device }}" showing receive errors on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}',
|
||||
},
|
||||
expr: |||
|
||||
rate(node_network_receive_errs_total{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s}[2m]) > 0
|
||||
||| % $._config,
|
||||
'for': '2m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'NetworkTransmitErrors',
|
||||
annotations: {
|
||||
message: 'Network interface "{{ $labels.device }}" showing transmit errors on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}',
|
||||
},
|
||||
expr: |||
|
||||
rate(node_network_transmit_errs_total{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s}[2m]) > 0
|
||||
||| % $._config,
|
||||
'for': '2m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'NodeNetworkInterfaceDown',
|
||||
annotations: {
|
||||
message: 'Network interface "{{ $labels.device }}" down on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}',
|
||||
},
|
||||
expr: |||
|
||||
node_network_up{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s} == 0
|
||||
||| % $._config,
|
||||
'for': '2m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'NodeNetworkInterfaceFlapping',
|
||||
annotations: {
|
||||
message: 'Network interface "{{ $labels.device }}" changing its up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}',
|
||||
},
|
||||
expr: |||
|
||||
changes(node_network_up{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s}[2m]) > 2
|
||||
||| % $._config,
|
||||
'for': '2m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
}
|
37
jsonnet/kube-prometheus/alerts/prometheus-operator.libsonnet
Normal file
@@ -0,0 +1,37 @@
|
||||
{
|
||||
prometheusAlerts+:: {
|
||||
groups+: [
|
||||
{
|
||||
name: 'prometheus-operator',
|
||||
rules: [
|
||||
{
|
||||
alert: 'PrometheusOperatorReconcileErrors',
|
||||
expr: |||
|
||||
rate(prometheus_operator_reconcile_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1
|
||||
||| % $._config,
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
message: 'Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace }} Namespace.',
|
||||
},
|
||||
'for': '10m',
|
||||
},
|
||||
{
|
||||
alert: 'PrometheusOperatorNodeLookupErrors',
|
||||
expr: |||
|
||||
rate(prometheus_operator_node_address_lookup_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1
|
||||
||| % $._config,
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
message: 'Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.',
|
||||
},
|
||||
'for': '10m',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
}
|
151
jsonnet/kube-prometheus/alerts/prometheus.libsonnet
Normal file
@@ -0,0 +1,151 @@
|
||||
{
|
||||
prometheusAlerts+:: {
|
||||
groups+: [
|
||||
{
|
||||
name: 'prometheus.rules',
|
||||
rules: [
|
||||
{
|
||||
alert: 'PrometheusConfigReloadFailed',
|
||||
annotations: {
|
||||
description: "Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}}",
|
||||
summary: "Reloading Prometheus' configuration failed",
|
||||
},
|
||||
expr: |||
|
||||
prometheus_config_last_reload_successful{%(prometheusSelector)s} == 0
|
||||
||| % $._config,
|
||||
'for': '10m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'PrometheusNotificationQueueRunningFull',
|
||||
annotations: {
|
||||
description: "Prometheus' alert notification queue is running full for {{$labels.namespace}}/{{ $labels.pod}}",
|
||||
summary: "Prometheus' alert notification queue is running full",
|
||||
},
|
||||
expr: |||
|
||||
predict_linear(prometheus_notifications_queue_length{%(prometheusSelector)s}[5m], 60 * 30) > prometheus_notifications_queue_capacity{%(prometheusSelector)s}
|
||||
||| % $._config,
|
||||
'for': '10m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'PrometheusErrorSendingAlerts',
|
||||
annotations: {
|
||||
description: 'Errors while sending alerts from Prometheus {{$labels.namespace}}/{{ $labels.pod}} to Alertmanager {{$labels.Alertmanager}}',
|
||||
summary: 'Errors while sending alert from Prometheus',
|
||||
},
|
||||
expr: |||
|
||||
rate(prometheus_notifications_errors_total{%(prometheusSelector)s}[5m]) / rate(prometheus_notifications_sent_total{%(prometheusSelector)s}[5m]) > 0.01
|
||||
||| % $._config,
|
||||
'for': '10m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'PrometheusErrorSendingAlerts',
|
||||
annotations: {
|
||||
description: 'Errors while sending alerts from Prometheus {{$labels.namespace}}/{{ $labels.pod}} to Alertmanager {{$labels.Alertmanager}}',
|
||||
summary: 'Errors while sending alerts from Prometheus',
|
||||
},
|
||||
expr: |||
|
||||
rate(prometheus_notifications_errors_total{%(prometheusSelector)s}[5m]) / rate(prometheus_notifications_sent_total{%(prometheusSelector)s}[5m]) > 0.03
|
||||
||| % $._config,
|
||||
'for': '10m',
|
||||
labels: {
|
||||
severity: 'critical',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'PrometheusNotConnectedToAlertmanagers',
|
||||
annotations: {
|
||||
description: 'Prometheus {{ $labels.namespace }}/{{ $labels.pod}} is not connected to any Alertmanagers',
|
||||
summary: 'Prometheus is not connected to any Alertmanagers',
|
||||
},
|
||||
expr: |||
|
||||
prometheus_notifications_alertmanagers_discovered{%(prometheusSelector)s} < 1
|
||||
||| % $._config,
|
||||
'for': '10m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'PrometheusTSDBReloadsFailing',
|
||||
annotations: {
|
||||
description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}} reload failures over the last four hours.',
|
||||
summary: 'Prometheus has issues reloading data blocks from disk',
|
||||
},
|
||||
expr: |||
|
||||
increase(prometheus_tsdb_reloads_failures_total{%(prometheusSelector)s}[2h]) > 0
|
||||
||| % $._config,
|
||||
'for': '12h',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'PrometheusTSDBCompactionsFailing',
|
||||
annotations: {
|
||||
description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}} compaction failures over the last four hours.',
|
||||
summary: 'Prometheus has issues compacting sample blocks',
|
||||
},
|
||||
expr: |||
|
||||
increase(prometheus_tsdb_compactions_failed_total{%(prometheusSelector)s}[2h]) > 0
|
||||
||| % $._config,
|
||||
'for': '12h',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'PrometheusTSDBWALCorruptions',
|
||||
annotations: {
|
||||
description: '{{$labels.job}} at {{$labels.instance}} has a corrupted write-ahead log (WAL).',
|
||||
summary: 'Prometheus write-ahead log is corrupted',
|
||||
},
|
||||
expr: |||
|
||||
prometheus_tsdb_wal_corruptions_total{%(prometheusSelector)s} > 0
|
||||
||| % $._config,
|
||||
'for': '4h',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'PrometheusNotIngestingSamples',
|
||||
annotations: {
|
||||
description: "Prometheus {{ $labels.namespace }}/{{ $labels.pod}} isn't ingesting samples.",
|
||||
summary: "Prometheus isn't ingesting samples",
|
||||
},
|
||||
expr: |||
|
||||
rate(prometheus_tsdb_head_samples_appended_total{%(prometheusSelector)s}[5m]) <= 0
|
||||
||| % $._config,
|
||||
'for': '10m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'PrometheusTargetScrapesDuplicate',
|
||||
annotations: {
|
||||
description: '{{$labels.namespace}}/{{$labels.pod}} has many samples rejected due to duplicate timestamps but different values',
|
||||
summary: 'Prometheus has many samples rejected',
|
||||
},
|
||||
expr: |||
|
||||
increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{%(prometheusSelector)s}[5m]) > 0
|
||||
||| % $._config,
|
||||
'for': '10m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
}
|
157
jsonnet/kube-prometheus/alerts/tests.yaml
Normal file
@@ -0,0 +1,157 @@
|
||||
# TODO(metalmatze): This file is temporarily saved here for later reference
|
||||
# until we find out how to integrate the tests into our jsonnet stack.
|
||||
|
||||
rule_files:
|
||||
- rules.yaml
|
||||
|
||||
evaluation_interval: 1m
|
||||
|
||||
tests:
|
||||
- interval: 1m
|
||||
input_series:
|
||||
- series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.0",namespace="monitoring",pod="alertmanager-main-0",service="alertmanager-main"}'
|
||||
values: '3 3 3 3 3 2 2 2 2 2 2 1 1 1 1 1 1 0 0 0 0 0 0'
|
||||
- series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.1",namespace="monitoring",pod="alertmanager-main-1",service="alertmanager-main"}'
|
||||
values: '3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3'
|
||||
- series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.2",namespace="monitoring",pod="alertmanager-main-2",service="alertmanager-main"}'
|
||||
values: '3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3'
|
||||
alert_rule_test:
|
||||
- eval_time: 5m
|
||||
alertname: AlertmanagerMembersInconsistent
|
||||
- eval_time: 11m
|
||||
alertname: AlertmanagerMembersInconsistent
|
||||
exp_alerts:
|
||||
- exp_labels:
|
||||
service: 'alertmanager-main'
|
||||
severity: critical
|
||||
job: 'alertmanager-main'
|
||||
instance: 10.10.10.0
|
||||
namespace: monitoring
|
||||
pod: alertmanager-main-0
|
||||
exp_annotations:
|
||||
message: 'Alertmanager has not found all other members of the cluster.'
|
||||
- eval_time: 17m
|
||||
alertname: AlertmanagerMembersInconsistent
|
||||
exp_alerts:
|
||||
- exp_labels:
|
||||
service: 'alertmanager-main'
|
||||
severity: critical
|
||||
job: 'alertmanager-main'
|
||||
instance: 10.10.10.0
|
||||
namespace: monitoring
|
||||
pod: alertmanager-main-0
|
||||
exp_annotations:
|
||||
message: 'Alertmanager has not found all other members of the cluster.'
|
||||
- eval_time: 23m
|
||||
alertname: AlertmanagerMembersInconsistent
|
||||
exp_alerts:
|
||||
- exp_labels:
|
||||
service: 'alertmanager-main'
|
||||
severity: critical
|
||||
job: 'alertmanager-main'
|
||||
instance: 10.10.10.0
|
||||
namespace: monitoring
|
||||
pod: alertmanager-main-0
|
||||
exp_annotations:
|
||||
message: 'Alertmanager has not found all other members of the cluster.'
|
||||
- interval: 1m
|
||||
input_series:
|
||||
- series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.0",namespace="monitoring",pod="alertmanager-main-0",service="alertmanager-main"}'
|
||||
values: '3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
|
||||
- series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.1",namespace="monitoring",pod="alertmanager-main-1",service="alertmanager-main"}'
|
||||
values: '3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2'
|
||||
- series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.2",namespace="monitoring",pod="alertmanager-main-2",service="alertmanager-main"}'
|
||||
values: '3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2'
|
||||
alert_rule_test:
|
||||
- eval_time: 5m
|
||||
alertname: AlertmanagerMembersInconsistent
|
||||
- eval_time: 11m
|
||||
alertname: AlertmanagerMembersInconsistent
|
||||
exp_alerts:
|
||||
- exp_labels:
|
||||
service: 'alertmanager-main'
|
||||
severity: critical
|
||||
job: 'alertmanager-main'
|
||||
instance: 10.10.10.0
|
||||
namespace: monitoring
|
||||
pod: alertmanager-main-0
|
||||
exp_annotations:
|
||||
message: 'Alertmanager has not found all other members of the cluster.'
|
||||
- exp_labels:
|
||||
service: 'alertmanager-main'
|
||||
severity: critical
|
||||
job: 'alertmanager-main'
|
||||
instance: 10.10.10.1
|
||||
namespace: monitoring
|
||||
pod: alertmanager-main-1
|
||||
exp_annotations:
|
||||
message: 'Alertmanager has not found all other members of the cluster.'
|
||||
- exp_labels:
|
||||
service: 'alertmanager-main'
|
||||
severity: critical
|
||||
job: 'alertmanager-main'
|
||||
instance: 10.10.10.2
|
||||
namespace: monitoring
|
||||
pod: alertmanager-main-2
|
||||
exp_annotations:
|
||||
message: 'Alertmanager has not found all other members of the cluster.'
|
||||
- eval_time: 17m
|
||||
alertname: AlertmanagerMembersInconsistent
|
||||
exp_alerts:
|
||||
- exp_labels:
|
||||
service: 'alertmanager-main'
|
||||
severity: critical
|
||||
job: 'alertmanager-main'
|
||||
instance: 10.10.10.0
|
||||
namespace: monitoring
|
||||
pod: alertmanager-main-0
|
||||
exp_annotations:
|
||||
message: 'Alertmanager has not found all other members of the cluster.'
|
||||
- exp_labels:
|
||||
service: 'alertmanager-main'
|
||||
severity: critical
|
||||
job: 'alertmanager-main'
|
||||
instance: 10.10.10.1
|
||||
namespace: monitoring
|
||||
pod: alertmanager-main-1
|
||||
exp_annotations:
|
||||
message: 'Alertmanager has not found all other members of the cluster.'
|
||||
- exp_labels:
|
||||
service: 'alertmanager-main'
|
||||
severity: critical
|
||||
job: 'alertmanager-main'
|
||||
instance: 10.10.10.2
|
||||
namespace: monitoring
|
||||
pod: alertmanager-main-2
|
||||
exp_annotations:
|
||||
message: 'Alertmanager has not found all other members of the cluster.'
|
||||
- eval_time: 23m
|
||||
alertname: AlertmanagerMembersInconsistent
|
||||
exp_alerts:
|
||||
- exp_labels:
|
||||
service: 'alertmanager-main'
|
||||
severity: critical
|
||||
job: 'alertmanager-main'
|
||||
instance: 10.10.10.0
|
||||
namespace: monitoring
|
||||
pod: alertmanager-main-0
|
||||
exp_annotations:
|
||||
message: 'Alertmanager has not found all other members of the cluster.'
|
||||
- exp_labels:
|
||||
service: 'alertmanager-main'
|
||||
severity: critical
|
||||
job: 'alertmanager-main'
|
||||
instance: 10.10.10.1
|
||||
namespace: monitoring
|
||||
pod: alertmanager-main-1
|
||||
exp_annotations:
|
||||
message: 'Alertmanager has not found all other members of the cluster.'
|
||||
- exp_labels:
|
||||
service: 'alertmanager-main'
|
||||
severity: critical
|
||||
job: 'alertmanager-main'
|
||||
instance: 10.10.10.2
|
||||
namespace: monitoring
|
||||
pod: alertmanager-main-2
|
||||
exp_annotations:
|
||||
message: 'Alertmanager has not found all other members of the cluster.'
|
54
jsonnet/kube-prometheus/jsonnetfile.json
Normal file
@@ -0,0 +1,54 @@
|
||||
{
|
||||
"dependencies": [
|
||||
{
|
||||
"name": "ksonnet",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/ksonnet/ksonnet-lib",
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "master"
|
||||
},
|
||||
{
|
||||
"name": "kubernetes-mixin",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "master"
|
||||
},
|
||||
{
|
||||
"name": "grafana",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/brancz/kubernetes-grafana",
|
||||
"subdir": "grafana"
|
||||
}
|
||||
},
|
||||
"version": "master"
|
||||
},
|
||||
{
|
||||
"name": "prometheus-operator",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/coreos/prometheus-operator",
|
||||
"subdir": "jsonnet/prometheus-operator"
|
||||
}
|
||||
},
|
||||
"version": "v0.29.0"
|
||||
},
|
||||
{
|
||||
"name": "etcd-mixin",
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/coreos/etcd",
|
||||
"subdir": "Documentation/etcd-mixin"
|
||||
}
|
||||
},
|
||||
"version": "master"
|
||||
}
|
||||
]
|
||||
}
|
@@ -0,0 +1,39 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
local statefulSet = k.apps.v1beta2.statefulSet;
|
||||
local affinity = statefulSet.mixin.spec.template.spec.affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecutionType;
|
||||
local matchExpression = affinity.mixin.podAffinityTerm.labelSelector.matchExpressionsType;
|
||||
|
||||
{
|
||||
local antiaffinity(key, values) = {
|
||||
affinity: {
|
||||
podAntiAffinity: {
|
||||
preferredDuringSchedulingIgnoredDuringExecution: [
|
||||
affinity.new() +
|
||||
affinity.withWeight(100) +
|
||||
affinity.mixin.podAffinityTerm.withNamespaces($._config.namespace) +
|
||||
affinity.mixin.podAffinityTerm.withTopologyKey('kubernetes.io/hostname') +
|
||||
affinity.mixin.podAffinityTerm.labelSelector.withMatchExpressions([
|
||||
matchExpression.new() +
|
||||
matchExpression.withKey(key) +
|
||||
matchExpression.withOperator('In') +
|
||||
matchExpression.withValues(values),
|
||||
]),
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
alertmanager+:: {
|
||||
alertmanager+: {
|
||||
spec+:
|
||||
antiaffinity('alertmanager', [$._config.alertmanager.name]),
|
||||
},
|
||||
},
|
||||
|
||||
prometheus+: {
|
||||
prometheus+: {
|
||||
spec+:
|
||||
antiaffinity('prometheus', [$._config.prometheus.name]),
|
||||
},
|
||||
},
|
||||
}
|
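A usage sketch for the anti-affinity mixin above (the import path is assumed; adjust it to the actual file name in this directory):

```jsonnet
local kp =
  (import 'kube-prometheus/kube-prometheus.libsonnet') +
  (import 'kube-prometheus/kube-prometheus-anti-affinity.libsonnet') +
  { _config+:: { namespace: 'monitoring' } };

{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) }
```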
23
jsonnet/kube-prometheus/kube-prometheus-bootkube.libsonnet
Normal file
@@ -0,0 +1,23 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
{
|
||||
prometheus+:: {
|
||||
kubeControllerManagerPrometheusDiscoveryService:
|
||||
service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('http-metrics', 10252, 10252)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
kubeSchedulerPrometheusDiscoveryService:
|
||||
service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('http-metrics', 10251, 10251)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
kubeDnsPrometheusDiscoveryService:
|
||||
service.new('kube-dns-prometheus-discovery', { 'k8s-app': 'kube-dns' }, [servicePort.newNamed('http-metrics-skydns', 10055, 10055), servicePort.newNamed('http-metrics-dnsmasq', 10054, 10054)]) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-dns' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
},
|
||||
}
|
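Platform mixins like the bootkube one above (and the kops, kube-aws, kubeadm, and kubespray variants later in this change) are meant to be layered on top of the base library. A minimal usage sketch:

```jsonnet
local kp =
  (import 'kube-prometheus/kube-prometheus.libsonnet') +
  (import 'kube-prometheus/kube-prometheus-bootkube.libsonnet') +
  { _config+:: { namespace: 'monitoring' } };

{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) }
```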
@@ -0,0 +1,20 @@
|
||||
local l = import 'lib/lib.libsonnet';
|
||||
|
||||
// withImageRepository is a mixin that replaces all image prefixes with the given repository, e.g.
|
||||
// quay.io/coreos/addon-resizer -> $repository/addon-resizer
|
||||
// grafana/grafana -> $repository/grafana
|
||||
local withImageRepository(repository) = {
|
||||
local oldRepos = super._config.imageRepos,
|
||||
local substituteRepository(image, repository) =
|
||||
if repository == null then image else repository + '/' + l.imageName(image),
|
||||
_config+:: {
|
||||
imageRepos:: {
|
||||
[field]: substituteRepository(oldRepos[field], repository),
|
||||
for field in std.objectFields(oldRepos)
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
{
|
||||
withImageRepository:: withImageRepository,
|
||||
}
|
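A usage sketch for `withImageRepository` (the registry host below is hypothetical, and the mixin file's import path is assumed):

```jsonnet
local mixins = import 'kube-prometheus/kube-prometheus-config-mixins.libsonnet';

local kp =
  (import 'kube-prometheus/kube-prometheus.libsonnet') +
  mixins.withImageRepository('internal-registry.example.com/organization');

{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) }
```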
@@ -0,0 +1,25 @@
|
||||
{
|
||||
prometheus+:: {
|
||||
serviceMonitorKubelet+:
|
||||
{
|
||||
spec+: {
|
||||
endpoints: [
|
||||
{
|
||||
port: 'http-metrics',
|
||||
scheme: 'http',
|
||||
interval: '30s',
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
},
|
||||
{
|
||||
port: 'http-metrics',
|
||||
scheme: 'http',
|
||||
path: '/metrics/cadvisor',
|
||||
interval: '30s',
|
||||
honorLabels: true,
|
||||
bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
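The mixin above swaps the kubelet ServiceMonitor endpoints to plain `http`, which is useful on clusters where the kubelet only exposes metrics over the read-only/insecure port. A usage sketch (import path assumed):

```jsonnet
local kp =
  (import 'kube-prometheus/kube-prometheus.libsonnet') +
  (import 'kube-prometheus/kube-prometheus-insecure-kubelet.libsonnet') +
  { _config+:: { namespace: 'monitoring' } };

{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) }
```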
@@ -0,0 +1,13 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
{
|
||||
prometheus+:: {
|
||||
kubeDnsPrometheusDiscoveryService:
|
||||
service.new('kube-dns-prometheus-discovery', { 'k8s-app': 'kube-dns' }, [servicePort.newNamed('metrics', 9153, 9153)]) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-dns' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
},
|
||||
}
|
23
jsonnet/kube-prometheus/kube-prometheus-kops.libsonnet
Normal file
@@ -0,0 +1,23 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
{
|
||||
prometheus+:: {
|
||||
kubeControllerManagerPrometheusDiscoveryService:
|
||||
service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('http-metrics', 10252, 10252)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
kubeSchedulerPrometheusDiscoveryService:
|
||||
service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('http-metrics', 10251, 10251)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
kubeDnsPrometheusDiscoveryService:
|
||||
service.new('kube-dns-prometheus-discovery', { 'k8s-app': 'kube-dns' }, [servicePort.newNamed('metrics', 10055, 10055), servicePort.newNamed('http-metrics-dnsmasq', 10054, 10054)]) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-dns' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
},
|
||||
}
|
@@ -0,0 +1,8 @@
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet');
|
||||
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
{ ['kube-state-metrics-' + name]: kp.kubeStateMetrics[name] for name in std.objectFields(kp.kubeStateMetrics) } +
|
||||
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
|
||||
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
|
||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
18
jsonnet/kube-prometheus/kube-prometheus-kube-aws.libsonnet
Normal file
@@ -0,0 +1,18 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
{
|
||||
prometheus+: {
|
||||
kubeControllerManagerPrometheusDiscoveryService:
|
||||
service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('http-metrics', 10252, 10252)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
kubeSchedulerPrometheusDiscoveryService:
|
||||
service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('http-metrics', 10251, 10251)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
},
|
||||
}
|
18
jsonnet/kube-prometheus/kube-prometheus-kubeadm.libsonnet
Normal file
@@ -0,0 +1,18 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
{
|
||||
prometheus+: {
|
||||
kubeControllerManagerPrometheusDiscoveryService:
|
||||
service.new('kube-controller-manager-prometheus-discovery', { component: 'kube-controller-manager' }, servicePort.newNamed('http-metrics', 10252, 10252)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
kubeSchedulerPrometheusDiscoveryService:
|
||||
service.new('kube-scheduler-prometheus-discovery', { component: 'kube-scheduler' }, servicePort.newNamed('http-metrics', 10251, 10251)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
},
|
||||
}
|
18
jsonnet/kube-prometheus/kube-prometheus-kubespray.libsonnet
Normal file
@@ -0,0 +1,18 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
{
|
||||
prometheus+: {
|
||||
kubeControllerManagerPrometheusDiscoveryService:
|
||||
service.new('kube-controller-manager-prometheus-discovery', { 'k8s-app': 'kube-controller-manager' }, servicePort.newNamed('http-metrics', 10252, 10252)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-controller-manager' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
kubeSchedulerPrometheusDiscoveryService:
|
||||
service.new('kube-scheduler-prometheus-discovery', { 'k8s-app': 'kube-scheduler' }, servicePort.newNamed('http-metrics', 10251, 10251)) +
|
||||
service.mixin.metadata.withNamespace('kube-system') +
|
||||
service.mixin.metadata.withLabels({ 'k8s-app': 'kube-scheduler' }) +
|
||||
service.mixin.spec.withClusterIp('None'),
|
||||
},
|
||||
}
|
@@ -0,0 +1,28 @@
|
||||
// On managed Kubernetes clusters some of the control plane components are not exposed to customers.
|
||||
// Disable scrape jobs and service monitors for these components by overwriting 'kube-prometheus.libsonnet' defaults
|
||||
// Note that this doesn't disable generation of the associated alerting rules, but those rules will never trigger.
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
// This snippet walks the original object (super.jobs, set as temp var j) and creates a replacement jobs object
|
||||
// excluding any members of the set specified (e.g. controller and scheduler).
|
||||
local j = super.jobs,
|
||||
jobs: {
|
||||
[k]: j[k]
|
||||
for k in std.objectFields(j)
|
||||
if !std.setMember(k, ['KubeControllerManager', 'KubeScheduler'])
|
||||
},
|
||||
},
|
||||
|
||||
// Same as above, but for ServiceMonitors.
|
||||
local p = super.prometheus,
|
||||
prometheus: {
|
||||
[q]: p[q]
|
||||
for q in std.objectFields(p)
|
||||
if !std.setMember(q, ['serviceMonitorKubeControllerManager', 'serviceMonitorKubeScheduler'])
|
||||
},
|
||||
|
||||
// TODO: disable generation of alerting rules
|
||||
// manifests/prometheus-rules.yaml:52: - name: kube-scheduler.rules
|
||||
|
||||
}
|
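A usage sketch for the managed-cluster mixin above (import path assumed): layering it over the base library drops the kube-controller-manager and kube-scheduler scrape jobs and ServiceMonitors before the manifests are rendered.

```jsonnet
local kp =
  (import 'kube-prometheus/kube-prometheus.libsonnet') +
  (import 'kube-prometheus/kube-prometheus-managed-cluster.libsonnet') +
  { _config+:: { namespace: 'monitoring' } };

{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) }
```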
21
jsonnet/kube-prometheus/kube-prometheus-node-ports.libsonnet
Normal file
@@ -0,0 +1,21 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
{
|
||||
prometheus+: {
|
||||
service+:
|
||||
service.mixin.spec.withPorts(servicePort.newNamed('web', 9090, 'web') + servicePort.withNodePort(30900)) +
|
||||
service.mixin.spec.withType('NodePort'),
|
||||
},
|
||||
alertmanager+: {
|
||||
service+:
|
||||
service.mixin.spec.withPorts(servicePort.newNamed('web', 9093, 'web') + servicePort.withNodePort(30903)) +
|
||||
service.mixin.spec.withType('NodePort'),
|
||||
},
|
||||
grafana+: {
|
||||
service+:
|
||||
service.mixin.spec.withPorts(servicePort.newNamed('http', 3000, 'http') + servicePort.withNodePort(30902)) +
|
||||
service.mixin.spec.withType('NodePort'),
|
||||
},
|
||||
}
|
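A usage sketch for the NodePort mixin above: adding it to the base stack exposes Prometheus, Alertmanager, and Grafana on ports 30900, 30903, and 30902 respectively.

```jsonnet
local kp =
  (import 'kube-prometheus/kube-prometheus.libsonnet') +
  (import 'kube-prometheus/kube-prometheus-node-ports.libsonnet');

{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) } +
{ ['alertmanager-' + name]: kp.alertmanager[name] for name in std.objectFields(kp.alertmanager) } +
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```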
Some files were not shown because too many files have changed in this diff.