Merge branch 'main' of https://github.com/prometheus-operator/kube-prometheus into example/alertmanager-custom-config
.github/env (new file, vendored, 2 lines)
@@ -0,0 +1,2 @@
+golang-version=1.16
+kind-version=v0.11.1
.github/workflows/ci.yaml (vendored, 11 changes)
@@ -22,6 +22,17 @@ jobs:
       with:
         go-version: ${{ env.golang-version }}
     - run: make --always-make generate validate && git diff --exit-code
+  check-docs:
+    runs-on: ubuntu-latest
+    name: Check Documentation formatting and links
+    steps:
+    - uses: actions/checkout@v2
+      with:
+        persist-credentials: false
+    - uses: actions/setup-go@v2
+      with:
+        go-version: ${{ env.golang-version }}
+    - run: make check-docs
   lint:
     runs-on: ubuntu-latest
     name: Jsonnet linter
.github/workflows/versions.yaml (vendored, 13 changes)
@@ -23,11 +23,15 @@ jobs:
       with:
         go-version: 1.16
     - name: Upgrade versions
+      id: versions
       run: |
         export GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }}
         # Write to temporary file to make update atomic
         scripts/generate-versions.sh > /tmp/versions.json
         mv /tmp/versions.json jsonnet/kube-prometheus/versions.json
+        # Get the links to the changelogs of the updated versions and make them
+        # available to the reviewers
+        echo ::set-output name=new_changelogs::$(scripts/get-new-changelogs.sh)
       if: matrix.branch == 'main'
     - name: Update jsonnet dependencies
       run: |
@@ -49,7 +53,12 @@ jobs:

          This is an automated version and jsonnet dependencies update performed from CI.

-         Configuration of the workflow is located in `.github/workflows/versions.yaml`
+         Please review the following changelogs to make sure that we don't miss any important
+         changes before merging this PR.
+
+         ${{ steps.versions.outputs.new_changelogs }}
+
+         Configuration of the workflow is located in `.github/workflows/versions.yaml`.

          ## Type of change

@@ -61,6 +70,8 @@ jobs:

        ```
        team-reviewers: kube-prometheus-reviewers
+       committer: Prometheus Operator Bot <prom-op-bot@users.noreply.github.com>
+       author: Prometheus Operator Bot <prom-op-bot@users.noreply.github.com>
        branch: automated-updates-${{ matrix.branch }}
        delete-branch: true
        # GITHUB_TOKEN cannot be used as it won't trigger CI in a created PR
.gitignore (vendored, 3 changes)
@@ -5,4 +5,5 @@ vendor/
 .swp
 crdschemas/

-.gitpod/_output/
+developer-workspace/gitpod/_output
+kind
.gitpod.yml (10 changes)
@@ -24,17 +24,17 @@ tasks:
       chmod +x ${PWD}/.git/hooks/pre-commit
   - name: run kube-prometheus
     command: |
-      .gitpod/prepare-k3s.sh
-      .gitpod/deploy-kube-prometheus.sh
+      developer-workspace/gitpod/prepare-k3s.sh
+      developer-workspace/common/deploy-kube-prometheus.sh
   - name: kernel dev environment
     init: |
       sudo apt update -y
       sudo apt install qemu qemu-system-x86 linux-image-$(uname -r) libguestfs-tools sshpass netcat -y
       sudo curl -o /usr/bin/kubectl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
       sudo chmod +x /usr/bin/kubectl
-      .gitpod/prepare-rootfs.sh
+      developer-workspace/gitpod/prepare-rootfs.sh
     command: |
-      .gitpod/qemu.sh
+      developer-workspace/gitpod/qemu.sh
 ports:
   - port: 3000
     onOpen: open-browser
@@ -44,4 +44,4 @@ ports:
     onOpen: open-browser
 vscode:
   extensions:
-    - heptio.jsonnet@0.1.0:woEDU5N62LRdgdz0g/I6sQ==
+    - heptio.jsonnet
.mdox.validate.yaml (new file, 9 lines)
@@ -0,0 +1,9 @@
+version: 1
+
+validators:
+  # Ignore localhost links.
+  - regex: 'localhost'
+    type: "ignore"
+  # Ignore release links.
+  - regex: 'https:\/\/github\.com\/prometheus-operator\/kube-prometheus\/releases'
+    type: "ignore"
Makefile (29 changes)
@@ -2,29 +2,39 @@ SHELL=/bin/bash -o pipefail

 BIN_DIR?=$(shell pwd)/tmp/bin

-EMBEDMD_BIN=$(BIN_DIR)/embedmd
+MDOX_BIN=$(BIN_DIR)/mdox
 JB_BIN=$(BIN_DIR)/jb
 GOJSONTOYAML_BIN=$(BIN_DIR)/gojsontoyaml
 JSONNET_BIN=$(BIN_DIR)/jsonnet
 JSONNETLINT_BIN=$(BIN_DIR)/jsonnet-lint
 JSONNETFMT_BIN=$(BIN_DIR)/jsonnetfmt
 KUBECONFORM_BIN=$(BIN_DIR)/kubeconform
-TOOLING=$(EMBEDMD_BIN) $(JB_BIN) $(GOJSONTOYAML_BIN) $(JSONNET_BIN) $(JSONNETLINT_BIN) $(JSONNETFMT_BIN) $(KUBECONFORM_BIN)
+TOOLING=$(JB_BIN) $(GOJSONTOYAML_BIN) $(JSONNET_BIN) $(JSONNETLINT_BIN) $(JSONNETFMT_BIN) $(KUBECONFORM_BIN) $(MDOX_BIN)

 JSONNETFMT_ARGS=-n 2 --max-blank-lines 2 --string-style s --comment-style s

-all: generate fmt test
+MDOX_VALIDATE_CONFIG?=.mdox.validate.yaml
+MD_FILES_TO_FORMAT=$(shell find docs developer-workspace examples experimental jsonnet manifests -name "*.md") $(shell ls *.md)
+
+all: generate fmt test docs

 .PHONY: clean
 clean:
 	# Remove all files and directories ignored by git.
 	git clean -Xfd .

-.PHONY: generate
-generate: manifests **.md
+.PHONY: docs
+docs: $(MDOX_BIN) $(shell find examples) build.sh example.jsonnet
+	@echo ">> formatting and local/remote links"
+	$(MDOX_BIN) fmt --soft-wraps -l --links.localize.address-regex="https://prometheus-operator.dev/.*" --links.validate.config-file=$(MDOX_VALIDATE_CONFIG) $(MD_FILES_TO_FORMAT)

-**.md: $(EMBEDMD_BIN) $(shell find examples) build.sh example.jsonnet
-	$(EMBEDMD_BIN) -w `find . -name "*.md" | grep -v vendor`
+.PHONY: check-docs
+check-docs: $(MDOX_BIN) $(shell find examples) build.sh example.jsonnet
+	@echo ">> checking formatting and local/remote links"
+	$(MDOX_BIN) fmt --soft-wraps --check -l --links.localize.address-regex="https://prometheus-operator.dev/.*" --links.validate.config-file=$(MDOX_VALIDATE_CONFIG) $(MD_FILES_TO_FORMAT)
+
+.PHONY: generate
+generate: manifests

 manifests: examples/kustomize.jsonnet $(GOJSONTOYAML_BIN) vendor
 	./build.sh $<
@@ -78,3 +88,8 @@ $(BIN_DIR):
 $(TOOLING): $(BIN_DIR)
 	@echo Installing tools from scripts/tools.go
 	@cd scripts && cat tools.go | grep _ | awk -F'"' '{print $$2}' | xargs -tI % go build -modfile=go.mod -o $(BIN_DIR) %
+
+.PHONY: deploy
+deploy:
+	./developer-workspace/codespaces/prepare-kind.sh
+	./developer-workspace/common/deploy-kube-prometheus.sh
README.md (89 changes)
@@ -80,8 +80,8 @@ You will need a Kubernetes cluster, that's it! By default it is assumed, that th

 This means the kubelet configuration must contain these flags:

 * `--authentication-token-webhook=true` This flag enables, that a `ServiceAccount` token can be used to authenticate against the kubelet(s). This can also be enabled by setting the kubelet configuration value `authentication.webhook.enabled` to `true`.
 * `--authorization-mode=Webhook` This flag enables, that the kubelet will perform an RBAC request with the API to determine, whether the requesting entity (Prometheus in this case) is allowed to access a resource, in specific for this project the `/metrics` endpoint. This can also be enabled by setting the kubelet configuration value `authorization.mode` to `Webhook`.

 This stack provides [resource metrics](https://github.com/kubernetes/metrics#resource-metrics-api) by deploying the [Prometheus Adapter](https://github.com/DirectXMan12/k8s-prometheus-adapter/).
 This adapter is an Extension API Server and Kubernetes needs to be have this feature enabled, otherwise the adapter has no effect, but is still deployed.
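Editor's note, not part of the diff: the two kubelet flags above map onto kubelet configuration file fields. A minimal sketch of a `KubeletConfiguration` that enables both settings (assuming the standard `kubelet.config.k8s.io/v1beta1` config file is in use) would look like this:

```yaml
# Illustrative sketch only; equivalent to passing
# --authentication-token-webhook=true and --authorization-mode=Webhook.
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
authentication:
  webhook:
    enabled: true
authorization:
  mode: Webhook
```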
@@ -116,12 +116,12 @@ The following versions are supported and work as we test against these versions

 ## Quickstart

->Note: For versions before Kubernetes v1.21.z refer to the [Kubernetes compatibility matrix](#kubernetes-compatibility-matrix) in order to choose a compatible branch.
+> Note: For versions before Kubernetes v1.21.z refer to the [Kubernetes compatibility matrix](#kubernetes-compatibility-matrix) in order to choose a compatible branch.

 This project is intended to be used as a library (i.e. the intent is not for you to create your own modified copy of this repository).

 Though for a quickstart a compiled version of the Kubernetes [manifests](manifests) generated with this library (specifically with `example.jsonnet`) is checked into this repository in order to try the content out quickly. To try out the stack un-customized run:
 * Create the monitoring stack using the config in the `manifests` directory:

 ```shell
 # Create the namespace and CRDs, and then wait for them to be available before creating the remaining resources
@@ -135,7 +135,8 @@ Alternatively, the resources in both folders can be applied with a single comman
 `kubectl create -f manifests/setup -f manifests`, but it may be necessary to run the command multiple times for all components to
 be created successfullly.

 * And to teardown the stack:

 ```shell
 kubectl delete --ignore-not-found=true -f manifests/ -f manifests/setup
 ```
@@ -173,14 +174,15 @@ Then access via [http://localhost:9093](http://localhost:9093)
 ## Customizing Kube-Prometheus

 This section:
-* describes how to customize the kube-prometheus library via compiling the kube-prometheus manifests yourself (as an alternative to the [Quickstart section](#Quickstart)).
+* describes how to customize the kube-prometheus library via compiling the kube-prometheus manifests yourself (as an alternative to the [Quickstart section](#quickstart)).
 * still doesn't require you to make a copy of this entire repository, but rather only a copy of a few select files.

 ### Installing

 The content of this project consists of a set of [jsonnet](http://jsonnet.org/) files making up a library to be consumed.

 Install this library in your own project with [jsonnet-bundler](https://github.com/jsonnet-bundler/jsonnet-bundler#install) (the jsonnet package manager):

 ```shell
 $ mkdir my-kube-prometheus; cd my-kube-prometheus
 $ jb init  # Creates the initial/empty `jsonnetfile.json`
@@ -196,6 +198,7 @@ $ wget https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/rel
 > An e.g. of how to install a given version of this library: `jb install github.com/prometheus-operator/kube-prometheus/jsonnet/kube-prometheus@release-0.7`

 In order to update the kube-prometheus dependency, simply use the jsonnet-bundler update functionality:

 ```shell
 $ jb update
 ```
@@ -210,8 +213,7 @@ Here's [example.jsonnet](example.jsonnet):

 > Note: some of the following components must be configured beforehand. See [configuration](#configuration) and [customization-examples](#customization-examples).

-[embedmd]:# (example.jsonnet)
-```jsonnet
+```jsonnet mdox-exec="cat example.jsonnet"
 local kp =
   (import 'kube-prometheus/main.libsonnet') +
   // Uncomment the following imports to enable its patches
@@ -250,8 +252,7 @@ local kp =

 And here's the [build.sh](build.sh) script (which uses `vendor/` to render all manifests in a json structure of `{filename: manifest-content}`):

-[embedmd]:# (build.sh)
-```sh
+```sh mdox-exec="cat build.sh"
 #!/usr/bin/env bash

 # This script uses arg $1 (name of *.jsonnet file to use) to generate the manifests/*.yaml files.
@@ -282,6 +283,7 @@ rm -f kustomization
 This script runs the jsonnet code, then reads each key of the generated json and uses that as the file name, and writes the value of that key to that file, and converts each json manifest to yaml.

 ### Apply the kube-prometheus stack

 The previous steps (compilation) has created a bunch of manifest files in the manifest/ folder.
 Now simply use `kubectl` to install Prometheus and Grafana as per your configuration:

@@ -290,6 +292,7 @@ Now simply use `kubectl` to install Prometheus and Grafana as per your configura
 $ kubectl apply -f manifests/setup
 $ kubectl apply -f manifests/
 ```

 Alternatively, the resources in both folders can be applied with a single command
 `kubectl apply -Rf manifests`, but it may be necessary to run the command multiple times for all components to
 be created successfullly.
@@ -299,15 +302,18 @@ Check the monitoring namespace (or the namespace you have specific in `namespace
 ### Containerized Installing and Compiling

 If you don't care to have `jb` nor `jsonnet` nor `gojsontoyaml` installed, then use `quay.io/coreos/jsonnet-ci` container image. Do the following from this `kube-prometheus` directory:

 ```shell
 $ docker run --rm -v $(pwd):$(pwd) --workdir $(pwd) quay.io/coreos/jsonnet-ci jb update
 $ docker run --rm -v $(pwd):$(pwd) --workdir $(pwd) quay.io/coreos/jsonnet-ci ./build.sh example.jsonnet
 ```

 ## Update from upstream project

 You may wish to fetch changes made on this project so they are available to you.

 ### Update jb

 `jb` may have been updated so it's a good idea to get the latest version of this binary:

 ```shell
@@ -315,14 +321,16 @@ $ go get -u github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb
 ```

 ### Update kube-prometheus

 The command below will sync with upstream project:

 ```shell
 $ jb update
 ```

 ### Compile the manifests and apply
-Once updated, just follow the instructions under "Compiling" and "Apply the kube-prometheus stack" to apply the changes to your cluster.

+Once updated, just follow the instructions under "Compiling" and "Apply the kube-prometheus stack" to apply the changes to your cluster.

 ## Configuration

@@ -343,7 +351,8 @@ Configuration is mainly done in the `values` map. You can see this being used in
 },
 ```

-The grafana definition is located in a different project (https://github.com/brancz/kubernetes-grafana), but needed configuration can be customized from the same top level `values` field. For example to allow anonymous access to grafana, add the following `values` section:
+The grafana definition is located in a different project (https://github.com/brancz/kubernetes-grafana ), but needed configuration can be customized from the same top level `values` field. For example to allow anonymous access to grafana, add the following `values` section:

 ```
 grafana+:: {
   config: { // http://docs.grafana.org/installation/configuration/
@@ -366,14 +375,14 @@ A common example is that not all Kubernetes clusters are created exactly the sam
 * bootkube
 * eks
 * gke
-* kops-coredns
+* kops
+* kops_coredns
 * kubeadm
 * kubespray

 These mixins are selectable via the `platform` field of kubePrometheus:

-[embedmd]:# (examples/jsonnet-snippets/platform.jsonnet)
-```jsonnet
+```jsonnet mdox-exec="cat examples/jsonnet-snippets/platform.jsonnet"
 (import 'kube-prometheus/main.libsonnet') +
 {
   values+:: {
@@ -405,8 +414,7 @@ The output of this command can be piped to a shell to be executed by appending `

 Then to generate manifests with `internal-registry.com/organization`, use the `withImageRepository` mixin:

-[embedmd]:# (examples/internal-registry.jsonnet)
-```jsonnet
+```jsonnet mdox-exec="cat examples/internal-registry.jsonnet"
 local mixin = import 'kube-prometheus/addons/config-mixins.libsonnet';
 local kp = (import 'kube-prometheus/main.libsonnet') + {
   values+:: {
@@ -429,8 +437,7 @@ local kp = (import 'kube-prometheus/main.libsonnet') + {

 Another mixin that may be useful for exploring the stack is to expose the UIs of Prometheus, Alertmanager and Grafana on NodePorts:

-[embedmd]:# (examples/jsonnet-snippets/node-ports.jsonnet)
-```jsonnet
+```jsonnet mdox-exec="cat examples/jsonnet-snippets/node-ports.jsonnet"
 (import 'kube-prometheus/main.libsonnet') +
 (import 'kube-prometheus/addons/node-ports.libsonnet')
 ```
@@ -439,8 +446,7 @@ Another mixin that may be useful for exploring the stack is to expose the UIs of

 To give another customization example, the name of the `Prometheus` object provided by this library can be overridden:

-[embedmd]:# (examples/prometheus-name-override.jsonnet)
-```jsonnet
+```jsonnet mdox-exec="cat examples/prometheus-name-override.jsonnet"
 ((import 'kube-prometheus/main.libsonnet') + {
   prometheus+: {
     prometheus+: {
@@ -456,8 +462,7 @@ To give another customization example, the name of the `Prometheus` object provi

 Standard Kubernetes manifests are all written using [ksonnet-lib](https://github.com/ksonnet/ksonnet-lib/), so they can be modified with the mixins supplied by ksonnet-lib. For example to override the namespace of the node-exporter DaemonSet:

-[embedmd]:# (examples/ksonnet-example.jsonnet)
-```jsonnet
+```jsonnet mdox-exec="cat examples/ksonnet-example.jsonnet"
 ((import 'kube-prometheus/main.libsonnet') + {
   nodeExporter+: {
     daemonset+: {
@@ -473,8 +478,7 @@ Standard Kubernetes manifests are all written using [ksonnet-lib](https://github

 The Alertmanager configuration is located in the `values.alertmanager.config` configuration field. In order to set a custom Alertmanager configuration simply set this field.

-[embedmd]:# (examples/alertmanager-config.jsonnet)
-```jsonnet
+```jsonnet mdox-exec="cat examples/alertmanager-config.jsonnet"
 ((import 'kube-prometheus/main.libsonnet') + {
   values+:: {
     alertmanager+: {
@@ -501,8 +505,7 @@ The Alertmanager configuration is located in the `values.alertmanager.config` co

 In the above example the configuration has been inlined, but can just as well be an external file imported in jsonnet via the `importstr` function.

-[embedmd]:# (examples/alertmanager-config-external.jsonnet)
-```jsonnet
+```jsonnet mdox-exec="cat examples/alertmanager-config-external.jsonnet"
 ((import 'kube-prometheus/main.libsonnet') + {
   values+:: {
     alertmanager+: {
@@ -516,8 +519,7 @@ In the above example the configuration has been inlined, but can just as well be

 In order to monitor additional namespaces, the Prometheus server requires the appropriate `Role` and `RoleBinding` to be able to discover targets from that namespace. By default the Prometheus server is limited to the three namespaces it requires: default, kube-system and the namespace you configure the stack to run in via `$.values.namespace`. This is specified in `$.values.prometheus.namespaces`, to add new namespaces to monitor, simply append the additional namespaces:

-[embedmd]:# (examples/additional-namespaces.jsonnet)
-```jsonnet
+```jsonnet mdox-exec="cat examples/additional-namespaces.jsonnet"
 local kp = (import 'kube-prometheus/main.libsonnet') + {
   values+:: {
     common+: {
@@ -547,8 +549,7 @@ In order to Prometheus be able to discovery and scrape services inside the addit

 You can define ServiceMonitor resources in your `jsonnet` spec. See the snippet bellow:

-[embedmd]:# (examples/additional-namespaces-servicemonitor.jsonnet)
-```jsonnet
+```jsonnet mdox-exec="cat examples/additional-namespaces-servicemonitor.jsonnet"
 local kp = (import 'kube-prometheus/main.libsonnet') + {
   values+:: {
     common+: {
@@ -575,7 +576,7 @@ local kp = (import 'kube-prometheus/main.libsonnet') + {
       ],
       selector: {
         matchLabels: {
-          app: 'myapp',
+          'app.kubernetes.io/name': 'myapp',
         },
       },
     },
@@ -600,8 +601,7 @@ local kp = (import 'kube-prometheus/main.libsonnet') + {

 In case you want to monitor all namespaces in a cluster, you can add the following mixin. Also, make sure to empty the namespaces defined in prometheus so that roleBindings are not created against them.

-[embedmd]:# (examples/all-namespaces.jsonnet)
-```jsonnet
+```jsonnet mdox-exec="cat examples/all-namespaces.jsonnet"
 local kp = (import 'kube-prometheus/main.libsonnet') +
   (import 'kube-prometheus/addons/all-namespaces.libsonnet') + {
   values+:: {
@@ -629,7 +629,7 @@ Proceed with [creating ServiceMonitors for the services in the namespaces](#defi

 ### Static etcd configuration

-In order to configure a static etcd cluster to scrape there is a simple [kube-prometheus-static-etcd.libsonnet](jsonnet/kube-prometheus/kube-prometheus-static-etcd.libsonnet) mixin prepared - see [etcd.jsonnet](examples/etcd.jsonnet) for an example of how to use that mixin, and [Monitoring external etcd](docs/monitoring-external-etcd.md) for more information.
+In order to configure a static etcd cluster to scrape there is a simple [static-etcd.libsonnet](jsonnet/kube-prometheus/addons/static-etcd.libsonnet) mixin prepared - see [etcd.jsonnet](examples/etcd.jsonnet) for an example of how to use that mixin, and [Monitoring external etcd](docs/monitoring-external-etcd.md) for more information.

 > Note that monitoring etcd in minikube is currently not possible because of how etcd is setup. (minikube's etcd binds to 127.0.0.1:2379 only, and within host networking namespace.)

@@ -638,8 +638,7 @@ In order to configure a static etcd cluster to scrape there is a simple [kube-pr
 To prevent `Prometheus` and `Alertmanager` instances from being deployed onto the same node when
 possible, one can include the [kube-prometheus-anti-affinity.libsonnet](jsonnet/kube-prometheus/addons/anti-affinity.libsonnet) mixin:

-[embedmd]:# (examples/anti-affinity.jsonnet)
-```jsonnet
+```jsonnet mdox-exec="cat examples/anti-affinity.jsonnet"
 local kp = (import 'kube-prometheus/main.libsonnet') +
   (import 'kube-prometheus/addons/anti-affinity.libsonnet') + {
   values+:: {
@@ -663,8 +662,7 @@ local kp = (import 'kube-prometheus/main.libsonnet') +
 Sometimes in small clusters, the CPU/memory limits can get high enough for alerts to be fired continuously. To prevent this, one can strip off the predefined limits.
 To do that, one can import the following mixin

-[embedmd]:# (examples/strip-limits.jsonnet)
-```jsonnet
+```jsonnet mdox-exec="cat examples/strip-limits.jsonnet"
 local kp = (import 'kube-prometheus/main.libsonnet') +
   (import 'kube-prometheus/addons/strip-limits.libsonnet') + {
   values+:: {
@@ -758,11 +756,11 @@ resources. One driver for more resource needs, is a high number of
 namespaces. There may be others.

 kube-state-metrics resource allocation is managed by
-[addon-resizer](https://github.com/kubernetes/autoscaler/tree/main/addon-resizer/nanny)
+[addon-resizer](https://github.com/kubernetes/autoscaler/tree/master/addon-resizer/nanny)
 You can control it's parameters by setting variables in the
 config. They default to:

-``` jsonnet
+```jsonnet
 kubeStateMetrics+:: {
   baseCPU: '100m',
   cpuPerNode: '2m',
@@ -772,11 +770,12 @@ config. They default to:
 ```

 ### Error retrieving kube-proxy metrics

 By default, kubeadm will configure kube-proxy to listen on 127.0.0.1 for metrics. Because of this prometheus would not be able to scrape these metrics. This would have to be changed to 0.0.0.0 in one of the following two places:

 1. Before cluster initialization, the config file passed to kubeadm init should have KubeProxyConfiguration manifest with the field metricsBindAddress set to 0.0.0.0:10249
 2. If the k8s cluster is already up and running, we'll have to modify the configmap kube-proxy in the namespace kube-system and set the metricsBindAddress field. After this kube-proxy daemonset would have to be restarted with
 `kubectl -n kube-system rollout restart daemonset kube-proxy`
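Editor's note, not part of the diff: a minimal sketch of the KubeProxyConfiguration fragment described in option 1 above, as it would appear in the config file passed to `kubeadm init --config` (exact surrounding kubeadm documents omitted):

```yaml
# Illustrative sketch only; sets the field named in the README.
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
metricsBindAddress: "0.0.0.0:10249"
```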

 ## Contributing

@@ -788,8 +787,8 @@ the following process:
 2. Commit your changes (This is currently necessary due to our vendoring
 process. This is likely to change in the future).
 3. Update the pinned kube-prometheus dependency in `jsonnetfile.lock.json`: `jb update`
-3. Generate dependent `*.yaml` files: `make generate`
-4. Commit the generated changes.
+4. Generate dependent `*.yaml` files: `make generate`
+5. Commit the generated changes.

 ## License

RELEASE.md (new file, 120 lines)
@@ -0,0 +1,120 @@
+# Release schedule
+
+Kube-prometheus has a somewhat predictable release schedule, releases were
+historically cut in sync with OpenShift releases as per downstream needs. So
+far there hasn't been any problem with this schedule since it is also in sync
+with Kubernetes releases. So for every new Kubernetes release, there is a new
+release of kube-prometheus, although it tends to happen later.
+
+# How to cut a new release
+
+> This guide is strongly based on the [prometheus-operator release
+> instructions](https://github.com/prometheus-operator/prometheus-operator/blob/master/RELEASE.md).
+
+## Branch management and versioning strategy
+
+We use [Semantic Versioning](http://semver.org/).
+
+We maintain a separate branch for each minor release, named
+`release-<major>.<minor>`, e.g. `release-1.1`, `release-2.0`.
+
+The usual flow is to merge new features and changes into the master branch and
+to merge bug fixes into the latest release branch. Bug fixes are then merged
+into master from the latest release branch. The master branch should always
+contain all commits from the latest release branch.
+
+If a bug fix got accidentally merged into master, cherry-pick commits have to be
+created in the latest release branch, which then has to be merged back into
+master. Try to avoid that situation.
+
+Maintaining the release branches for older minor releases happens on a best
+effort basis.
+
+## Cut a release of kubernetes-mixins
+
+kube-prometheus and kubernetes-mixins releases are tied, so before cutting the
+release of kube-prometheus we should make sure that the same release of
+kubernetes-mixins exists.
+
+## Update components version
+
+Every release of kube-prometheus should include the latest versions of each
+component. Updating them is automated via a CI job that can be triggered
+manually from this
+[workflow](https://github.com/prometheus-operator/kube-prometheus/actions/workflows/versions.yaml).
+
+Once the workflow is completed, the prometheus-operator bot will create some
+PRs. You should merge the one prefixed by `[bot][main]` if created before
+proceeding. If the bot didn't create the PR, it is either because the workflow
+failed or because the main branch was already up-to-date.
+
+## Update Kubernetes supported versions
+
+The main branch of kube-prometheus should support the last 2 versions of
+Kubernetes. We need to make sure that the CI on the main branch is testing the
+kube-prometheus configuration against both of these versions by updating the [CI
+workflow](.github/workflows/ci.yaml) to include the latest kind version and the
+2 latest image versions that are attached to the kind release. Once that is
+done, the [compatibility matrix](README.md#kubernetes-compatibility-matrix) in
+the README should also be updated to reflect the CI changes.
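Editor's note, not part of the diff: as a rough sketch of what updating the CI workflow means in practice, the kind node-image matrix in `.github/workflows/ci.yaml` would be bumped along these lines (the job name and image tags below are assumptions for illustration, not taken from this commit):

```yaml
# Hypothetical excerpt of .github/workflows/ci.yaml
e2e-tests:
  strategy:
    matrix:
      kind-image:
        - 'kindest/node:v1.21.1'  # latest Kubernetes release shipped with the pinned kind
        - 'kindest/node:v1.20.7'  # previous Kubernetes release
```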
+
+## Create pull request to cut the release
+
+### Pin Jsonnet dependencies
+
+Pin jsonnet dependencies in
+[jsonnetfile.json](jsonnet/kube-prometheus/jsonnetfile.json). Each dependency
+should be pinned to the latest release branch or if it doesn't have one, pinned
+to the latest commit.
+
+### Start with a fresh environment
+
+```bash
+make clean
+```
+
+### Update Jsonnet dependencies
+
+```bash
+make update
+```
+
+### Generate manifests
+
+```bash
+make generate
+```
+
+### Update the compatibility matrix
+
+Update the [compatibility matrix](README.md#kubernetes-compatibility-matrix) in
+the README, by adding the new release based on the `main` branch compatibility
+and removing the oldest release branch to only keep the latest 5 branches in the
+matrix.
+
+### Update changelog
+
+Iterate over the PRs that were merged between the latest release of kube-prometheus and the HEAD and add the changelog entries to the [CHANGELOG](CHANGELOG.md).
+
+## Create release branch
+
+Once the PR cutting the release is merged, pull the changes, create a new
+release branch named `release-x.y` based on the latest changes and push it to
+the upstream repository.
+
+## Create follow-up pull request
+
+### Unpin Jsonnet dependencies
+
+Revert previous changes made when pinning the jsonnet dependencies since we want
+the main branch to be in sync with the latest changes of its dependencies.
+
+### Update CI workflow
+
+Update the [versions workflow](.github/workflows/versions.yaml) to include the latest release branch and remove the oldest one to reflect the list of supported releases.
+
+### Update Kubernetes versions used by kubeconform
+
+Update the versions of Kubernetes used when validating manifests with
+kubeconform in the [Makefile](Makefile) to align with the compatibility
+matrix.
developer-workspace/README.md (new file, 33 lines)
@@ -0,0 +1,33 @@
+# Ephemeral developer workspaces
+
+Aiming to provide a better developer experience when making contributions to kube-prometheus, whether by actively developing new features/bug fixes or by reviewing pull requests, we want to provide ephemeral developer workspaces with everything already configured (as far as tooling makes it possible).
+
+Those developer workspaces should provide a brand new kubernetes cluster, where kube-prometheus can be easily deployed and the contributor can easily see the impact that a pull request is proposing.
+
+Today there are 2 providers in the market:
+* [Github Codespaces](https://github.com/features/codespaces)
+* [Gitpod](https://www.gitpod.io/)
+
+## Codespaces
+
+Unfortunately, Codespaces is not available for everyone. If you are fortunate to have access to it, you can open a new workspace from a specific branch, or even from Pull Requests.
+
+
+
+
+
+After your workspace starts, you can deploy kube-prometheus inside a Kind cluster by running `make deploy`.
+
+If you are reviewing a PR, you'll have a fully-functional kubernetes cluster, generating real monitoring data that can be used to review if the proposed changes work as described.
+
+If you are working on new features/bug fixes, you can regenerate kube-prometheus's YAML manifests with `make generate` and deploy it again with `make deploy`.
+
+## Gitpod
+
+Gitpod is already available to everyone to use for free. It can also run commands that we specify in the `.gitpod.yml` file located in the root directory of the git repository, so even the cluster creation can be fully automated.
+
+You can use the same workflow as mentioned in the [Codespaces](#codespaces) section, however Gitpod doesn't have native support for any kubernetes distribution. The workaround is to create a full QEMU Virtual Machine and deploy [k3s](https://github.com/k3s-io/k3s) inside this VM. Don't worry, this whole process is already fully automated, but due to the workaround the whole workspace may be very slow.
+
+To open up a workspace with Gitpod, you can install the [Google Chrome extension](https://www.gitpod.io/docs/browser-extension/) to add a new button to Github UI and use it on PRs or from the main page. Or by directly typing in the browser `http://gitpod.io/#https://github.com/prometheus-operator/kube-prometheus/pull/<Pull Request Number>` or just `http://gitpod.io/#https://github.com/prometheus-operator/kube-prometheus`
+
+
developer-workspace/codespaces/prepare-kind.sh (new executable file, 20 lines)
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+which kind
+if [[ $? != 0 ]]; then
+    echo 'kind not available in $PATH, installing latest kind'
+    # Install latest kind
+    curl -s https://api.github.com/repos/kubernetes-sigs/kind/releases/latest \
+    | grep "browser_download_url.*kind-linux-amd64" \
+    | cut -d : -f 2,3 \
+    | tr -d \" \
+    | wget -qi -
+    mv kind-linux-amd64 kind && chmod +x kind
+fi
+
+cluster_created=$($PWD/kind get clusters 2>&1)
+if [[ "$cluster_created" == "No kind clusters found." ]]; then
+    $PWD/kind create cluster
+else
+    echo "Cluster '$cluster_created' already present"
+fi
@@ -1,9 +1,13 @@
+#!/bin/bash
+
 kubectl apply -f manifests/setup

 # Safety wait for CRDs to be working
 sleep 30

 kubectl apply -f manifests/
+sleep 30
+# Safety wait for resources to be created

 kubectl rollout status -n monitoring daemonset node-exporter
 kubectl rollout status -n monitoring statefulset alertmanager-main
@@ -5,8 +5,8 @@ AWS EKS uses [CNI](https://github.com/aws/amazon-vpc-cni-k8s) networking plugin
 One fatal issue that can occur is that you run out of IP addresses in your eks cluster. (Generally happens due to error configs where pods keep scheduling).

 You can monitor the `awscni` using kube-promethus with :
-[embedmd]:# (../examples/eks-cni-example.jsonnet)
-```jsonnet
+```jsonnet mdox-exec="cat examples/eks-cni-example.jsonnet"
 local kp = (import 'kube-prometheus/main.libsonnet') + {
   values+:: {
     common+: {
@@ -5,6 +5,7 @@ authentication. Until it does, Prometheus must use HTTP (not HTTPS)
 for scraping.

 You can configure this behavior through kube-prometheus with:

 ```
 local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') +
   (import 'kube-prometheus/kube-prometheus-insecure-kubelet.libsonnet') +
@@ -1,16 +1,16 @@
 ---
-title: "Blackbox Exporter"
-description: "Generated API docs for the Prometheus Operator"
-lead: "This Document documents the types introduced by the Prometheus Operator to be consumed by users."
-date: 2021-03-08T08:49:31+00:00
-lastmod: 2021-03-08T08:49:31+00:00
-draft: false
-images: []
-menu:
-  docs:
-    parent: "kube"
 weight: 630
 toc: true
+title: Blackbox Exporter
+menu:
+  docs:
+    parent: kube
+lead: This Document documents the types introduced by the Prometheus Operator to be consumed by users.
+lastmod: "2021-03-08T08:49:31+00:00"
+images: []
+draft: false
+description: Generated API docs for the Prometheus Operator
+date: "2021-03-08T08:49:31+00:00"
 ---

 # Setting up a blackbox exporter
@@ -21,6 +21,7 @@ The `prometheus-operator` defines a `Probe` resource type that can be used to de

 1. Override blackbox-related configuration parameters as needed.
 2. Add the following to the list of renderers to render the blackbox exporter manifests:

 ```
 { ['blackbox-exporter-' + name]: kp.blackboxExporter[name] for name in std.objectFields(kp.blackboxExporter) }
 ```
@@ -4,7 +4,7 @@ For bugs, you can use the GitHub [issue tracker](https://github.com/prometheus-o
|
|||||||
|
|
||||||
For questions, you can use the GitHub [discussions forum](https://github.com/prometheus-operator/kube-prometheus/discussions).
|
For questions, you can use the GitHub [discussions forum](https://github.com/prometheus-operator/kube-prometheus/discussions).
|
||||||
|
|
||||||
Many of the `kube-prometheus` project's contributors and users can also be found on the #prometheus-operator channel of the [Kubernetes Slack][Kubernetes Slack].
|
Many of the `kube-prometheus` project's contributors and users can also be found on the #prometheus-operator channel of the [Kubernetes Slack](https://slack.k8s.io/).
|
||||||
|
|
||||||
`kube-prometheus` is the aggregation of many projects that all have different
|
`kube-prometheus` is the aggregation of many projects that all have different
|
||||||
channels to reach out for help and support. This community strives at
|
channels to reach out for help and support. This community strives at
|
||||||
@@ -18,7 +18,7 @@ if applicable.
|
|||||||
|
|
||||||
For documentation, check the project's [documentation directory](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation).
|
For documentation, check the project's [documentation directory](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation).
|
||||||
|
|
||||||
For questions, use the #prometheus-operator channel on the [Kubernetes Slack][Kubernetes Slack].
|
For questions, use the #prometheus-operator channel on the [Kubernetes Slack](https://slack.k8s.io/).
|
||||||
|
|
||||||
For bugs, use the GitHub [issue tracker](https://github.com/prometheus-operator/prometheus-operator/issues/new/choose).
|
For bugs, use the GitHub [issue tracker](https://github.com/prometheus-operator/prometheus-operator/issues/new/choose).
|
||||||
|
|
||||||
@@ -32,13 +32,13 @@ related to the Prometheus ecosystem.
|
|||||||
|
|
||||||
For questions, see the Prometheus [community page](https://prometheus.io/community/) for the various channels.
|
For questions, see the Prometheus [community page](https://prometheus.io/community/) for the various channels.
|
||||||
|
|
||||||
There is also a #prometheus channel on the [CNCF Slack][CNCF Slack].
|
There is also a #prometheus channel on the [CNCF Slack](https://slack.cncf.io/).
|
||||||
|
|
||||||
## kube-state-metrics
|
## kube-state-metrics
|
||||||
|
|
||||||
For documentation, see the project's [docs directory](https://github.com/kubernetes/kube-state-metrics/tree/master/docs).
|
For documentation, see the project's [docs directory](https://github.com/kubernetes/kube-state-metrics/tree/master/docs).
|
||||||
|
|
||||||
For questions, use the #kube-state-metrics channel on the [Kubernetes Slack][Kubernetes Slack].
|
For questions, use the #kube-state-metrics channel on the [Kubernetes Slack](https://slack.k8s.io/).
|
||||||
|
|
||||||
For bugs, use the GitHub [issue tracker](https://github.com/kubernetes/kube-state-metrics/issues/new/choose).
|
For bugs, use the GitHub [issue tracker](https://github.com/kubernetes/kube-state-metrics/issues/new/choose).
|
||||||
|
|
||||||
@@ -46,7 +46,7 @@ For bugs, use the GitHub [issue tracker](https://github.com/kubernetes/kube-stat
|
|||||||
|
|
||||||
For documentation, check the [Kubernetes docs](https://kubernetes.io/docs/home/).
|
For documentation, check the [Kubernetes docs](https://kubernetes.io/docs/home/).
|
||||||
|
|
||||||
For questions, use the [community forums](https://discuss.kubernetes.io/) and the [Kubernetes Slack][Kubernetes Slack]. Check also the [community page](https://kubernetes.io/community/#discuss).
|
For questions, use the [community forums](https://discuss.kubernetes.io/) and the [Kubernetes Slack](https://slack.k8s.io/). Check also the [community page](https://kubernetes.io/community/#discuss).
|
||||||
|
|
||||||
For bugs, use the GitHub [issue tracker](https://github.com/kubernetes/kubernetes/issues/new/choose).
|
For bugs, use the GitHub [issue tracker](https://github.com/kubernetes/kubernetes/issues/new/choose).
|
||||||
|
|
||||||
@@ -54,7 +54,7 @@ For bugs, use the GitHub [issue tracker](https://github.com/kubernetes/kubernete
|
|||||||
|
|
||||||
For documentation, check the project's [README](https://github.com/DirectXMan12/k8s-prometheus-adapter/blob/master/README.md).
|
For documentation, check the project's [README](https://github.com/DirectXMan12/k8s-prometheus-adapter/blob/master/README.md).
|
||||||
|
|
||||||
For questions, use the #sig-instrumentation channel on the [Kubernetes Slack][Kubernetes Slack].
|
For questions, use the #sig-instrumentation channel on the [Kubernetes Slack](https://slack.k8s.io/).
|
||||||
|
|
||||||
For bugs, use the GitHub [issue tracker](https://github.com/DirectXMan12/k8s-prometheus-adapter/issues/new).
|
For bugs, use the GitHub [issue tracker](https://github.com/DirectXMan12/k8s-prometheus-adapter/issues/new).
|
||||||
|
|
||||||
@@ -70,7 +70,7 @@ For bugs, use the GitHub [issue tracker](https://github.com/grafana/grafana/issu
|
|||||||
|
|
||||||
For documentation, check the project's [README](https://github.com/kubernetes-monitoring/kubernetes-mixin/blob/master/README.md).
|
For documentation, check the project's [README](https://github.com/kubernetes-monitoring/kubernetes-mixin/blob/master/README.md).
|
||||||
|
|
||||||
For questions, use #monitoring-mixins channel on the [Kubernetes Slack][Kubernetes Slack].
|
For questions, use the #monitoring-mixins channel on the [Kubernetes Slack](https://slack.k8s.io/).
|
||||||
|
|
||||||
For bugs, use the GitHub [issue tracker](https://github.com/kubernetes-monitoring/kubernetes-mixin/issues/new).
|
For bugs, use the GitHub [issue tracker](https://github.com/kubernetes-monitoring/kubernetes-mixin/issues/new).
|
||||||
|
|
||||||
@@ -79,6 +79,3 @@ For bugs, use the GitHub [issue tracker](https://github.com/kubernetes-monitorin
|
|||||||
For documentation, check the [Jsonnet](https://jsonnet.org/) website.
|
For documentation, check the [Jsonnet](https://jsonnet.org/) website.
|
||||||
|
|
||||||
For questions, use the [mailing list](https://groups.google.com/forum/#!forum/jsonnet).
|
For questions, use the [mailing list](https://groups.google.com/forum/#!forum/jsonnet).
|
||||||
|
|
||||||
[Kubernetes Slack]: https://slack.k8s.io/
|
|
||||||
[CNCF Slack]: https://slack.cncf.io/
|
|
||||||
|
|||||||
@@ -1,15 +1,15 @@
|
|||||||
---
|
---
|
||||||
title: "Deploy to kind"
|
|
||||||
description: "Deploy kube-prometheus to Kubernets kind."
|
|
||||||
lead: "Deploy kube-prometheus to Kubernets kind."
|
|
||||||
date: 2021-03-08T23:04:32+01:00
|
|
||||||
draft: false
|
|
||||||
images: []
|
|
||||||
menu:
|
|
||||||
docs:
|
|
||||||
parent: "kube"
|
|
||||||
weight: 500
|
weight: 500
|
||||||
toc: true
|
toc: true
|
||||||
|
title: Deploy to kind
|
||||||
|
menu:
|
||||||
|
docs:
|
||||||
|
parent: kube
|
||||||
|
lead: Deploy kube-prometheus to Kubernetes kind.
|
||||||
|
images: []
|
||||||
|
draft: false
|
||||||
|
description: Deploy kube-prometheus to Kubernetes kind.
|
||||||
|
date: "2021-03-08T23:04:32+01:00"
|
||||||
---
|
---
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|||||||
@@ -1,15 +1,15 @@
|
|||||||
---
|
---
|
||||||
title: "Prometheus Rules and Grafana Dashboards"
|
|
||||||
description: "Create Prometheus Rules and Grafana Dashboards on top of kube-prometheus"
|
|
||||||
lead: "Create Prometheus Rules and Grafana Dashboards on top of kube-prometheus"
|
|
||||||
date: 2021-03-08T23:04:32+01:00
|
|
||||||
draft: false
|
|
||||||
images: []
|
|
||||||
menu:
|
|
||||||
docs:
|
|
||||||
parent: "kube"
|
|
||||||
weight: 650
|
weight: 650
|
||||||
toc: true
|
toc: true
|
||||||
|
title: Prometheus Rules and Grafana Dashboards
|
||||||
|
menu:
|
||||||
|
docs:
|
||||||
|
parent: kube
|
||||||
|
lead: Create Prometheus Rules and Grafana Dashboards on top of kube-prometheus
|
||||||
|
images: []
|
||||||
|
draft: false
|
||||||
|
description: Create Prometheus Rules and Grafana Dashboards on top of kube-prometheus
|
||||||
|
date: "2021-03-08T23:04:32+01:00"
|
||||||
---
|
---
|
||||||
|
|
||||||
`kube-prometheus` ships with a set of default [Prometheus rules](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/) and [Grafana](http://grafana.com/) dashboards. At some point one might like to extend them; the purpose of this document is to explain how to do this.
|
`kube-prometheus` ships with a set of default [Prometheus rules](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/) and [Grafana](http://grafana.com/) dashboards. At some point one might like to extend them; the purpose of this document is to explain how to do this.
|
||||||
@@ -20,8 +20,7 @@ For both the Prometheus rules and the Grafana dashboards Kubernetes `ConfigMap`s
|
|||||||
|
|
||||||
All examples in this guide build on the base example from the kube-prometheus [readme](../README.md):
|
All examples in this guide build on the base example from the kube-prometheus [readme](../README.md):
|
||||||
|
|
||||||
[embedmd]:# (../example.jsonnet)
|
```jsonnet mdox-exec="cat example.jsonnet"
|
||||||
```jsonnet
|
|
||||||
local kp =
|
local kp =
|
||||||
(import 'kube-prometheus/main.libsonnet') +
|
(import 'kube-prometheus/main.libsonnet') +
|
||||||
// Uncomment the following imports to enable its patches
|
// Uncomment the following imports to enable its patches
|
||||||
@@ -68,8 +67,7 @@ The format is exactly the Prometheus format, so there should be no changes neces
|
|||||||
|
|
||||||
> Note that alerts can just as well be included into this file, using the jsonnet `import` function. In this example it is just inlined in order to demonstrate their use in a single file.
|
> Note that alerts can just as well be included into this file, using the jsonnet `import` function. In this example it is just inlined in order to demonstrate their use in a single file.
|
||||||
|
|
||||||
[embedmd]:# (../examples/prometheus-additional-alert-rule-example.jsonnet)
|
```jsonnet mdox-exec="cat examples/prometheus-additional-alert-rule-example.jsonnet"
|
||||||
```jsonnet
|
|
||||||
local kp = (import 'kube-prometheus/main.libsonnet') + {
|
local kp = (import 'kube-prometheus/main.libsonnet') + {
|
||||||
values+:: {
|
values+:: {
|
||||||
common+: {
|
common+: {
|
||||||
@@ -124,8 +122,7 @@ In order to add a recording rule, simply do the same with the `prometheusRules`
|
|||||||
|
|
||||||
> Note that rules can just as well be included into this file, using the jsonnet `import` function. In this example it is just inlined in order to demonstrate their use in a single file.
|
> Note that rules can just as well be included into this file, using the jsonnet `import` function. In this example it is just inlined in order to demonstrate their use in a single file.
|
||||||
|
|
||||||
[embedmd]:# (../examples/prometheus-additional-recording-rule-example.jsonnet)
|
```jsonnet mdox-exec="cat examples/prometheus-additional-recording-rule-example.jsonnet"
|
||||||
```jsonnet
|
|
||||||
local kp = (import 'kube-prometheus/main.libsonnet') + {
|
local kp = (import 'kube-prometheus/main.libsonnet') + {
|
||||||
values+:: {
|
values+:: {
|
||||||
common+: {
|
common+: {
|
||||||
@@ -184,8 +181,7 @@ cat existingrule.yaml | gojsontoyaml -yamltojson > existingrule.json
|
|||||||
|
|
||||||
Then import it in jsonnet:
|
Then import it in jsonnet:
|
||||||
|
|
||||||
[embedmd]:# (../examples/prometheus-additional-rendered-rule-example.jsonnet)
|
```jsonnet mdox-exec="cat examples/prometheus-additional-rendered-rule-example.jsonnet"
|
||||||
```jsonnet
|
|
||||||
local kp = (import 'kube-prometheus/main.libsonnet') + {
|
local kp = (import 'kube-prometheus/main.libsonnet') + {
|
||||||
values+:: {
|
values+:: {
|
||||||
common+: {
|
common+: {
|
||||||
@@ -217,6 +213,7 @@ local kp = (import 'kube-prometheus/main.libsonnet') + {
|
|||||||
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } +
|
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) } +
|
||||||
{ ['example-application-' + name]: kp.exampleApplication[name] for name in std.objectFields(kp.exampleApplication) }
|
{ ['example-application-' + name]: kp.exampleApplication[name] for name in std.objectFields(kp.exampleApplication) }
|
||||||
```
|
```
|
||||||
|
|
||||||
### Changing default rules
|
### Changing default rules
|
||||||
|
|
||||||
Along with adding additional rules, we give the user the option to filter or adjust the existing rules imported by `kube-prometheus/main.libsonnet`. The recording rules can be found in [kube-prometheus/components/mixin/rules](../jsonnet/kube-prometheus/components/mixin/rules) and [kubernetes-mixin/rules](https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/rules) while the alerting rules can be found in [kube-prometheus/components/mixin/alerts](../jsonnet/kube-prometheus/components/mixin/alerts) and [kubernetes-mixin/alerts](https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/alerts).
|
Along with adding additional rules, we give the user the option to filter or adjust the existing rules imported by `kube-prometheus/main.libsonnet`. The recording rules can be found in [kube-prometheus/components/mixin/rules](../jsonnet/kube-prometheus/components/mixin/rules) and [kubernetes-mixin/rules](https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/rules) while the alerting rules can be found in [kube-prometheus/components/mixin/alerts](../jsonnet/kube-prometheus/components/mixin/alerts) and [kubernetes-mixin/alerts](https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/alerts).
|
||||||
@@ -224,7 +221,9 @@ Along with adding additional rules, we give the user the option to filter or adj
|
|||||||
Knowing which rules to change, the user can now use functions from the [Jsonnet standard library](https://jsonnet.org/ref/stdlib.html) to make these changes. Below are examples of both a filter and an adjustment being made to the default rules. These changes can be assigned to a local variable and then added to the `local kp` object as seen in the examples above.
|
Knowing which rules to change, the user can now use functions from the [Jsonnet standard library](https://jsonnet.org/ref/stdlib.html) to make these changes. Below are examples of both a filter and an adjustment being made to the default rules. These changes can be assigned to a local variable and then added to the `local kp` object as seen in the examples above.
|
||||||
|
|
||||||
#### Filter
|
#### Filter
|
||||||
|
|
||||||
Here the alert `KubeStatefulSetReplicasMismatch` is being filtered out of the group `kubernetes-apps`. The default rule can be seen [here](https://github.com/kubernetes-monitoring/kubernetes-mixin/blob/master/alerts/apps_alerts.libsonnet). You first need to find out in which component the rule is defined (here it is kubernetesControlPlane).
|
Here the alert `KubeStatefulSetReplicasMismatch` is being filtered out of the group `kubernetes-apps`. The default rule can be seen [here](https://github.com/kubernetes-monitoring/kubernetes-mixin/blob/master/alerts/apps_alerts.libsonnet). You first need to find out in which component the rule is defined (here it is kubernetesControlPlane).
|
||||||
|
|
||||||
```jsonnet
|
```jsonnet
|
||||||
local filter = {
|
local filter = {
|
||||||
kubernetesControlPlane+: {
|
kubernetesControlPlane+: {
|
||||||
@@ -251,7 +250,9 @@ local filter = {
|
|||||||
```
|
```
|
||||||
|
|
||||||
#### Adjustment
|
#### Adjustment
|
||||||
|
|
||||||
Here the expression for another alert in the same component is updated from its previous value. The default rule can be seen [here](https://github.com/kubernetes-monitoring/kubernetes-mixin/blob/master/alerts/apps_alerts.libsonnet).
|
Here the expression for another alert in the same component is updated from its previous value. The default rule can be seen [here](https://github.com/kubernetes-monitoring/kubernetes-mixin/blob/master/alerts/apps_alerts.libsonnet).
|
||||||
|
|
||||||
```jsonnet
|
```jsonnet
|
||||||
local update = {
|
local update = {
|
||||||
kubernetesControlPlane+: {
|
kubernetesControlPlane+: {
|
||||||
@@ -283,6 +284,7 @@ local update = {
|
|||||||
```
|
```
|
||||||
|
|
||||||
Using the example from above about adding in pre-rendered rules, the new local variables can be added in as follows:
|
Using the example from above about adding in pre-rendered rules, the new local variables can be added in as follows:
|
||||||
|
|
||||||
```jsonnet
|
```jsonnet
|
||||||
local add = {
|
local add = {
|
||||||
exampleApplication:: {
|
exampleApplication:: {
|
||||||
@@ -327,6 +329,7 @@ local kp = (import 'kube-prometheus/main.libsonnet') +
|
|||||||
{ ['kubernetes-' + name]: kp.kubernetesControlPlane[name] for name in std.objectFields(kp.kubernetesControlPlane) } +
|
{ ['kubernetes-' + name]: kp.kubernetesControlPlane[name] for name in std.objectFields(kp.kubernetesControlPlane) } +
|
||||||
{ ['exampleApplication-' + name]: kp.exampleApplication[name] for name in std.objectFields(kp.exampleApplication) }
|
{ ['exampleApplication-' + name]: kp.exampleApplication[name] for name in std.objectFields(kp.exampleApplication) }
|
||||||
```
|
```
|
||||||
|
|
||||||
## Dashboards
|
## Dashboards
|
||||||
|
|
||||||
Dashboards can either be added using jsonnet or simply a pre-rendered json dashboard.
|
Dashboards can either be added using jsonnet or simply a pre-rendered json dashboard.
|
||||||
@@ -337,8 +340,7 @@ We recommend using the [grafonnet](https://github.com/grafana/grafonnet-lib/) li
|
|||||||
|
|
||||||
> Note that dashboards can just as well be included into this file, using the jsonnet `import` function. In this example it is just inlined in order to demonstrate their use in a single file.
|
> Note that dashboards can just as well be included into this file, using the jsonnet `import` function. In this example it is just inlined in order to demonstrate their use in a single file.
|
||||||
|
|
||||||
[embedmd]:# (../examples/grafana-additional-jsonnet-dashboard-example.jsonnet)
|
```jsonnet mdox-exec="cat examples/grafana-additional-jsonnet-dashboard-example.jsonnet"
|
||||||
```jsonnet
|
|
||||||
local grafana = import 'grafonnet/grafana.libsonnet';
|
local grafana = import 'grafonnet/grafana.libsonnet';
|
||||||
local dashboard = grafana.dashboard;
|
local dashboard = grafana.dashboard;
|
||||||
local row = grafana.row;
|
local row = grafana.row;
|
||||||
@@ -394,8 +396,7 @@ local kp = (import 'kube-prometheus/main.libsonnet') + {
|
|||||||
|
|
||||||
As jsonnet is a superset of json, the jsonnet `import` function can be used to include Grafana dashboard json blobs. In this example we are importing a [provided example dashboard](../examples/example-grafana-dashboard.json).
|
As jsonnet is a superset of json, the jsonnet `import` function can be used to include Grafana dashboard json blobs. In this example we are importing a [provided example dashboard](../examples/example-grafana-dashboard.json).
|
||||||
|
|
||||||
[embedmd]:# (../examples/grafana-additional-rendered-dashboard-example.jsonnet)
|
```jsonnet mdox-exec="cat examples/grafana-additional-rendered-dashboard-example.jsonnet"
|
||||||
```jsonnet
|
|
||||||
local kp = (import 'kube-prometheus/main.libsonnet') + {
|
local kp = (import 'kube-prometheus/main.libsonnet') + {
|
||||||
values+:: {
|
values+:: {
|
||||||
common+:: {
|
common+:: {
|
||||||
@@ -419,8 +420,8 @@ local kp = (import 'kube-prometheus/main.libsonnet') + {
|
|||||||
```
|
```
|
||||||
|
|
||||||
In case you have lots of JSON dashboards exported from the Grafana UI, the above approach is going to take a lot of time. To improve performance, we can use the `rawDashboards` field and provide its value as a JSON string by using `importstr`:
|
In case you have lots of JSON dashboards exported from the Grafana UI, the above approach is going to take a lot of time. To improve performance, we can use the `rawDashboards` field and provide its value as a JSON string by using `importstr`:
|
||||||
[embedmd]:# (../examples/grafana-additional-rendered-dashboard-example-2.jsonnet)
|
|
||||||
```jsonnet
|
```jsonnet mdox-exec="cat examples/grafana-additional-rendered-dashboard-example-2.jsonnet"
|
||||||
local kp = (import 'kube-prometheus/main.libsonnet') + {
|
local kp = (import 'kube-prometheus/main.libsonnet') + {
|
||||||
values+:: {
|
values+:: {
|
||||||
common+:: {
|
common+:: {
|
||||||
@@ -523,8 +524,7 @@ values+:: {
|
|||||||
|
|
||||||
Full example of including etcd mixin using method described above:
|
Full example of including etcd mixin using method described above:
|
||||||
|
|
||||||
[embedmd]:# (../examples/mixin-inclusion.jsonnet)
|
```jsonnet mdox-exec="cat examples/mixin-inclusion.jsonnet"
|
||||||
```jsonnet
|
|
||||||
local addMixin = (import 'kube-prometheus/lib/mixin.libsonnet');
|
local addMixin = (import 'kube-prometheus/lib/mixin.libsonnet');
|
||||||
local etcdMixin = addMixin({
|
local etcdMixin = addMixin({
|
||||||
name: 'etcd',
|
name: 'etcd',
|
||||||
|
|||||||
@@ -1,15 +1,15 @@
|
|||||||
---
|
---
|
||||||
title: "Expose via Ingress"
|
|
||||||
description: "How to setup a Kubernetes Ingress to expose the Prometheus, Alertmanager and Grafana."
|
|
||||||
lead: "How to setup a Kubernetes Ingress to expose the Prometheus, Alertmanager and Grafana."
|
|
||||||
date: 2021-03-08T23:04:32+01:00
|
|
||||||
draft: false
|
|
||||||
images: []
|
|
||||||
menu:
|
|
||||||
docs:
|
|
||||||
parent: "kube"
|
|
||||||
weight: 500
|
weight: 500
|
||||||
toc: true
|
toc: true
|
||||||
|
title: Expose via Ingress
|
||||||
|
menu:
|
||||||
|
docs:
|
||||||
|
parent: kube
|
||||||
|
lead: How to set up a Kubernetes Ingress to expose Prometheus, Alertmanager and Grafana.
|
||||||
|
images: []
|
||||||
|
draft: false
|
||||||
|
description: How to set up a Kubernetes Ingress to expose Prometheus, Alertmanager and Grafana.
|
||||||
|
date: "2021-03-08T23:04:32+01:00"
|
||||||
---
|
---
|
||||||
|
|
||||||
In order to access the web interfaces via the Internet, [Kubernetes Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) is a popular option. This guide explains how Kubernetes Ingress can be set up to expose the Prometheus, Alertmanager and Grafana UIs that are included in the [kube-prometheus](https://github.com/prometheus-operator/kube-prometheus) project.
|
In order to access the web interfaces via the Internet, [Kubernetes Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) is a popular option. This guide explains how Kubernetes Ingress can be set up to expose the Prometheus, Alertmanager and Grafana UIs that are included in the [kube-prometheus](https://github.com/prometheus-operator/kube-prometheus) project.
|
||||||
@@ -104,7 +104,7 @@ k.core.v1.list.new([
|
|||||||
|
|
||||||
In order to expose Alertmanager and Grafana, create additional fields containing an ingress object, pointing at the `alertmanager` or `grafana` Service instead of the `prometheus-k8s` one. Make sure to also use the correct port: for Alertmanager it is also `web`, for Grafana it is `http`. Be sure to also specify the appropriate external URL. Note that the external URL for Grafana is set in a different way than the external URL for Prometheus or Alertmanager. See [ingress.jsonnet](../examples/ingress.jsonnet) for how to set the Grafana external URL.
|
In order to expose Alertmanager and Grafana, create additional fields containing an ingress object, pointing at the `alertmanager` or `grafana` Service instead of the `prometheus-k8s` one. Make sure to also use the correct port: for Alertmanager it is also `web`, for Grafana it is `http`. Be sure to also specify the appropriate external URL. Note that the external URL for Grafana is set in a different way than the external URL for Prometheus or Alertmanager. See [ingress.jsonnet](../examples/ingress.jsonnet) for how to set the Grafana external URL.
|
||||||
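For instance, a minimal sketch of such an additional field for Alertmanager might look like the following. This is not copied from `ingress.jsonnet`; the host name and the plain `networking.k8s.io/v1` Ingress object are illustrative assumptions, while the `alertmanager-main` Service name and its `web` port come from the default stack.

```jsonnet
local kp = (import 'kube-prometheus/main.libsonnet') + {
  // Illustrative only: an extra ingress field pointing at the
  // alertmanager-main Service on its 'web' port.
  ingress+:: {
    'alertmanager-main': {
      apiVersion: 'networking.k8s.io/v1',
      kind: 'Ingress',
      metadata: {
        name: 'alertmanager-main',
        namespace: $.values.common.namespace,
      },
      spec: {
        rules: [{
          host: 'alertmanager.example.com',  // placeholder host
          http: {
            paths: [{
              path: '/',
              pathType: 'Prefix',
              backend: { service: { name: 'alertmanager-main', port: { name: 'web' } } },
            }],
          },
        }],
      },
    },
  },
};

{ [name + '-ingress']: kp.ingress[name] for name in std.objectFields(kp.ingress) }
```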
|
|
||||||
In order to render the ingress objects similarly to the other objects, use the approach demonstrated in the [main readme](../README.md#usage):
|
In order to render the ingress objects similarly to the other objects, use the approach demonstrated in the [main readme](../README.md):
|
||||||
|
|
||||||
```
|
```
|
||||||
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
{ ['00namespace-' + name]: kp.kubePrometheus[name] for name in std.objectFields(kp.kubePrometheus) } +
|
||||||
|
|||||||
@@ -1,15 +1,15 @@
|
|||||||
---
|
---
|
||||||
title: "Deploy to kubeadm"
|
|
||||||
description: "Deploy kube-prometheus to Kubernets kubeadm."
|
|
||||||
lead: "Deploy kube-prometheus to Kubernets kubeadm."
|
|
||||||
date: 2021-03-08T23:04:32+01:00
|
|
||||||
draft: false
|
|
||||||
images: []
|
|
||||||
menu:
|
|
||||||
docs:
|
|
||||||
parent: "kube"
|
|
||||||
weight: 500
|
weight: 500
|
||||||
toc: true
|
toc: true
|
||||||
|
title: Deploy to kubeadm
|
||||||
|
menu:
|
||||||
|
docs:
|
||||||
|
parent: kube
|
||||||
|
lead: Deploy kube-prometheus to Kubernetes kubeadm.
|
||||||
|
images: []
|
||||||
|
draft: false
|
||||||
|
description: Deploy kube-prometheus to Kubernetes kubeadm.
|
||||||
|
date: "2021-03-08T23:04:32+01:00"
|
||||||
---
|
---
|
||||||
|
|
||||||
The [kubeadm](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/) tool is linked by Kubernetes as the official way to deploy and manage self-hosted clusters. kubeadm does a lot of heavy lifting by automatically configuring your Kubernetes cluster with some common options. This guide is intended to show you how to deploy Prometheus, Prometheus Operator and Kube Prometheus to get you started monitoring your cluster that was deployed with kubeadm.
|
The [kubeadm](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/) tool is linked by Kubernetes as the official way to deploy and manage self-hosted clusters. kubeadm does a lot of heavy lifting by automatically configuring your Kubernetes cluster with some common options. This guide is intended to show you how to deploy Prometheus, Prometheus Operator and Kube Prometheus to get you started monitoring your cluster that was deployed with kubeadm.
|
||||||
@@ -93,7 +93,6 @@ Once you complete this guide you will monitor the following:
|
|||||||
* kube-scheduler
|
* kube-scheduler
|
||||||
* kube-controller-manager
|
* kube-controller-manager
|
||||||
|
|
||||||
|
|
||||||
## Getting Up and Running Fast with Kube-Prometheus
|
## Getting Up and Running Fast with Kube-Prometheus
|
||||||
|
|
||||||
To help get started more quickly with monitoring Kubernetes clusters, [kube-prometheus](https://github.com/coreos/kube-prometheus) was created. It is a collection of manifests including dashboards and alerting rules that can easily be deployed. It utilizes the Prometheus Operator and all the manifests demonstrated in this guide.
|
To help get started more quickly with monitoring Kubernetes clusters, [kube-prometheus](https://github.com/coreos/kube-prometheus) was created. It is a collection of manifests including dashboards and alerting rules that can easily be deployed. It utilizes the Prometheus Operator and all the manifests demonstrated in this guide.
|
||||||
|
|||||||
@@ -2,9 +2,9 @@
|
|||||||
|
|
||||||
An example conversion of a legacy custom jsonnet file to release-0.8
|
An example conversion of a legacy custom jsonnet file to release-0.8
|
||||||
format can be seen by viewing and comparing this
|
format can be seen by viewing and comparing this
|
||||||
[release-0.3 jsonnet file](./my.release-0.3.jsonnet) (when the github
|
[release-0.3 jsonnet file](my.release-0.3.jsonnet) (when the github
|
||||||
repo was under `https://github.com/coreos/kube-prometheus...`)
|
repo was under `https://github.com/coreos/kube-prometheus...`)
|
||||||
and the corresponding [release-0.8 jsonnet file](./my.release-0.8.jsonnet).
|
and the corresponding [release-0.8 jsonnet file](my.release-0.8.jsonnet).
|
||||||
|
|
||||||
These two files have had necessary blank lines added so that they
|
These two files have had necessary blank lines added so that they
|
||||||
can be compared side-by-side and line-by-line on screen.
|
can be compared side-by-side and line-by-line on screen.
|
||||||
@@ -18,6 +18,7 @@ not necessarily best practice for the files in release-0.3 or release-0.8.
|
|||||||
|
|
||||||
Below are three sample extracts of the conversion as an indication of the
|
Below are three sample extracts of the conversion as an indication of the
|
||||||
changes required.
|
changes required.
|
||||||
|
|
||||||
<table>
|
<table>
|
||||||
<tr>
|
<tr>
|
||||||
<th> release-0.3 </th>
|
<th> release-0.3 </th>
|
||||||
|
|||||||
@@ -33,14 +33,14 @@ Thanks to our community we identified a lot of short-commings of previous design
|
|||||||
|
|
||||||
Those concepts were already present in the repository but it wasn't clear which file is holding what. After refactoring we categorized jsonnet code into 3 buckets and put them into separate directories:
|
Those concepts were already present in the repository but it wasn't clear which file is holding what. After refactoring we categorized jsonnet code into 3 buckets and put them into separate directories:
|
||||||
- `components` - main building blocks for kube-prometheus, written as functions responsible for creating multiple objects representing kubernetes manifests. For example all objects for node_exporter deployment are bundled in `components/node_exporter.libsonnet` library
|
- `components` - main building blocks for kube-prometheus, written as functions responsible for creating multiple objects representing kubernetes manifests. For example all objects for node_exporter deployment are bundled in `components/node_exporter.libsonnet` library
|
||||||
- `addons` - everything that can enhance kube-prometheus deployment. Those are small snippets of code adding a small feature, for example adding anti-affinity to pods via [`addons/anti-affinity.libsonnet`][antiaffinity]. Addons are meant to be used in object-oriented way like `local kp = (import 'kube-prometheus/main.libsonnet') + (import 'kube-prometheus/addons/all-namespaces.libsonnet')`
|
- `addons` - everything that can enhance kube-prometheus deployment. Those are small snippets of code adding a small feature, for example adding anti-affinity to pods via [`addons/anti-affinity.libsonnet`](https://github.com/prometheus-operator/kube-prometheus/blob/main/jsonnet/kube-prometheus/addons/anti-affinity.libsonnet). Addons are meant to be used in object-oriented way like `local kp = (import 'kube-prometheus/main.libsonnet') + (import 'kube-prometheus/addons/all-namespaces.libsonnet')`
|
||||||
- `platforms` - currently those are `addons` specialized to allow deploying kube-prometheus project on a specific platform.
|
- `platforms` - currently those are `addons` specialized to allow deploying kube-prometheus project on a specific platform.
|
||||||
|
|
||||||
### Component configuration
|
### Component configuration
|
||||||
|
|
||||||
Refactoring main components to use functions allowed us to define APIs for said components. Each function has a default set of parameters that can be overridden or that are required to be set by a user. Those default parameters are represented in each component by `defaults` map at the top of each library file, for example in [`node_exporter.libsonnet`][node_exporter_defaults_example].
|
Refactoring main components to use functions allowed us to define APIs for said components. Each function has a default set of parameters that can be overridden or that are required to be set by a user. Those default parameters are represented in each component by `defaults` map at the top of each library file, for example in [`node_exporter.libsonnet`](https://github.com/prometheus-operator/kube-prometheus/blob/1d2a0e275af97948667777739a18b24464480dc8/jsonnet/kube-prometheus/components/node-exporter.libsonnet#L3-L34).
|
||||||
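As a rough illustration of that pattern (a simplified sketch, not the actual node-exporter library), a component file has this shape:

```jsonnet
// Sketch of the component pattern: a hidden `defaults` map at the top of the
// library, merged with user-supplied params inside a top-level function.
local defaults = {
  namespace: error 'must provide namespace',  // required parameter
  port: 9100,                                 // overridable default
  commonLabels: { 'app.kubernetes.io/name': 'example-component' },
};

function(params)
  local config = defaults + params;
  {
    service: {
      apiVersion: 'v1',
      kind: 'Service',
      metadata: {
        name: 'example-component',
        namespace: config.namespace,
        labels: config.commonLabels,
      },
      spec: {
        ports: [{ name: 'metrics', port: config.port, targetPort: config.port }],
        selector: config.commonLabels,
      },
    },
    // ...the deployment, serviceMonitor, etc. would be built from the same config
  }
```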
|
|
||||||
This API is meant to ease the use of kube-prometheus as parameters can be passed from a JSON file and don't need to be in jsonnet format. However, if you need to modify particular parts of the stack, jsonnet allows you to do this and we are also not restricting such access in any way. An example of such modifications can be seen in any of our `addons`, like the [`addons/anti-affinity.libsonnet`][antiaffinity] one.
|
This API is meant to ease the use of kube-prometheus as parameters can be passed from a JSON file and don't need to be in jsonnet format. However, if you need to modify particular parts of the stack, jsonnet allows you to do this and we are also not restricting such access in any way. An example of such modifications can be seen in any of our `addons`, like the [`addons/anti-affinity.libsonnet`](https://github.com/prometheus-operator/kube-prometheus/blob/main/jsonnet/kube-prometheus/addons/anti-affinity.libsonnet) one.
|
||||||
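For example, since JSON is itself valid jsonnet, settings kept in a plain JSON file can be read with `import` and merged into `values`. This is only a sketch; `my-values.json` is a hypothetical file, not part of the repository.

```jsonnet
// my-values.json (hypothetical) could contain: { "namespace": "monitoring" }
local jsonValues = import 'my-values.json';

local kp = (import 'kube-prometheus/main.libsonnet') + {
  values+:: {
    common+: {
      namespace: jsonValues.namespace,
    },
  },
};

{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
```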
|
|
||||||
### Mixin integration
|
### Mixin integration
|
||||||
|
|
||||||
@@ -63,25 +63,14 @@ All examples from `examples/` directory were adapted to the new codebase. [Pleas
|
|||||||
|
|
||||||
## Legacy migration
|
## Legacy migration
|
||||||
|
|
||||||
An example of conversion of a legacy release-0.3 my.jsonnet file to release-0.8 can be found in [migration-example](./migration-example)
|
An example of conversion of a legacy release-0.3 my.jsonnet file to release-0.8 can be found in [migration-example](migration-example)
|
||||||
|
|
||||||
## Advanced usage examples
|
## Advanced usage examples
|
||||||
|
|
||||||
For more advanced usage examples you can take a look at these two publicly available implementations:
|
For more advanced usage examples you can take a look at these two publicly available implementations:
|
||||||
- [thaum-xyz/ankhmorpork][thaum] - extending kube-prometheus to adapt to a required environment
|
- [thaum-xyz/ankhmorpork](https://github.com/thaum-xyz/ankhmorpork/blob/master/apps/monitoring/jsonnet) - extending kube-prometheus to adapt to a required environment
|
||||||
- [openshift/cluster-monitoring-operator][openshift] - using kube-prometheus components as standalone libraries to build a custom solution
|
- [openshift/cluster-monitoring-operator](https://github.com/openshift/cluster-monitoring-operator/pull/1044) - using kube-prometheus components as standalone libraries to build a custom solution
|
||||||
|
|
||||||
## Final note
|
## Final note
|
||||||
|
|
||||||
Refactoring was a huge undertaking and possibly this document didn't describe in enough detail how to help you with migration to the new stack. If that is the case, please reach out to us by using [GitHub discussions][discussions] feature or directly on [#prometheus-operator kubernetes slack channel][slack].
|
Refactoring was a huge undertaking and possibly this document didn't describe in enough detail how to help you with migration to the new stack. If that is the case, please reach out to us by using [GitHub discussions](https://github.com/prometheus-operator/kube-prometheus/discussions) feature or directly on [#prometheus-operator kubernetes slack channel](http://slack.k8s.io/).
|
||||||
|
|
||||||
|
|
||||||
[antiaffinity]: https://github.com/prometheus-operator/kube-prometheus/blob/main/jsonnet/kube-prometheus/addons/anti-affinity.libsonnet
|
|
||||||
|
|
||||||
[node_exporter_defaults_example]: https://github.com/prometheus-operator/kube-prometheus/blob/1d2a0e275af97948667777739a18b24464480dc8/jsonnet/kube-prometheus/components/node-exporter.libsonnet#L3-L34
|
|
||||||
|
|
||||||
[openshift]: https://github.com/openshift/cluster-monitoring-operator/pull/1044
|
|
||||||
[thaum]: https://github.com/thaum-xyz/ankhmorpork/blob/master/apps/monitoring/jsonnet
|
|
||||||
|
|
||||||
[discussions]: https://github.com/prometheus-operator/kube-prometheus/discussions
|
|
||||||
[slack]: http://slack.k8s.io/
|
|
||||||
|
|||||||
@@ -1,23 +1,23 @@
|
|||||||
---
|
---
|
||||||
title: "Monitoring external etcd"
|
|
||||||
description: "This guide will help you monitor an external etcd cluster."
|
|
||||||
lead: "This guide will help you monitor an external etcd cluster."
|
|
||||||
date: 2021-03-08T23:04:32+01:00
|
|
||||||
draft: false
|
|
||||||
images: []
|
|
||||||
menu:
|
|
||||||
docs:
|
|
||||||
parent: "kube"
|
|
||||||
weight: 640
|
weight: 640
|
||||||
toc: true
|
toc: true
|
||||||
|
title: Monitoring external etcd
|
||||||
|
menu:
|
||||||
|
docs:
|
||||||
|
parent: kube
|
||||||
|
lead: This guide will help you monitor an external etcd cluster.
|
||||||
|
images: []
|
||||||
|
draft: false
|
||||||
|
description: This guide will help you monitor an external etcd cluster.
|
||||||
|
date: "2021-03-08T23:04:32+01:00"
|
||||||
---
|
---
|
||||||
|
|
||||||
This guide covers the case when the etcd cluster is not hosted inside Kubernetes.
|
This guide covers the case when the etcd cluster is not hosted inside Kubernetes.
|
||||||
This is often the case with Kubernetes setups. This approach has been tested with kube-aws but the same principles apply to other tools.
|
This is often the case with Kubernetes setups. This approach has been tested with kube-aws but the same principles apply to other tools.
|
||||||
|
|
||||||
Note that [etcd.jsonnet](../examples/etcd.jsonnet) & [kube-prometheus-static-etcd.libsonnet](../jsonnet/kube-prometheus/kube-prometheus-static-etcd.libsonnet) (which are described by a section of the [Readme](../README.md#static-etcd-configuration)) do the following:
|
Note that [etcd.jsonnet](../examples/etcd.jsonnet) & [static-etcd.libsonnet](../jsonnet/kube-prometheus/addons/static-etcd.libsonnet) (which are described by a section of the [Readme](../README.md#static-etcd-configuration)) do the following:
|
||||||
* Put the three etcd TLS client files (CA & cert & key) into a secret in the namespace, and have Prometheus Operator load the secret.
|
* Put the three etcd TLS client files (CA & cert & key) into a secret in the namespace, and have Prometheus Operator load the secret.
|
||||||
* Create the following (to expose etcd metrics - port 2379): a Service, Endpoint, & ServiceMonitor.
|
* Create the following (to expose etcd metrics - port 2379): a Service, Endpoint, & ServiceMonitor.
|
||||||
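Putting those pieces together, enabling the addon usually amounts to importing it and supplying the etcd member IPs plus the TLS client files, roughly as sketched below. The field names follow the shape used by `etcd.jsonnet`, but check the addon for your release; the IPs, file names and server name are placeholders.

```jsonnet
local kp = (import 'kube-prometheus/main.libsonnet') +
  (import 'kube-prometheus/addons/static-etcd.libsonnet') + {
    values+:: {
      etcd+: {
        // Placeholder values - point these at your etcd members and TLS material.
        ips: ['10.0.0.10', '10.0.0.11', '10.0.0.12'],
        clientCA: importstr 'etcd-client-ca.crt',
        clientKey: importstr 'etcd-client.key',
        clientCert: importstr 'etcd-client.crt',
        serverName: 'etcd.kube-system.svc.cluster.local',
      },
    },
  };

// Render the manifests as shown in the main readme, e.g.:
{ ['prometheus-' + name]: kp.prometheus[name] for name in std.objectFields(kp.prometheus) }
```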
|
|
||||||
# Step 1: Open the port
|
# Step 1: Open the port
|
||||||
|
|
||||||
@@ -26,6 +26,7 @@ You now need to allow the nodes Prometheus are running on to talk to the etcd on
|
|||||||
If using kube-aws, you will need to edit the etcd security group inbound, specifying the security group of your Kubernetes node (worker) as the source.
|
If using kube-aws, you will need to edit the etcd security group inbound, specifying the security group of your Kubernetes node (worker) as the source.
|
||||||
|
|
||||||
## kube-aws and EIP or ENI inconsistency
|
## kube-aws and EIP or ENI inconsistency
|
||||||
|
|
||||||
With kube-aws, each etcd node has two IP addresses:
|
With kube-aws, each etcd node has two IP addresses:
|
||||||
|
|
||||||
* EC2 instance IP
|
* EC2 instance IP
|
||||||
@@ -40,6 +41,7 @@ Another idea woud be to use the DNS entries of etcd, but those are not currently
|
|||||||
# Step 2: verify
|
# Step 2: verify
|
||||||
|
|
||||||
Go to the Prometheus UI on :9090/config and check that you have an etcd job entry:
|
Go to the Prometheus UI on :9090/config and check that you have an etcd job entry:
|
||||||
|
|
||||||
```
|
```
|
||||||
- job_name: monitoring/etcd-k8s/0
|
- job_name: monitoring/etcd-k8s/0
|
||||||
scrape_interval: 30s
|
scrape_interval: 30s
|
||||||
@@ -48,6 +50,5 @@ Go to the Prometheus UI on :9090/config and check that you have an etcd job entr
|
|||||||
```
|
```
|
||||||
|
|
||||||
On the :9090/targets page:
|
On the :9090/targets page:
|
||||||
* You should see "etcd" with the UP state. If not, check the Error column for more information.
|
* You should see "etcd" with the UP state. If not, check the Error column for more information.
|
||||||
* If no "etcd" targets are even shown on this page, prometheus isn't attempting to scrape it.
|
* If no "etcd" targets are even shown on this page, prometheus isn't attempting to scrape it.
|
||||||
|
|
||||||
|
|||||||
@@ -1,24 +1,26 @@
|
|||||||
---
|
---
|
||||||
title: "Monitoring other Namespaces"
|
|
||||||
description: "This guide will help you monitor applications in other Namespaces."
|
|
||||||
lead: "This guide will help you monitor applications in other Namespaces."
|
|
||||||
date: 2021-03-08T23:04:32+01:00
|
|
||||||
draft: false
|
|
||||||
images: []
|
|
||||||
menu:
|
|
||||||
docs:
|
|
||||||
parent: "kube"
|
|
||||||
weight: 640
|
weight: 640
|
||||||
toc: true
|
toc: true
|
||||||
|
title: Monitoring other Namespaces
|
||||||
|
menu:
|
||||||
|
docs:
|
||||||
|
parent: kube
|
||||||
|
lead: This guide will help you monitor applications in other Namespaces.
|
||||||
|
images: []
|
||||||
|
draft: false
|
||||||
|
description: This guide will help you monitor applications in other Namespaces.
|
||||||
|
date: "2021-03-08T23:04:32+01:00"
|
||||||
---
|
---
|
||||||
|
|
||||||
This guide will help you monitor applications in other Namespaces. By default, the RBAC rules are only enabled for the `default` and `kube-system` namespaces during install.
|
This guide will help you monitor applications in other Namespaces. By default, the RBAC rules are only enabled for the `default` and `kube-system` namespaces during install.
|
||||||
|
|
||||||
# Setup
|
# Setup
|
||||||
|
|
||||||
You have to give the list of the Namespaces that you want to be able to monitor.
|
You have to give the list of the Namespaces that you want to be able to monitor.
|
||||||
This is done in the variable `prometheus.roleSpecificNamespaces`. You usually set this in your `.jsonnet` file when building the manifests.
|
This is done in the variable `prometheus.roleSpecificNamespaces`. You usually set this in your `.jsonnet` file when building the manifests.
|
||||||
|
|
||||||
Example to create the needed `Role` and `RoleBinding` for the Namespace `foo` :
|
Example to create the needed `Role` and `RoleBinding` for the Namespace `foo` :
|
||||||
|
|
||||||
```
|
```
|
||||||
local kp = (import 'kube-prometheus/main.libsonnet') + {
|
local kp = (import 'kube-prometheus/main.libsonnet') + {
|
||||||
_config+:: {
|
_config+:: {
|
||||||
|
|||||||
@@ -1,9 +1,11 @@
|
|||||||
# Setup Weave Net monitoring using kube-prometheus
|
# Setup Weave Net monitoring using kube-prometheus
|
||||||
|
|
||||||
[Weave Net](https://kubernetes.io/docs/concepts/cluster-administration/networking/#weave-net-from-weaveworks) is a resilient and simple-to-use CNI provider for Kubernetes. A well-monitored and observed CNI provider helps in troubleshooting Kubernetes networking problems. [Weave Net](https://www.weave.works/docs/net/latest/concepts/how-it-works/) emits [Prometheus metrics](https://www.weave.works/docs/net/latest/tasks/manage/metrics/) for monitoring Weave Net. There are many ways to install Weave Net in your cluster. One of them is using [kops](https://github.com/kubernetes/kops/blob/master/docs/networking.md).
|
[Weave Net](https://kubernetes.io/docs/concepts/cluster-administration/networking/#weave-net-from-weaveworks) is a resilient and simple-to-use CNI provider for Kubernetes. A well-monitored and observed CNI provider helps in troubleshooting Kubernetes networking problems. [Weave Net](https://www.weave.works/docs/net/latest/concepts/how-it-works/) emits [Prometheus metrics](https://www.weave.works/docs/net/latest/tasks/manage/metrics/) for monitoring Weave Net. There are many ways to install Weave Net in your cluster. One of them is using [kops](https://github.com/kubernetes/kops/blob/master/docs/networking.md).
|
||||||
|
|
||||||
Following this document, you can set up Weave Net monitoring for your cluster using kube-prometheus.
|
Following this document, you can set up Weave Net monitoring for your cluster using kube-prometheus.
|
||||||
|
|
||||||
## Contents
|
## Contents
|
||||||
|
|
||||||
Using kube-prometheus and kubectl you will be able to install the following for monitoring Weave Net in your cluster:
|
Using kube-prometheus and kubectl you will be able to install the following for monitoring Weave Net in your cluster:
|
||||||
|
|
||||||
1. [Service for Weave Net](https://gist.github.com/alok87/379c6234b582f555c141f6fddea9fbce) The service which the [service monitor](https://coreos.com/operators/prometheus/docs/latest/user-guides/cluster-monitoring.html) scrapes.
|
1. [Service for Weave Net](https://gist.github.com/alok87/379c6234b582f555c141f6fddea9fbce) The service which the [service monitor](https://coreos.com/operators/prometheus/docs/latest/user-guides/cluster-monitoring.html) scrapes.
|
||||||
@@ -15,8 +17,7 @@ Using kube-prometheus and kubectl you will be able install the following for mon
|
|||||||
## Instructions
|
## Instructions
|
||||||
- You can monitor Weave Net using an example like below. **Please note that some alert configurations are environment specific and may require modifications of alert thresholds**. For example: The FastDP flows have never gone below 15000 for us. But if this value is say 20000 for you then you can use an example like below to update the alert. The alerts which may require threshold modifications are `WeaveNetFastDPFlowsLow` and `WeaveNetIPAMUnreachable`.
|
- You can monitor Weave Net using an example like below. **Please note that some alert configurations are environment specific and may require modifications of alert thresholds**. For example: The FastDP flows have never gone below 15000 for us. But if this value is say 20000 for you then you can use an example like below to update the alert. The alerts which may require threshold modifications are `WeaveNetFastDPFlowsLow` and `WeaveNetIPAMUnreachable`.
|
||||||
|
|
||||||
[embedmd]:# (../examples/weave-net-example.jsonnet)
|
```jsonnet mdox-exec="cat examples/weave-net-example.jsonnet"
|
||||||
```jsonnet
|
|
||||||
local kp = (import 'kube-prometheus/main.libsonnet') +
|
local kp = (import 'kube-prometheus/main.libsonnet') +
|
||||||
(import 'kube-prometheus/addons/weave-net/weave-net.libsonnet') + {
|
(import 'kube-prometheus/addons/weave-net/weave-net.libsonnet') + {
|
||||||
values+:: {
|
values+:: {
|
||||||
@@ -66,6 +67,7 @@ local kp = (import 'kube-prometheus/main.libsonnet') +
|
|||||||
```
|
```
|
||||||
|
|
||||||
- After you have the required YAML files, please run
|
- After you have the required YAML files, please run
|
||||||
|
|
||||||
```
|
```
|
||||||
kubectl create -f prometheus-serviceWeaveNet.yaml
|
kubectl create -f prometheus-serviceWeaveNet.yaml
|
||||||
kubectl create -f prometheus-serviceMonitorWeaveNet.yaml
|
kubectl create -f prometheus-serviceMonitorWeaveNet.yaml
|
||||||
|
|||||||
@@ -1,11 +1,10 @@
|
|||||||
# Windows
|
# Windows
|
||||||
|
|
||||||
The [Windows addon](../examples/windows.jsonnet) adds the dashboards and rules from [kubernetes-monitoring/kubernetes-mixin](https://github.com/kubernetes-monitoring/kubernetes-mixin#dashboards-for-windows-nodes).
|
The [Windows addon](../examples/windows.jsonnet) adds the dashboards and rules from [kubernetes-monitoring/kubernetes-mixin](https://github.com/kubernetes-monitoring/kubernetes-mixin#dashboards-for-windows-nodes).
|
||||||
|
|
||||||
Currently, Windows does not support running with [windows_exporter](https://github.com/prometheus-community/windows_exporter) in a pod so this add on uses [additional scrape configuration](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/additional-scrape-config.md) to set up a static config to scrape the node ports where windows_exporter is configured.
|
Currently, Windows does not support running with [windows_exporter](https://github.com/prometheus-community/windows_exporter) in a pod so this add on uses [additional scrape configuration](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/additional-scrape-config.md) to set up a static config to scrape the node ports where windows_exporter is configured.
|
||||||
|
|
||||||
|
The addon requires you to specify the node ips and ports where it can find the windows_exporter. See the [full example](../examples/windows.jsonnet) for setup.
|
||||||
The addon requires you to specify the node ips and ports where it can find the windows_exporter. See the [full example](../examples/windows.jsonnet) for setup.
|
|
||||||
|
|
||||||
```
|
```
|
||||||
local kp = (import 'kube-prometheus/main.libsonnet') +
|
local kp = (import 'kube-prometheus/main.libsonnet') +
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ local kp = (import 'kube-prometheus/main.libsonnet') + {
|
|||||||
],
|
],
|
||||||
selector: {
|
selector: {
|
||||||
matchLabels: {
|
matchLabels: {
|
||||||
app: 'myapp',
|
'app.kubernetes.io/name': 'myapp',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -19,4 +19,4 @@ spec:
|
|||||||
- logging
|
- logging
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
app: myapp
|
app.kubernetes.io/name: myapp
|
||||||
@@ -7,7 +7,7 @@ metadata:
|
|||||||
namespace: default
|
namespace: default
|
||||||
spec:
|
spec:
|
||||||
selector:
|
selector:
|
||||||
app: example-app
|
app.kubernetes.io/name: example-app
|
||||||
ports:
|
ports:
|
||||||
- name: web
|
- name: web
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
@@ -22,13 +22,13 @@ metadata:
|
|||||||
spec:
|
spec:
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
app: example-app
|
app.kubernetes.io/name: example-app
|
||||||
version: 1.1.3
|
version: 1.1.3
|
||||||
replicas: 4
|
replicas: 4
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app: example-app
|
app.kubernetes.io/name: example-app
|
||||||
version: 1.1.3
|
version: 1.1.3
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
|
|||||||
36
examples/grafana-ldap.jsonnet
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
local kp =
|
||||||
|
(import 'kube-prometheus/main.libsonnet') +
|
||||||
|
{
|
||||||
|
values+:: {
|
||||||
|
common+: {
|
||||||
|
namespace: 'monitoring',
|
||||||
|
},
|
||||||
|
grafana+: {
|
||||||
|
config+: {
|
||||||
|
sections: {
|
||||||
|
'auth.ldap': {
|
||||||
|
enabled: true,
|
||||||
|
config_file: '/etc/grafana/ldap.toml',
|
||||||
|
allow_sign_up: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
ldap: |||
|
||||||
|
[[servers]]
|
||||||
|
host = "127.0.0.1"
|
||||||
|
port = 389
|
||||||
|
use_ssl = false
|
||||||
|
start_tls = false
|
||||||
|
ssl_skip_verify = false
|
||||||
|
|
||||||
|
bind_dn = "cn=admins,dc=example,dc=com"
|
||||||
|
bind_password = 'grafana'
|
||||||
|
|
||||||
|
search_filter = "(cn=%s)"
|
||||||
|
search_base_dns = ["dc=example,dc=com"]
|
||||||
|
|||,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
{ ['grafana-' + name]: kp.grafana[name] for name in std.objectFields(kp.grafana) }
|
||||||
@@ -31,6 +31,10 @@ local withImageRepository(repository) = {
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
{
|
||||||
|
imageName:: imageName,
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
withImageRepository:: withImageRepository,
|
withImageRepository:: withImageRepository,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -81,7 +81,7 @@
|
|||||||
},
|
},
|
||||||
|
|
||||||
deployment:
|
deployment:
|
||||||
local podLabels = { app: 'ksm-autoscaler' };
|
local podLabels = { 'app.kubernetes.io/name': 'ksm-autoscaler' };
|
||||||
local c = {
|
local c = {
|
||||||
name: 'ksm-autoscaler',
|
name: 'ksm-autoscaler',
|
||||||
image: $.values.clusterVerticalAutoscaler.image,
|
image: $.values.clusterVerticalAutoscaler.image,
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ local defaults = {
|
|||||||
if !std.setMember(labelName, ['app.kubernetes.io/version'])
|
if !std.setMember(labelName, ['app.kubernetes.io/version'])
|
||||||
},
|
},
|
||||||
name: error 'must provide name',
|
name: error 'must provide name',
|
||||||
|
reloaderPort: 8080,
|
||||||
config: {
|
config: {
|
||||||
global: {
|
global: {
|
||||||
resolve_timeout: '5m',
|
resolve_timeout: '5m',
|
||||||
@@ -136,9 +137,9 @@ function(params) {
|
|||||||
spec: {
|
spec: {
|
||||||
ports: [
|
ports: [
|
||||||
{ name: 'web', targetPort: 'web', port: 9093 },
|
{ name: 'web', targetPort: 'web', port: 9093 },
|
||||||
|
{ name: 'reloader-web', port: am._config.reloaderPort, targetPort: 'reloader-web' },
|
||||||
],
|
],
|
||||||
selector: {
|
selector: {
|
||||||
app: 'alertmanager',
|
|
||||||
alertmanager: am._config.name,
|
alertmanager: am._config.name,
|
||||||
} + am._config.selectorLabels,
|
} + am._config.selectorLabels,
|
||||||
sessionAffinity: 'ClientIP',
|
sessionAffinity: 'ClientIP',
|
||||||
@@ -161,12 +162,13 @@ function(params) {
|
|||||||
},
|
},
|
||||||
endpoints: [
|
endpoints: [
|
||||||
{ port: 'web', interval: '30s' },
|
{ port: 'web', interval: '30s' },
|
||||||
|
{ port: 'reloader-web', interval: '30s' },
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
[if (defaults + params).replicas > 1 then 'podDisruptionBudget']: {
|
[if (defaults + params).replicas > 1 then 'podDisruptionBudget']: {
|
||||||
apiVersion: 'policy/v1beta1',
|
apiVersion: 'policy/v1',
|
||||||
kind: 'PodDisruptionBudget',
|
kind: 'PodDisruptionBudget',
|
||||||
metadata: {
|
metadata: {
|
||||||
name: 'alertmanager-' + am._config.name,
|
name: 'alertmanager-' + am._config.name,
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
local kubernetesGrafana = import 'github.com/brancz/kubernetes-grafana/grafana/grafana.libsonnet';
|
||||||
|
|
||||||
local defaults = {
|
local defaults = {
|
||||||
local defaults = self,
|
local defaults = self,
|
||||||
name: 'grafana',
|
name: 'grafana',
|
||||||
@@ -20,86 +22,35 @@ local defaults = {
|
|||||||
if !std.setMember(labelName, ['app.kubernetes.io/version'])
|
if !std.setMember(labelName, ['app.kubernetes.io/version'])
|
||||||
},
|
},
|
||||||
prometheusName: error 'must provide prometheus name',
|
prometheusName: error 'must provide prometheus name',
|
||||||
dashboards: {},
|
|
||||||
// TODO(paulfantom): expose those to have a stable API. After kubernetes-grafana refactor those could probably be removed.
|
|
||||||
rawDashboards: {},
|
|
||||||
folderDashboards: {},
|
|
||||||
containers: [],
|
|
||||||
datasources: [],
|
|
||||||
config: {},
|
|
||||||
plugins: [],
|
|
||||||
env: [],
|
|
||||||
};
|
};
|
||||||
|
|
||||||
function(params) {
|
function(params)
|
||||||
local g = self,
|
local config = defaults + params;
|
||||||
_config:: defaults + params,
|
|
||||||
// Safety check
|
// Safety check
|
||||||
assert std.isObject(g._config.resources),
|
assert std.isObject(config.resources);
|
||||||
|
|
||||||
local glib = (import 'github.com/brancz/kubernetes-grafana/grafana/grafana.libsonnet') + {
|
kubernetesGrafana(config) {
|
||||||
_config+:: {
|
local g = self,
|
||||||
namespace: g._config.namespace,
|
_config+:: config,
|
||||||
versions+:: {
|
|
||||||
grafana: g._config.version,
|
serviceMonitor: {
|
||||||
},
|
apiVersion: 'monitoring.coreos.com/v1',
|
||||||
imageRepos+:: {
|
kind: 'ServiceMonitor',
|
||||||
grafana: std.split(g._config.image, ':')[0],
|
metadata: {
|
||||||
},
|
name: 'grafana',
|
||||||
prometheus+:: {
|
namespace: g._config.namespace,
|
||||||
name: g._config.prometheusName,
|
|
||||||
},
|
|
||||||
grafana+:: {
|
|
||||||
labels: g._config.commonLabels,
|
labels: g._config.commonLabels,
|
||||||
dashboards: g._config.dashboards,
|
|
||||||
resources: g._config.resources,
|
|
||||||
rawDashboards: g._config.rawDashboards,
|
|
||||||
folderDashboards: g._config.folderDashboards,
|
|
||||||
containers: g._config.containers,
|
|
||||||
config+: g._config.config,
|
|
||||||
plugins+: g._config.plugins,
|
|
||||||
env: g._config.env,
|
|
||||||
} + (
|
|
||||||
// Conditionally overwrite default setting.
|
|
||||||
if std.length(g._config.datasources) > 0 then
|
|
||||||
{ datasources: g._config.datasources }
|
|
||||||
else {}
|
|
||||||
),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
|
|
||||||
config: glib.grafana.config,
|
|
||||||
service: glib.grafana.service,
|
|
||||||
serviceAccount: glib.grafana.serviceAccount,
|
|
||||||
deployment: glib.grafana.deployment,
|
|
||||||
dashboardDatasources: glib.grafana.dashboardDatasources,
|
|
||||||
dashboardSources: glib.grafana.dashboardSources,
|
|
||||||
|
|
||||||
dashboardDefinitions: if std.length(g._config.dashboards) > 0 ||
|
|
||||||
std.length(g._config.rawDashboards) > 0 ||
|
|
||||||
std.length(g._config.folderDashboards) > 0 then {
|
|
||||||
apiVersion: 'v1',
|
|
||||||
kind: 'ConfigMapList',
|
|
||||||
items: glib.grafana.dashboardDefinitions,
|
|
||||||
},
|
|
||||||
serviceMonitor: {
|
|
||||||
apiVersion: 'monitoring.coreos.com/v1',
|
|
||||||
kind: 'ServiceMonitor',
|
|
||||||
metadata: {
|
|
||||||
name: 'grafana',
|
|
||||||
namespace: g._config.namespace,
|
|
||||||
labels: g._config.commonLabels,
|
|
||||||
},
|
|
||||||
spec: {
|
|
||||||
selector: {
|
|
||||||
matchLabels: {
|
|
||||||
'app.kubernetes.io/name': 'grafana',
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
endpoints: [{
|
spec: {
|
||||||
port: 'http',
|
selector: {
|
||||||
interval: '15s',
|
matchLabels: {
|
||||||
}],
|
'app.kubernetes.io/name': 'grafana',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
endpoints: [{
|
||||||
|
port: 'http',
|
||||||
|
interval: '15s',
|
||||||
|
}],
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
}
|
||||||
}
|
|
||||||
|
|||||||
@@ -127,9 +127,7 @@ function(params) {
|
|||||||
action: 'drop',
|
action: 'drop',
|
||||||
regex: '(' + std.join('|',
|
regex: '(' + std.join('|',
|
||||||
[
|
[
|
||||||
'container_fs_.*', // add filesystem read/write data (nodes*disks*services*4)
|
|
||||||
'container_spec_.*', // everything related to cgroup specification and thus static data (nodes*services*5)
|
'container_spec_.*', // everything related to cgroup specification and thus static data (nodes*services*5)
|
||||||
'container_blkio_device_usage_total', // useful for containers, but not for system services (nodes*disks*services*operations*2)
|
|
||||||
'container_file_descriptors', // file descriptors limits and global numbers are exposed via (nodes*services)
|
'container_file_descriptors', // file descriptors limits and global numbers are exposed via (nodes*services)
|
||||||
'container_sockets', // used sockets in cgroup. Usually not important for system services (nodes*services)
|
'container_sockets', // used sockets in cgroup. Usually not important for system services (nodes*services)
|
||||||
'container_threads_max', // max number of threads in cgroup. Usually for system services it is not limited (nodes*services)
|
'container_threads_max', // max number of threads in cgroup. Usually for system services it is not limited (nodes*services)
|
||||||
@@ -138,6 +136,14 @@ function(params) {
|
|||||||
'container_last_seen', // not needed as system services are always running (nodes*services)
|
'container_last_seen', // not needed as system services are always running (nodes*services)
|
||||||
]) + ');;',
|
]) + ');;',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
sourceLabels: ['__name__', 'container'],
|
||||||
|
action: 'drop',
|
||||||
|
regex: '(' + std.join('|',
|
||||||
|
[
|
||||||
|
'container_blkio_device_usage_total',
|
||||||
|
]) + ');.+',
|
||||||
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ local defaults = {
|
|||||||
limits: { cpu: '250m', memory: '180Mi' },
|
limits: { cpu: '250m', memory: '180Mi' },
|
||||||
},
|
},
|
||||||
listenAddress: '127.0.0.1',
|
listenAddress: '127.0.0.1',
|
||||||
|
filesystemMountPointsExclude: '^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)',
|
||||||
port: 9100,
|
port: 9100,
|
||||||
commonLabels:: {
|
commonLabels:: {
|
||||||
'app.kubernetes.io/name': defaults.name,
|
'app.kubernetes.io/name': defaults.name,
|
||||||
@@ -180,7 +181,7 @@ function(params) {
|
|||||||
'--path.rootfs=/host/root',
|
'--path.rootfs=/host/root',
|
||||||
'--no-collector.wifi',
|
'--no-collector.wifi',
|
||||||
'--no-collector.hwmon',
|
'--no-collector.hwmon',
|
||||||
'--collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)',
|
'--collector.filesystem.mount-points-exclude=' + ne._config.filesystemMountPointsExclude,
|
||||||
// NOTE: ignore veth network interface associated with containers.
|
// NOTE: ignore veth network interface associated with containers.
|
||||||
// OVN renames veth.* to <rand-hex>@if<X> where X is /sys/class/net/<if>/ifindex
|
// OVN renames veth.* to <rand-hex>@if<X> where X is /sys/class/net/<if>/ifindex
|
||||||
// thus [a-z0-9] regex below
|
// thus [a-z0-9] regex below
|
||||||
|
|||||||
@@ -172,6 +172,21 @@ function(params) {
           insecureSkipVerify: true,
         },
         bearerTokenFile: '/var/run/secrets/kubernetes.io/serviceaccount/token',
+        metricRelabelings: [
+          {
+            sourceLabels: ['__name__'],
+            action: 'drop',
+            regex: '(' + std.join('|',
+              [
+                'apiserver_client_certificate_.*', // The only client supposed to connect to the aggregated API is the apiserver so it is not really meaningful to monitor its certificate.
+                'apiserver_envelope_.*', // Prometheus-adapter isn't using envelope for storage.
+                'apiserver_flowcontrol_.*', // Prometheus-adapter isn't using flowcontrol.
+                'apiserver_storage_.*', // Prometheus-adapter isn't using the apiserver storage.
+                'apiserver_webhooks_.*', // Prometeus-adapter doesn't make use of apiserver webhooks.
+                'workqueue_.*', // Metrics related to the internal apiserver auth workqueues are not very useful to prometheus-adapter.
+              ]) + ')',
+          },
+        ],
       },
     ],
   },
@@ -363,7 +378,7 @@ function(params) {
   },

   [if (defaults + params).replicas > 1 then 'podDisruptionBudget']: {
-    apiVersion: 'policy/v1beta1',
+    apiVersion: 'policy/v1',
     kind: 'PodDisruptionBudget',
     metadata: {
       name: pa._config.name,
@@ -35,6 +35,7 @@ local defaults = {
     },
   },
   thanos: null,
+  reloaderPort: 8080,
};


@@ -58,6 +59,7 @@ function(params) {
       targetGroups: {},
       sidecar: {
         selector: p._config.mixin._config.thanosSelector,
+        thanosPrometheusCommonDimensions: 'namespace, pod',
         dimensions: std.join(', ', ['job', 'instance']),
       },
     },
@@ -98,13 +100,14 @@ function(params) {
     spec: {
       ports: [
                { name: 'web', targetPort: 'web', port: 9090 },
+               { name: 'reloader-web', port: p._config.reloaderPort, targetPort: 'reloader-web' },
              ] +
              (
                if p._config.thanos != null then
                  [{ name: 'grpc', port: 10901, targetPort: 10901 }]
                else []
              ),
-      selector: { app: 'prometheus' } + p._config.selectorLabels,
+      selector: p._config.selectorLabels,
       sessionAffinity: 'ClientIP',
     },
   },
@@ -243,7 +246,7 @@ function(params) {
   },

   [if (defaults + params).replicas > 1 then 'podDisruptionBudget']: {
-    apiVersion: 'policy/v1beta1',
+    apiVersion: 'policy/v1',
     kind: 'PodDisruptionBudget',
     metadata: {
       name: 'prometheus-' + p._config.name,
@@ -317,10 +320,10 @@ function(params) {
       selector: {
         matchLabels: p._config.selectorLabels,
       },
-      endpoints: [{
-        port: 'web',
-        interval: '30s',
-      }],
+      endpoints: [
+        { port: 'web', interval: '30s' },
+        { port: 'reloader-web', interval: '30s' },
+      ],
     },
   },

@@ -39,7 +39,7 @@ local utils = import './lib/utils.libsonnet';
     images: {
       alertmanager: 'quay.io/prometheus/alertmanager:v' + $.values.common.versions.alertmanager,
       blackboxExporter: 'quay.io/prometheus/blackbox-exporter:v' + $.values.common.versions.blackboxExporter,
-      grafana: 'grafana/grafana:v' + $.values.common.versions.grafana,
+      grafana: 'grafana/grafana:' + $.values.common.versions.grafana,
       kubeStateMetrics: 'k8s.gcr.io/kube-state-metrics/kube-state-metrics:v' + $.values.common.versions.kubeStateMetrics,
       nodeExporter: 'quay.io/prometheus/node-exporter:v' + $.values.common.versions.nodeExporter,
       prometheus: 'quay.io/prometheus/prometheus:v' + $.values.common.versions.prometheus,
@@ -1,3 +1,3 @@
 # Adding a new platform specific configuration

-Adding a new platform specific configuration requires to update the [README](../../../README.md#cluster-creation-tools) and the [platforms.jsonnet](./platform.jsonnet) file by adding the platform to the list of existing ones. This allow the new platform to be discoverable and easily configurable by the users.
+Adding a new platform specific configuration requires to update the [README](../../../README.md#cluster-creation-tools) and the [platforms.libsonnet](platforms.libsonnet) file by adding the platform to the list of existing ones. This allow the new platform to be discoverable and easily configurable by the users.
@@ -1,12 +1,12 @@
 {
   "alertmanager": "0.23.0",
   "blackboxExporter": "0.19.0",
-  "grafana": "8.1.3",
+  "grafana": "8.2.1",
-  "kubeStateMetrics": "2.2.0",
+  "kubeStateMetrics": "2.2.3",
   "nodeExporter": "1.2.2",
-  "prometheus": "2.29.2",
+  "prometheus": "2.30.3",
-  "prometheusAdapter": "0.9.0",
+  "prometheusAdapter": "0.9.1",
-  "prometheusOperator": "0.50.0",
+  "prometheusOperator": "0.51.2",
   "kubeRbacProxy": "0.11.0",
   "configmapReload": "0.5.0"
 }
@@ -8,8 +8,8 @@
         "subdir": "grafana"
       }
     },
-    "version": "c3b14b24b83cfe9abf1064649d19e2d679f033fb",
-    "sum": "YrE4DNQsWgYWs6h0j/FjQETt8xDXdYdsslb1WK7xQEk="
+    "version": "199e363523104ff8b3a12483a4e3eca86372b078",
+    "sum": "/jDHzVAjHB4AOLkJHw1GyATX5ogZ1iMdcJXZAgaG3+g="
   },
   {
     "source": {
@@ -18,8 +18,8 @@
         "subdir": "contrib/mixin"
       }
     },
-    "version": "c2937d78d2722d774f69dbf91a956f382d32f4d3",
-    "sum": "5XhYOigrKipOWDbIn9hlrz7JcbelzvJnormxSaup9JI="
+    "version": "38a7d79810bd273bd078bf0931480b743afee003",
+    "sum": "drRRtMPhvpUZ8v7Weqz7Cg2pwDA2cSb6X1pjBPoCx1w="
   },
   {
     "source": {
@@ -28,8 +28,8 @@
         "subdir": "grafonnet"
       }
     },
-    "version": "05fb200ee1a1816fc1b4c522071d5606d8dd71c1",
-    "sum": "mEoObbqbyVaXrHFEJSM2Nad31tOvadzIevWuyNHHBgI="
+    "version": "3626fc4dc2326931c530861ac5bebe39444f6cbf",
+    "sum": "gF8foHByYcB25jcUOBqP6jxk0OPifQMjPvKY0HaCk6w="
   },
   {
     "source": {
@@ -38,8 +38,8 @@
         "subdir": "grafana-builder"
       }
     },
-    "version": "746874e4836a4bfbb7034d32de0c98ab1282aaae",
-    "sum": "GRf2GvwEU4jhXV+JOonXSZ4wdDv8mnHBPCQ6TUVd+g8="
+    "version": "87b6b50706dfa57b2470470422770f8e7574b7db",
+    "sum": "U34Nd1ViO2LZ3D8IzygPPRfUcy6zOgCnTMVHZ+9O/QE="
   },
   {
     "source": {
@@ -48,8 +48,8 @@
         "subdir": ""
       }
     },
-    "version": "2b27a09a667091cef74776b690ccceaf55995e29",
-    "sum": "j2jPdrcM3iuaUK+6V9jWn2M3Fapr0KtI8FZ1KQoHIGA="
+    "version": "8dc2c0d69f762d943c5bfbdcc17645e346d610ca",
+    "sum": "TamniMXp0Jy6E5OMOYtcrTJ1P+rFTVNuiOZSkxvckb8="
   },
   {
     "source": {
@@ -58,7 +58,7 @@
         "subdir": "lib/promgrafonnet"
       }
     },
-    "version": "2b27a09a667091cef74776b690ccceaf55995e29",
+    "version": "8dc2c0d69f762d943c5bfbdcc17645e346d610ca",
     "sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps="
   },
   {
@@ -68,8 +68,8 @@
         "subdir": "jsonnet/kube-state-metrics"
       }
     },
-    "version": "d111b6d8e07f8dde1dfe7e688f44242e4aa4f734",
-    "sum": "S5qI+PJUdNeYOv76jH5nxwYS9N6U7CRxvyuB1wI4cTE="
+    "version": "b730cb415234509e6a1425c79e826f2e7688d27b",
+    "sum": "U1wzIpTAtOvC1yj43Y8PfvT0JfvnAcMfNH12Wi+ab0Y="
   },
   {
     "source": {
@@ -78,7 +78,7 @@
         "subdir": "jsonnet/kube-state-metrics-mixin"
       }
     },
-    "version": "d111b6d8e07f8dde1dfe7e688f44242e4aa4f734",
+    "version": "b730cb415234509e6a1425c79e826f2e7688d27b",
     "sum": "u8gaydJoxEjzizQ8jY8xSjYgWooPmxw+wIWdDxifMAk="
   },
   {
@@ -88,8 +88,8 @@
         "subdir": "jsonnet/mixin"
       }
     },
-    "version": "2c81b0cf6a5673e08057499a08ddce396b19dda4",
-    "sum": "6reUygVmQrLEWQzTKcH8ceDbvM+2ztK3z2VBR2K2l+U=",
+    "version": "f710e9d66a09efdb8edc144af555718b7d7ed2e3",
+    "sum": "qZ4WgiweaE6eeKtFK60QUjLO8sf2L9Q8fgafWvDcyfY=",
     "name": "prometheus-operator-mixin"
   },
   {
@@ -99,8 +99,8 @@
         "subdir": "jsonnet/prometheus-operator"
       }
     },
-    "version": "2c81b0cf6a5673e08057499a08ddce396b19dda4",
-    "sum": "WUuFzKqxzxmTWLeic/IU1SMjdCV/zClt11MHucJ9MSc="
+    "version": "f710e9d66a09efdb8edc144af555718b7d7ed2e3",
+    "sum": "4e3A/CccaxvLdWFPKJlC/P9RbPhSX6cH/Nj8+N1DBzg="
   },
   {
     "source": {
@@ -109,7 +109,7 @@
         "subdir": "doc/alertmanager-mixin"
       }
     },
-    "version": "44011410d7065487789c447ce55157ae6e0b917d",
+    "version": "1b8afe7cb5aafe59442e35979ec57401145ea26b",
     "sum": "pep+dHzfIjh2SU5pEkwilMCAT/NoL6YYflV4x8cr7vU=",
     "name": "alertmanager"
   },
@@ -120,8 +120,8 @@
         "subdir": "docs/node-mixin"
       }
     },
-    "version": "dc68e035a5b37a9a3b47e1547f07d96df29ba575",
-    "sum": "OFNs9Te1QMqSscXqNqMv0zwaJoJxaEg7NyQVNyT4VeA="
+    "version": "a59b2d89903229db0019f73200ec209758f2fd26",
+    "sum": "Yr1xB+EEdBYRbsCtl4MDvx6phDg3UoMQtfpWADHyeGk="
   },
   {
     "source": {
@@ -130,7 +130,7 @@
         "subdir": "documentation/prometheus-mixin"
      }
    },
-    "version": "46286cb6abfff961e8c257de091443e835ec444f",
+    "version": "c092a74be9cc3e8e3db41efe3136128cef6c1add",
     "sum": "m4VHwft4fUcxzL4+52lLZG/V5aH5ZEdjaweb88vISL0=",
     "name": "prometheus"
   },
@@ -141,8 +141,8 @@
         "subdir": "mixin"
       }
     },
-    "version": "2dd8c22e8c15f5ec0daaa07ae20be44bed419aa5",
-    "sum": "X+060DnePPeN/87fgj0SrfxVitywTk8hZA9V4nHxl1g=",
+    "version": "d2d53e575b489a8cbfc9e1723d0e3f62a68faf39",
+    "sum": "Og+wEHfgzXBvBLAeeQvGNoiCw3FY4LQHlJdpsG/owj8=",
     "name": "thanos-mixin"
   },
   {
@@ -1,4 +1,4 @@
-apiVersion: policy/v1beta1
+apiVersion: policy/v1
 kind: PodDisruptionBudget
 metadata:
   labels:
@@ -16,7 +16,8 @@ spec:
     rules:
     - alert: AlertmanagerFailedReload
      annotations:
-        description: Configuration has failed to load for {{ $labels.namespace }}/{{ $labels.pod}}.
+        description: Configuration has failed to load for {{ $labels.namespace }}/{{
+          $labels.pod}}.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedreload
        summary: Reloading an Alertmanager configuration has failed.
      expr: |
@@ -28,9 +29,11 @@ spec:
        severity: critical
    - alert: AlertmanagerMembersInconsistent
      annotations:
-        description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has only found {{ $value }} members of the {{$labels.job}} cluster.
+        description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has only
+          found {{ $value }} members of the {{$labels.job}} cluster.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagermembersinconsistent
-        summary: A member of an Alertmanager cluster has not found all other cluster members.
+        summary: A member of an Alertmanager cluster has not found all other cluster
+          members.
      expr: |
        # Without max_over_time, failed scrapes could create false negatives, see
        # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
@@ -42,7 +45,9 @@ spec:
        severity: critical
    - alert: AlertmanagerFailedToSendAlerts
      annotations:
-        description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed to send {{ $value | humanizePercentage }} of notifications to {{ $labels.integration }}.
+        description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed
+          to send {{ $value | humanizePercentage }} of notifications to {{ $labels.integration
+          }}.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerfailedtosendalerts
        summary: An Alertmanager instance failed to send notifications.
      expr: |
@@ -57,9 +62,12 @@ spec:
        severity: warning
    - alert: AlertmanagerClusterFailedToSendAlerts
      annotations:
-        description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}.
+        description: The minimum notification failure rate to {{ $labels.integration
+          }} sent from any instance in the {{$labels.job}} cluster is {{ $value |
+          humanizePercentage }}.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts
-        summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration.
+        summary: All Alertmanager instances in a cluster failed to send notifications
+          to a critical integration.
      expr: |
        min by (namespace,service, integration) (
          rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring", integration=~`.*`}[5m])
@@ -72,9 +80,12 @@ spec:
        severity: critical
    - alert: AlertmanagerClusterFailedToSendAlerts
      annotations:
-        description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}.
+        description: The minimum notification failure rate to {{ $labels.integration
+          }} sent from any instance in the {{$labels.job}} cluster is {{ $value |
+          humanizePercentage }}.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts
-        summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration.
+        summary: All Alertmanager instances in a cluster failed to send notifications
+          to a non-critical integration.
      expr: |
        min by (namespace,service, integration) (
          rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring", integration!~`.*`}[5m])
@@ -87,7 +98,8 @@ spec:
        severity: warning
    - alert: AlertmanagerConfigInconsistent
      annotations:
-        description: Alertmanager instances within the {{$labels.job}} cluster have different configurations.
+        description: Alertmanager instances within the {{$labels.job}} cluster have
+          different configurations.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerconfiginconsistent
        summary: Alertmanager instances within the same cluster have different configurations.
      expr: |
@@ -100,9 +112,12 @@ spec:
        severity: critical
    - alert: AlertmanagerClusterDown
      annotations:
-        description: '{{ $value | humanizePercentage }} of Alertmanager instances within the {{$labels.job}} cluster have been up for less than half of the last 5m.'
+        description: '{{ $value | humanizePercentage }} of Alertmanager instances
+          within the {{$labels.job}} cluster have been up for less than half of the
+          last 5m.'
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterdown
-        summary: Half or more of the Alertmanager instances within the same cluster are down.
+        summary: Half or more of the Alertmanager instances within the same cluster
+          are down.
      expr: |
        (
          count by (namespace,service) (
@@ -119,9 +134,12 @@ spec:
        severity: critical
    - alert: AlertmanagerClusterCrashlooping
      annotations:
-        description: '{{ $value | humanizePercentage }} of Alertmanager instances within the {{$labels.job}} cluster have restarted at least 5 times in the last 10m.'
+        description: '{{ $value | humanizePercentage }} of Alertmanager instances
+          within the {{$labels.job}} cluster have restarted at least 5 times in the
+          last 10m.'
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclustercrashlooping
-        summary: Half or more of the Alertmanager instances within the same cluster are crashlooping.
+        summary: Half or more of the Alertmanager instances within the same cluster
+          are crashlooping.
      expr: |
        (
          count by (namespace,service) (
@@ -14,9 +14,11 @@ spec:
   - name: web
     port: 9093
     targetPort: web
+  - name: reloader-web
+    port: 8080
+    targetPort: reloader-web
   selector:
     alertmanager: main
-    app: alertmanager
     app.kubernetes.io/component: alert-router
     app.kubernetes.io/name: alertmanager
     app.kubernetes.io/part-of: kube-prometheus
@@ -12,6 +12,8 @@ spec:
   endpoints:
   - interval: 30s
     port: web
+  - interval: 30s
+    port: reloader-web
   selector:
     matchLabels:
       alertmanager: main
@@ -5,7 +5,7 @@ metadata:
     app.kubernetes.io/component: grafana
     app.kubernetes.io/name: grafana
     app.kubernetes.io/part-of: kube-prometheus
-    app.kubernetes.io/version: 8.1.3
+    app.kubernetes.io/version: 8.2.1
   name: grafana-config
   namespace: monitoring
 stringData:
@@ -5,7 +5,7 @@ metadata:
     app.kubernetes.io/component: grafana
     app.kubernetes.io/name: grafana
     app.kubernetes.io/part-of: kube-prometheus
-    app.kubernetes.io/version: 8.1.3
+    app.kubernetes.io/version: 8.2.1
   name: grafana-datasources
   namespace: monitoring
 stringData:
File diff suppressed because it is too large
@@ -22,6 +22,6 @@ metadata:
     app.kubernetes.io/component: grafana
     app.kubernetes.io/name: grafana
     app.kubernetes.io/part-of: kube-prometheus
-    app.kubernetes.io/version: 8.1.3
+    app.kubernetes.io/version: 8.2.1
   name: grafana-dashboards
   namespace: monitoring
@@ -5,7 +5,7 @@ metadata:
     app.kubernetes.io/component: grafana
     app.kubernetes.io/name: grafana
     app.kubernetes.io/part-of: kube-prometheus
-    app.kubernetes.io/version: 8.1.3
+    app.kubernetes.io/version: 8.2.1
   name: grafana
   namespace: monitoring
 spec:
@@ -18,18 +18,18 @@ spec:
   template:
     metadata:
       annotations:
-        checksum/grafana-config: e1f5b84a1d40edb8a6527c98d24ff656
-        checksum/grafana-dashboardproviders: 2c7c248e5512bb5576d633004725159c
-        checksum/grafana-datasources: b2cbbea3079b8634b7bdf42cb56c1537
+        checksum/grafana-config: 11905dc0549e921f5d3befd288dbf9d5
+        checksum/grafana-dashboardproviders: 4278ba47b6379fd0ee12ad9c15fedda2
+        checksum/grafana-datasources: c83e12e4791b0aef701753f70bfc1fe9
       labels:
         app.kubernetes.io/component: grafana
         app.kubernetes.io/name: grafana
         app.kubernetes.io/part-of: kube-prometheus
-        app.kubernetes.io/version: 8.1.3
+        app.kubernetes.io/version: 8.2.1
     spec:
       containers:
       - env: []
-        image: grafana/grafana:8.1.3
+        image: grafana/grafana:8.2.1
         name: grafana
         ports:
         - containerPort: 3000
@@ -5,7 +5,7 @@ metadata:
     app.kubernetes.io/component: grafana
     app.kubernetes.io/name: grafana
     app.kubernetes.io/part-of: kube-prometheus
-    app.kubernetes.io/version: 8.1.3
+    app.kubernetes.io/version: 8.2.1
   name: grafana
   namespace: monitoring
 spec:
@@ -1,5 +1,10 @@
 apiVersion: v1
 kind: ServiceAccount
 metadata:
+  labels:
+    app.kubernetes.io/component: grafana
+    app.kubernetes.io/name: grafana
+    app.kubernetes.io/part-of: kube-prometheus
+    app.kubernetes.io/version: 8.2.1
   name: grafana
   namespace: monitoring
@@ -5,7 +5,7 @@ metadata:
     app.kubernetes.io/component: grafana
     app.kubernetes.io/name: grafana
     app.kubernetes.io/part-of: kube-prometheus
-    app.kubernetes.io/version: 8.1.3
+    app.kubernetes.io/version: 8.2.1
   name: grafana
   namespace: monitoring
 spec:
@@ -15,10 +15,12 @@ spec:
     rules:
    - alert: TargetDown
      annotations:
-        description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in {{ $labels.namespace }} namespace are down.'
+        description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service
+          }} targets in {{ $labels.namespace }} namespace are down.'
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown
        summary: One or more targets are unreachable.
-      expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10
+      expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job,
+        namespace, service)) > 10
      for: 10m
      labels:
        severity: warning
@@ -31,7 +33,8 @@ spec:
          mechanisms that send a notification when this alert is not firing. For example the
          "DeadMansSnitch" integration in PagerDuty.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog
-        summary: An alert that should always be firing to certify that Alertmanager is working properly.
+        summary: An alert that should always be firing to certify that Alertmanager
+          is working properly.
      expr: vector(1)
      labels:
        severity: none
@@ -39,7 +42,8 @@ spec:
     rules:
    - alert: NodeNetworkInterfaceFlapping
      annotations:
-        description: Network interface "{{ $labels.device }}" changing its up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}
+        description: Network interface "{{ $labels.device }}" changing its up status
+          often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/nodenetworkinterfaceflapping
        summary: Network interface is often changing its status
      expr: |
@@ -49,17 +53,21 @@ spec:
        severity: warning
  - name: kube-prometheus-node-recording.rules
     rules:
-    - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) BY (instance)
+    - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m]))
+        BY (instance)
      record: instance:node_cpu:rate:sum
    - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
      record: instance:node_network_receive_bytes:rate:sum
    - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
      record: instance:node_network_transmit_bytes:rate:sum
-    - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)
+    - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
+        WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total)
+        BY (instance, cpu)) BY (instance)
      record: instance:node_cpu:ratio
    - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m]))
      record: cluster:node_cpu:sum_rate5m
-    - expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))
+    - expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total)
+        BY (instance, cpu))
      record: cluster:node_cpu:ratio
  - name: kube-prometheus-general.rules
     rules:
@@ -5,7 +5,7 @@ metadata:
     app.kubernetes.io/component: exporter
     app.kubernetes.io/name: kube-state-metrics
     app.kubernetes.io/part-of: kube-prometheus
-    app.kubernetes.io/version: 2.2.0
+    app.kubernetes.io/version: 2.2.3
   name: kube-state-metrics
 rules:
 - apiGroups:
@@ -5,7 +5,7 @@ metadata:
     app.kubernetes.io/component: exporter
     app.kubernetes.io/name: kube-state-metrics
     app.kubernetes.io/part-of: kube-prometheus
-    app.kubernetes.io/version: 2.2.0
+    app.kubernetes.io/version: 2.2.3
   name: kube-state-metrics
 roleRef:
   apiGroup: rbac.authorization.k8s.io
@@ -5,7 +5,7 @@ metadata:
     app.kubernetes.io/component: exporter
     app.kubernetes.io/name: kube-state-metrics
     app.kubernetes.io/part-of: kube-prometheus
-    app.kubernetes.io/version: 2.2.0
+    app.kubernetes.io/version: 2.2.3
   name: kube-state-metrics
   namespace: monitoring
 spec:
@@ -23,7 +23,7 @@ spec:
        app.kubernetes.io/component: exporter
        app.kubernetes.io/name: kube-state-metrics
        app.kubernetes.io/part-of: kube-prometheus
-        app.kubernetes.io/version: 2.2.0
+        app.kubernetes.io/version: 2.2.3
     spec:
       containers:
       - args:
@@ -31,7 +31,7 @@ spec:
        - --port=8081
        - --telemetry-host=127.0.0.1
        - --telemetry-port=8082
-        image: k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.2.0
+        image: k8s.gcr.io/kube-state-metrics/kube-state-metrics:v2.2.3
        name: kube-state-metrics
        resources:
          limits:
@@ -5,7 +5,7 @@ metadata:
     app.kubernetes.io/component: exporter
     app.kubernetes.io/name: kube-state-metrics
     app.kubernetes.io/part-of: kube-prometheus
-    app.kubernetes.io/version: 2.2.0
+    app.kubernetes.io/version: 2.2.3
     prometheus: k8s
     role: alert-rules
   name: kube-state-metrics-rules
@@ -16,7 +16,9 @@ spec:
     rules:
    - alert: KubeStateMetricsListErrors
      annotations:
-        description: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
+        description: kube-state-metrics is experiencing errors at an elevated rate
+          in list operations. This is likely causing it to not be able to expose metrics
+          about Kubernetes objects correctly or at all.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricslisterrors
        summary: kube-state-metrics is experiencing errors in list operations.
      expr: |
@@ -29,7 +31,9 @@ spec:
        severity: critical
    - alert: KubeStateMetricsWatchErrors
      annotations:
-        description: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
+        description: kube-state-metrics is experiencing errors at an elevated rate
+          in watch operations. This is likely causing it to not be able to expose
+          metrics about Kubernetes objects correctly or at all.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricswatcherrors
        summary: kube-state-metrics is experiencing errors in watch operations.
      expr: |
@@ -42,7 +46,9 @@ spec:
        severity: critical
    - alert: KubeStateMetricsShardingMismatch
      annotations:
-        description: kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all.
+        description: kube-state-metrics pods are running with different --total-shards
+          configuration, some Kubernetes objects may be exposed multiple times or
+          not exposed at all.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardingmismatch
        summary: kube-state-metrics sharding is misconfigured.
      expr: |
@@ -52,7 +58,8 @@ spec:
        severity: critical
    - alert: KubeStateMetricsShardsMissing
      annotations:
-        description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed.
+        description: kube-state-metrics shards are missing, some Kubernetes objects
+          are not being exposed.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardsmissing
        summary: kube-state-metrics shards are missing.
      expr: |
@@ -5,7 +5,7 @@ metadata:
     app.kubernetes.io/component: exporter
     app.kubernetes.io/name: kube-state-metrics
     app.kubernetes.io/part-of: kube-prometheus
-    app.kubernetes.io/version: 2.2.0
+    app.kubernetes.io/version: 2.2.3
   name: kube-state-metrics
   namespace: monitoring
 spec:
@@ -5,6 +5,6 @@ metadata:
     app.kubernetes.io/component: exporter
     app.kubernetes.io/name: kube-state-metrics
     app.kubernetes.io/part-of: kube-prometheus
-    app.kubernetes.io/version: 2.2.0
+    app.kubernetes.io/version: 2.2.3
   name: kube-state-metrics
   namespace: monitoring
@@ -5,7 +5,7 @@ metadata:
     app.kubernetes.io/component: exporter
     app.kubernetes.io/name: kube-state-metrics
     app.kubernetes.io/part-of: kube-prometheus
-    app.kubernetes.io/version: 2.2.0
+    app.kubernetes.io/version: 2.2.3
   name: kube-state-metrics
   namespace: monitoring
 spec:
@@ -14,19 +14,19 @@ spec:
     rules:
    - alert: KubePodCrashLooping
      annotations:
-        description: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is restarting {{ printf "%.2f" $value }} times / 10 minutes.
+        description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
+          }}) is in waiting state (reason: "CrashLoopBackOff").'
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping
        summary: Pod is crash looping.
      expr: |
-        increase(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[10m]) > 0
-        and
-        kube_pod_container_status_waiting{job="kube-state-metrics"} == 1
+        max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", job="kube-state-metrics"}[5m]) >= 1
      for: 15m
      labels:
        severity: warning
    - alert: KubePodNotReady
      annotations:
-        description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes.
+        description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
+          state for longer than 15 minutes.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready
        summary: Pod has been in a non-ready state for more than 15 minutes.
      expr: |
@@ -42,7 +42,9 @@ spec:
        severity: warning
    - alert: KubeDeploymentGenerationMismatch
      annotations:
-        description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back.
+        description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
+          }} does not match, this indicates that the Deployment has failed but has
+          not been rolled back.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch
        summary: Deployment generation mismatch due to possible roll-back
      expr: |
@@ -54,7 +56,8 @@ spec:
        severity: warning
    - alert: KubeDeploymentReplicasMismatch
      annotations:
-        description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes.
+        description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has
+          not matched the expected number of replicas for longer than 15 minutes.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch
        summary: Deployment has not matched the expected number of replicas.
      expr: |
@@ -72,7 +75,8 @@ spec:
        severity: warning
    - alert: KubeStatefulSetReplicasMismatch
      annotations:
-        description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes.
+        description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }}
+          has not matched the expected number of replicas for longer than 15 minutes.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch
        summary: Deployment has not matched the expected number of replicas.
      expr: |
@@ -90,7 +94,9 @@ spec:
        severity: warning
    - alert: KubeStatefulSetGenerationMismatch
      annotations:
-        description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
+        description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
+          }} does not match, this indicates that the StatefulSet has failed but has
+          not been rolled back.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch
        summary: StatefulSet generation mismatch due to possible roll-back
      expr: |
@@ -102,7 +108,8 @@ spec:
        severity: warning
    - alert: KubeStatefulSetUpdateNotRolledOut
      annotations:
-        description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out.
+        description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }}
+          update has not been rolled out.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout
        summary: StatefulSet update has not been rolled out.
      expr: |
@@ -128,7 +135,8 @@ spec:
        severity: warning
    - alert: KubeDaemonSetRolloutStuck
      annotations:
-        description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15 minutes.
+        description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has
+          not finished or progressed for at least 15 minutes.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck
        summary: DaemonSet rollout is stuck.
      expr: |
@@ -160,7 +168,8 @@ spec:
        severity: warning
    - alert: KubeContainerWaiting
      annotations:
-        description: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} has been in waiting state for longer than 1 hour.
+        description: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}}
+          has been in waiting state for longer than 1 hour.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting
        summary: Pod container waiting longer than 1 hour
      expr: |
@@ -170,7 +179,8 @@ spec:
        severity: warning
    - alert: KubeDaemonSetNotScheduled
      annotations:
-        description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.'
+        description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
+          }} are not scheduled.'
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled
        summary: DaemonSet pods are not scheduled.
      expr: |
@@ -182,7 +192,8 @@ spec:
        severity: warning
    - alert: KubeDaemonSetMisScheduled
      annotations:
-        description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.'
+        description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
+          }} are running where they are not supposed to run.'
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled
        summary: DaemonSet pods are misscheduled.
      expr: |
@@ -192,7 +203,8 @@ spec:
        severity: warning
    - alert: KubeJobCompletion
      annotations:
-        description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than 12 hours to complete.
+        description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking
+          more than 12 hours to complete.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobcompletion
        summary: Job did not complete in time
      expr: |
@@ -202,7 +214,8 @@ spec:
        severity: warning
    - alert: KubeJobFailed
      annotations:
-        description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. Removing failed job after investigation should clear this alert.
+        description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to
+          complete. Removing failed job after investigation should clear this alert.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed
        summary: Job failed to complete.
      expr: |
@@ -212,7 +225,8 @@ spec:
        severity: warning
    - alert: KubeHpaReplicasMismatch
      annotations:
-        description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has not matched the desired number of replicas for longer than 15 minutes.
+        description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }}
+          has not matched the desired number of replicas for longer than 15 minutes.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch
        summary: HPA has not matched descired number of replicas.
      expr: |
@@ -234,7 +248,8 @@ spec:
        severity: warning
    - alert: KubeHpaMaxedOut
      annotations:
-        description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has been running at max replicas for longer than 15 minutes.
+        description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }}
+          has been running at max replicas for longer than 15 minutes.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout
        summary: HPA is running at max replicas
      expr: |
@@ -248,7 +263,8 @@ spec:
     rules:
    - alert: KubeCPUOvercommit
      annotations:
-        description: Cluster has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure.
+        description: Cluster has overcommitted CPU resource requests for Pods by {{
+          $value }} CPU shares and cannot tolerate node failure.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuovercommit
        summary: Cluster has overcommitted CPU resource requests.
      expr: |
@@ -260,7 +276,8 @@ spec:
        severity: warning
    - alert: KubeMemoryOvercommit
      annotations:
-        description: Cluster has overcommitted memory resource requests for Pods by {{ $value }} bytes and cannot tolerate node failure.
+        description: Cluster has overcommitted memory resource requests for Pods by
+          {{ $value }} bytes and cannot tolerate node failure.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit
        summary: Cluster has overcommitted memory resource requests.
      expr: |
@@ -298,7 +315,8 @@ spec:
        severity: warning
    - alert: KubeQuotaAlmostFull
      annotations:
-        description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.
+        description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
+          }} of its {{ $labels.resource }} quota.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull
        summary: Namespace quota is going to be full.
      expr: |
@@ -311,7 +329,8 @@ spec:
        severity: info
    - alert: KubeQuotaFullyUsed
      annotations:
-        description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.
+        description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
+          }} of its {{ $labels.resource }} quota.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused
        summary: Namespace quota is fully used.
      expr: |
@@ -324,7 +343,8 @@ spec:
        severity: info
    - alert: KubeQuotaExceeded
      annotations:
-        description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.
+        description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
+          }} of its {{ $labels.resource }} quota.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded
        summary: Namespace quota has exceeded the limits.
      expr: |
@@ -337,7 +357,9 @@ spec:
        severity: warning
    - alert: CPUThrottlingHigh
      annotations:
-        description: '{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.'
+        description: '{{ $value | humanizePercentage }} throttling of CPU in namespace
+          {{ $labels.namespace }} for container {{ $labels.container }} in pod {{
+          $labels.pod }}.'
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh
        summary: Processes experience elevated CPU throttling.
      expr: |
@@ -352,7 +374,9 @@ spec:
     rules:
    - alert: KubePersistentVolumeFillingUp
      annotations:
-        description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free.
+        description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
+          }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage
+          }} free.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
        summary: PersistentVolume is filling up.
      expr: |
@@ -363,12 +387,17 @@ spec:
        ) < 0.03
        and
        kubelet_volume_stats_used_bytes{job="kubelet", metrics_path="/metrics"} > 0
|
||||||
|
unless on(namespace, persistentvolumeclaim)
|
||||||
|
kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
|
||||||
for: 1m
|
for: 1m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
- alert: KubePersistentVolumeFillingUp
|
- alert: KubePersistentVolumeFillingUp
|
||||||
annotations:
|
annotations:
|
||||||
description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available.
|
description: Based on recent sampling, the PersistentVolume claimed by {{
|
||||||
|
$labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is
|
||||||
|
expected to fill up within four days. Currently {{ $value | humanizePercentage
|
||||||
|
}} is available.
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
|
||||||
summary: PersistentVolume is filling up.
|
summary: PersistentVolume is filling up.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -381,12 +410,15 @@ spec:
|
|||||||
kubelet_volume_stats_used_bytes{job="kubelet", metrics_path="/metrics"} > 0
|
kubelet_volume_stats_used_bytes{job="kubelet", metrics_path="/metrics"} > 0
|
||||||
and
|
and
|
||||||
predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
|
predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
|
||||||
|
unless on(namespace, persistentvolumeclaim)
|
||||||
|
kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
|
||||||
for: 1h
|
for: 1h
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
- alert: KubePersistentVolumeErrors
|
- alert: KubePersistentVolumeErrors
|
||||||
annotations:
|
annotations:
|
||||||
description: The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase }}.
|
description: The persistent volume {{ $labels.persistentvolume }} has status
|
||||||
|
{{ $labels.phase }}.
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeerrors
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeerrors
|
||||||
summary: PersistentVolume is having issues with provisioning.
|
summary: PersistentVolume is having issues with provisioning.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -398,7 +430,8 @@ spec:
|
|||||||
rules:
|
rules:
|
||||||
- alert: KubeVersionMismatch
|
- alert: KubeVersionMismatch
|
||||||
annotations:
|
annotations:
|
||||||
description: There are {{ $value }} different semantic versions of Kubernetes components running.
|
description: There are {{ $value }} different semantic versions of Kubernetes
|
||||||
|
components running.
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeversionmismatch
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeversionmismatch
|
||||||
summary: Different semantic versions of Kubernetes components running.
|
summary: Different semantic versions of Kubernetes components running.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -408,7 +441,8 @@ spec:
|
|||||||
severity: warning
|
severity: warning
|
||||||
- alert: KubeClientErrors
|
- alert: KubeClientErrors
|
||||||
annotations:
|
annotations:
|
||||||
description: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.'
|
description: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance
|
||||||
|
}}' is experiencing {{ $value | humanizePercentage }} errors.'
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors
|
||||||
summary: Kubernetes API server client is experiencing errors.
|
summary: Kubernetes API server client is experiencing errors.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -481,7 +515,8 @@ spec:
|
|||||||
rules:
|
rules:
|
||||||
- alert: KubeClientCertificateExpiration
|
- alert: KubeClientCertificateExpiration
|
||||||
annotations:
|
annotations:
|
||||||
description: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.
|
description: A client certificate used to authenticate to the apiserver is
|
||||||
|
expiring in less than 7.0 days.
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration
|
||||||
summary: Client certificate is about to expire.
|
summary: Client certificate is about to expire.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -490,7 +525,8 @@ spec:
|
|||||||
severity: warning
|
severity: warning
|
||||||
- alert: KubeClientCertificateExpiration
|
- alert: KubeClientCertificateExpiration
|
||||||
annotations:
|
annotations:
|
||||||
description: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.
|
description: A client certificate used to authenticate to the apiserver is
|
||||||
|
expiring in less than 24.0 hours.
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration
|
||||||
summary: Client certificate is about to expire.
|
summary: Client certificate is about to expire.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -499,7 +535,9 @@ spec:
|
|||||||
severity: critical
|
severity: critical
|
||||||
- alert: AggregatedAPIErrors
|
- alert: AggregatedAPIErrors
|
||||||
annotations:
|
annotations:
|
||||||
description: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m.
|
description: An aggregated API {{ $labels.name }}/{{ $labels.namespace }}
|
||||||
|
has reported errors. It has appeared unavailable {{ $value | humanize }}
|
||||||
|
times averaged over the past 10m.
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/aggregatedapierrors
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/aggregatedapierrors
|
||||||
summary: An aggregated API has reported errors.
|
summary: An aggregated API has reported errors.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -508,7 +546,8 @@ spec:
|
|||||||
severity: warning
|
severity: warning
|
||||||
- alert: AggregatedAPIDown
|
- alert: AggregatedAPIDown
|
||||||
annotations:
|
annotations:
|
||||||
description: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m.
|
description: An aggregated API {{ $labels.name }}/{{ $labels.namespace }}
|
||||||
|
has been only {{ $value | humanize }}% available over the last 10m.
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/aggregatedapidown
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/aggregatedapidown
|
||||||
summary: An aggregated API is down.
|
summary: An aggregated API is down.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -528,9 +567,11 @@ spec:
|
|||||||
severity: critical
|
severity: critical
|
||||||
- alert: KubeAPITerminatedRequests
|
- alert: KubeAPITerminatedRequests
|
||||||
annotations:
|
annotations:
|
||||||
description: The apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.
|
description: The apiserver has terminated {{ $value | humanizePercentage }}
|
||||||
|
of its incoming requests.
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeapiterminatedrequests
|
||||||
summary: The apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.
|
summary: The apiserver has terminated {{ $value | humanizePercentage }} of
|
||||||
|
its incoming requests.
|
||||||
expr: |
|
expr: |
|
||||||
sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) / ( sum(rate(apiserver_request_total{job="apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20
|
sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) / ( sum(rate(apiserver_request_total{job="apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20
|
||||||
for: 5m
|
for: 5m
|
||||||
@@ -550,7 +591,8 @@ spec:
|
|||||||
severity: warning
|
severity: warning
|
||||||
- alert: KubeNodeUnreachable
|
- alert: KubeNodeUnreachable
|
||||||
annotations:
|
annotations:
|
||||||
description: '{{ $labels.node }} is unreachable and some workloads may be rescheduled.'
|
description: '{{ $labels.node }} is unreachable and some workloads may be
|
||||||
|
rescheduled.'
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodeunreachable
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodeunreachable
|
||||||
summary: Node is unreachable.
|
summary: Node is unreachable.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -560,7 +602,8 @@ spec:
|
|||||||
severity: warning
|
severity: warning
|
||||||
- alert: KubeletTooManyPods
|
- alert: KubeletTooManyPods
|
||||||
annotations:
|
annotations:
|
||||||
description: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity.
|
description: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage
|
||||||
|
}} of its Pod capacity.
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubelettoomanypods
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubelettoomanypods
|
||||||
summary: Kubelet is running at capacity.
|
summary: Kubelet is running at capacity.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -576,7 +619,8 @@ spec:
|
|||||||
severity: info
|
severity: info
|
||||||
- alert: KubeNodeReadinessFlapping
|
- alert: KubeNodeReadinessFlapping
|
||||||
annotations:
|
annotations:
|
||||||
description: The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes.
|
description: The readiness status of node {{ $labels.node }} has changed {{
|
||||||
|
$value }} times in the last 15 minutes.
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodereadinessflapping
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodereadinessflapping
|
||||||
summary: Node readiness status is flapping.
|
summary: Node readiness status is flapping.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -586,7 +630,8 @@ spec:
|
|||||||
severity: warning
|
severity: warning
|
||||||
- alert: KubeletPlegDurationHigh
|
- alert: KubeletPlegDurationHigh
|
||||||
annotations:
|
annotations:
|
||||||
description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.
|
description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile
|
||||||
|
duration of {{ $value }} seconds on node {{ $labels.node }}.
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletplegdurationhigh
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletplegdurationhigh
|
||||||
summary: Kubelet Pod Lifecycle Event Generator is taking too long to relist.
|
summary: Kubelet Pod Lifecycle Event Generator is taking too long to relist.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -596,7 +641,8 @@ spec:
|
|||||||
severity: warning
|
severity: warning
|
||||||
- alert: KubeletPodStartUpLatencyHigh
|
- alert: KubeletPodStartUpLatencyHigh
|
||||||
annotations:
|
annotations:
|
||||||
description: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.
|
description: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds
|
||||||
|
on node {{ $labels.node }}.
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletpodstartuplatencyhigh
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletpodstartuplatencyhigh
|
||||||
summary: Kubelet Pod startup latency is too high.
|
summary: Kubelet Pod startup latency is too high.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -606,7 +652,8 @@ spec:
|
|||||||
severity: warning
|
severity: warning
|
||||||
- alert: KubeletClientCertificateExpiration
|
- alert: KubeletClientCertificateExpiration
|
||||||
annotations:
|
annotations:
|
||||||
description: Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.
|
description: Client certificate for Kubelet on node {{ $labels.node }} expires
|
||||||
|
in {{ $value | humanizeDuration }}.
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration
|
||||||
summary: Kubelet client certificate is about to expire.
|
summary: Kubelet client certificate is about to expire.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -615,7 +662,8 @@ spec:
|
|||||||
severity: warning
|
severity: warning
|
||||||
- alert: KubeletClientCertificateExpiration
|
- alert: KubeletClientCertificateExpiration
|
||||||
annotations:
|
annotations:
|
||||||
description: Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.
|
description: Client certificate for Kubelet on node {{ $labels.node }} expires
|
||||||
|
in {{ $value | humanizeDuration }}.
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificateexpiration
|
||||||
summary: Kubelet client certificate is about to expire.
|
summary: Kubelet client certificate is about to expire.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -624,7 +672,8 @@ spec:
|
|||||||
severity: critical
|
severity: critical
|
||||||
- alert: KubeletServerCertificateExpiration
|
- alert: KubeletServerCertificateExpiration
|
||||||
annotations:
|
annotations:
|
||||||
description: Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.
|
description: Server certificate for Kubelet on node {{ $labels.node }} expires
|
||||||
|
in {{ $value | humanizeDuration }}.
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration
|
||||||
summary: Kubelet server certificate is about to expire.
|
summary: Kubelet server certificate is about to expire.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -633,7 +682,8 @@ spec:
|
|||||||
severity: warning
|
severity: warning
|
||||||
- alert: KubeletServerCertificateExpiration
|
- alert: KubeletServerCertificateExpiration
|
||||||
annotations:
|
annotations:
|
||||||
description: Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.
|
description: Server certificate for Kubelet on node {{ $labels.node }} expires
|
||||||
|
in {{ $value | humanizeDuration }}.
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificateexpiration
|
||||||
summary: Kubelet server certificate is about to expire.
|
summary: Kubelet server certificate is about to expire.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -642,7 +692,8 @@ spec:
|
|||||||
severity: critical
|
severity: critical
|
||||||
- alert: KubeletClientCertificateRenewalErrors
|
- alert: KubeletClientCertificateRenewalErrors
|
||||||
annotations:
|
annotations:
|
||||||
description: Kubelet on node {{ $labels.node }} has failed to renew its client certificate ({{ $value | humanize }} errors in the last 5 minutes).
|
description: Kubelet on node {{ $labels.node }} has failed to renew its client
|
||||||
|
certificate ({{ $value | humanize }} errors in the last 5 minutes).
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificaterenewalerrors
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletclientcertificaterenewalerrors
|
||||||
summary: Kubelet has failed to renew its client certificate.
|
summary: Kubelet has failed to renew its client certificate.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -652,7 +703,8 @@ spec:
|
|||||||
severity: warning
|
severity: warning
|
||||||
- alert: KubeletServerCertificateRenewalErrors
|
- alert: KubeletServerCertificateRenewalErrors
|
||||||
annotations:
|
annotations:
|
||||||
description: Kubelet on node {{ $labels.node }} has failed to renew its server certificate ({{ $value | humanize }} errors in the last 5 minutes).
|
description: Kubelet on node {{ $labels.node }} has failed to renew its server
|
||||||
|
certificate ({{ $value | humanize }} errors in the last 5 minutes).
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificaterenewalerrors
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletservercertificaterenewalerrors
|
||||||
summary: Kubelet has failed to renew its server certificate.
|
summary: Kubelet has failed to renew its server certificate.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -686,7 +738,8 @@ spec:
|
|||||||
rules:
|
rules:
|
||||||
- alert: KubeControllerManagerDown
|
- alert: KubeControllerManagerDown
|
||||||
annotations:
|
annotations:
|
||||||
description: KubeControllerManager has disappeared from Prometheus target discovery.
|
description: KubeControllerManager has disappeared from Prometheus target
|
||||||
|
discovery.
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontrollermanagerdown
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontrollermanagerdown
|
||||||
summary: Target disappeared from Prometheus target discovery.
|
summary: Target disappeared from Prometheus target discovery.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -694,6 +747,18 @@ spec:
|
|||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
|
- name: kubernetes-system-kube-proxy
|
||||||
|
rules:
|
||||||
|
- alert: KubeProxyDown
|
||||||
|
annotations:
|
||||||
|
description: KubeProxy has disappeared from Prometheus target discovery.
|
||||||
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeproxydown
|
||||||
|
summary: Target disappeared from Prometheus target discovery.
|
||||||
|
expr: |
|
||||||
|
absent(up{job="kube-proxy"} == 1)
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
- name: kube-apiserver-burnrate.rules
|
- name: kube-apiserver-burnrate.rules
|
||||||
rules:
|
rules:
|
||||||
- expr: |
|
- expr: |
|
||||||
|
|||||||
@@ -61,11 +61,16 @@ spec:
 sourceLabels:
 - __name__
 - action: drop
-regex: (container_fs_.*|container_spec_.*|container_blkio_device_usage_total|container_file_descriptors|container_sockets|container_threads_max|container_threads|container_start_time_seconds|container_last_seen);;
+regex: (container_spec_.*|container_file_descriptors|container_sockets|container_threads_max|container_threads|container_start_time_seconds|container_last_seen);;
 sourceLabels:
 - __name__
 - pod
 - namespace
+- action: drop
+regex: (container_blkio_device_usage_total);.+
+sourceLabels:
+- __name__
+- container
 path: /metrics/cadvisor
 port: https-metrics
 relabelings:
@@ -29,7 +29,7 @@ spec:
 - --path.rootfs=/host/root
 - --no-collector.wifi
 - --no-collector.hwmon
-- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
+- --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
 - --collector.netclass.ignored-devices=^(veth.*|[a-f0-9]{15})$
 - --collector.netdev.device-exclude=^(veth.*|[a-f0-9]{15})$
 image: quay.io/prometheus/node-exporter:v1.2.2
@@ -16,7 +16,9 @@ spec:
 rules:
 - alert: NodeFilesystemSpaceFillingUp
 annotations:
-description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up.
+description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
+has only {{ printf "%.2f" $value }}% available space left and is filling
+up.
 runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
 summary: Filesystem is predicted to run out of space within the next 24 hours.
 expr: |
@@ -32,7 +34,9 @@ spec:
 severity: warning
 - alert: NodeFilesystemSpaceFillingUp
 annotations:
-description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast.
+description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
+has only {{ printf "%.2f" $value }}% available space left and is filling
+up fast.
 runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
 summary: Filesystem is predicted to run out of space within the next 4 hours.
 expr: |
@@ -48,7 +52,8 @@ spec:
 severity: critical
 - alert: NodeFilesystemAlmostOutOfSpace
 annotations:
-description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
+description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
+has only {{ printf "%.2f" $value }}% available space left.
 runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
 summary: Filesystem has less than 5% space left.
 expr: |
@@ -62,7 +67,8 @@ spec:
 severity: warning
 - alert: NodeFilesystemAlmostOutOfSpace
 annotations:
-description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
+description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
+has only {{ printf "%.2f" $value }}% available space left.
 runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
 summary: Filesystem has less than 3% space left.
 expr: |
@@ -76,7 +82,9 @@ spec:
 severity: critical
 - alert: NodeFilesystemFilesFillingUp
 annotations:
-description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up.
+description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
+has only {{ printf "%.2f" $value }}% available inodes left and is filling
+up.
 runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
 summary: Filesystem is predicted to run out of inodes within the next 24 hours.
 expr: |
@@ -92,7 +100,9 @@ spec:
 severity: warning
 - alert: NodeFilesystemFilesFillingUp
 annotations:
-description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.
+description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
+has only {{ printf "%.2f" $value }}% available inodes left and is filling
+up fast.
 runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
 summary: Filesystem is predicted to run out of inodes within the next 4 hours.
 expr: |
@@ -108,7 +118,8 @@ spec:
 severity: critical
 - alert: NodeFilesystemAlmostOutOfFiles
 annotations:
-description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
+description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
+has only {{ printf "%.2f" $value }}% available inodes left.
 runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
 summary: Filesystem has less than 5% inodes left.
 expr: |
@@ -122,7 +133,8 @@ spec:
 severity: warning
 - alert: NodeFilesystemAlmostOutOfFiles
 annotations:
-description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
+description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
+has only {{ printf "%.2f" $value }}% available inodes left.
 runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
 summary: Filesystem has less than 3% inodes left.
 expr: |
@@ -136,7 +148,8 @@ spec:
 severity: critical
 - alert: NodeNetworkReceiveErrs
 annotations:
-description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.'
+description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
+{{ printf "%.0f" $value }} receive errors in the last two minutes.'
 runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs
 summary: Network interface is reporting many receive errors.
 expr: |
@@ -146,7 +159,8 @@ spec:
 severity: warning
 - alert: NodeNetworkTransmitErrs
 annotations:
-description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
+description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
+{{ printf "%.0f" $value }} transmit errors in the last two minutes.'
 runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs
 summary: Network interface is reporting many transmit errors.
 expr: |
@@ -174,7 +188,8 @@ spec:
 severity: warning
 - alert: NodeClockSkewDetected
 annotations:
-description: Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure NTP is configured correctly on this host.
+description: Clock on {{ $labels.instance }} is out of sync by more than 300s.
+Ensure NTP is configured correctly on this host.
 runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected
 summary: Clock skew detected.
 expr: |
@@ -194,7 +209,8 @@ spec:
 severity: warning
 - alert: NodeClockNotSynchronising
 annotations:
-description: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.
+description: Clock on {{ $labels.instance }} is not synchronising. Ensure
+NTP is configured on this host.
 runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising
 summary: Clock not synchronising.
 expr: |
@@ -206,7 +222,9 @@ spec:
 severity: warning
 - alert: NodeRAIDDegraded
 annotations:
-description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically.
+description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is
+in degraded state due to one or more disks failures. Number of spare drives
+is insufficient to fix issue automatically.
 runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded
 summary: RAID Array is degraded
 expr: |
@@ -216,7 +234,8 @@ spec:
 severity: critical
 - alert: NodeRAIDDiskFailure
 annotations:
-description: At least one device in RAID array on {{ $labels.instance }} failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap.
+description: At least one device in RAID array on {{ $labels.instance }} failed.
+Array '{{ $labels.device }}' needs attention and possibly a disk swap.
 runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure
 summary: Failed device in RAID array
 expr: |
@@ -225,7 +244,8 @@ spec:
 severity: warning
 - alert: NodeFileDescriptorLimit
 annotations:
-description: File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%.
+description: File descriptors limit at {{ $labels.instance }} is currently
+at {{ printf "%.2f" $value }}%.
 runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
 summary: Kernel is predicted to exhaust file descriptors limit soon.
 expr: |
@@ -237,7 +257,8 @@ spec:
 severity: warning
 - alert: NodeFileDescriptorLimit
 annotations:
-description: File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%.
+description: File descriptors limit at {{ $labels.instance }} is currently
+at {{ printf "%.2f" $value }}%.
 runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
 summary: Kernel is predicted to exhaust file descriptors limit soon.
 expr: |
@@ -269,6 +290,16 @@ spec:
 - expr: |
 1 - (
 node_memory_MemAvailable_bytes{job="node-exporter"}
+or
+(
+node_memory_Buffers_bytes{job="node-exporter"}
++
+node_memory_Cached_bytes{job="node-exporter"}
++
+node_memory_MemFree_bytes{job="node-exporter"}
++
+node_memory_Slab_bytes{job="node-exporter"}
+)
 /
 node_memory_MemTotal_bytes{job="node-exporter"}
 )
@@ -5,7 +5,7 @@ metadata:
 app.kubernetes.io/component: metrics-adapter
 app.kubernetes.io/name: prometheus-adapter
 app.kubernetes.io/part-of: kube-prometheus
-app.kubernetes.io/version: 0.9.0
+app.kubernetes.io/version: 0.9.1
 name: v1beta1.metrics.k8s.io
 spec:
 group: metrics.k8s.io
@@ -5,7 +5,7 @@ metadata:
 app.kubernetes.io/component: metrics-adapter
 app.kubernetes.io/name: prometheus-adapter
 app.kubernetes.io/part-of: kube-prometheus
-app.kubernetes.io/version: 0.9.0
+app.kubernetes.io/version: 0.9.1
 name: prometheus-adapter
 rules:
 - apiGroups:
@@ -5,7 +5,7 @@ metadata:
 app.kubernetes.io/component: metrics-adapter
 app.kubernetes.io/name: prometheus-adapter
 app.kubernetes.io/part-of: kube-prometheus
-app.kubernetes.io/version: 0.9.0
+app.kubernetes.io/version: 0.9.1
 rbac.authorization.k8s.io/aggregate-to-admin: "true"
 rbac.authorization.k8s.io/aggregate-to-edit: "true"
 rbac.authorization.k8s.io/aggregate-to-view: "true"
@@ -5,7 +5,7 @@ metadata:
 app.kubernetes.io/component: metrics-adapter
 app.kubernetes.io/name: prometheus-adapter
 app.kubernetes.io/part-of: kube-prometheus
-app.kubernetes.io/version: 0.9.0
+app.kubernetes.io/version: 0.9.1
 name: prometheus-adapter
 roleRef:
 apiGroup: rbac.authorization.k8s.io
@@ -5,7 +5,7 @@ metadata:
 app.kubernetes.io/component: metrics-adapter
 app.kubernetes.io/name: prometheus-adapter
 app.kubernetes.io/part-of: kube-prometheus
-app.kubernetes.io/version: 0.9.0
+app.kubernetes.io/version: 0.9.1
 name: resource-metrics:system:auth-delegator
 roleRef:
 apiGroup: rbac.authorization.k8s.io
@@ -5,7 +5,7 @@ metadata:
 app.kubernetes.io/component: metrics-adapter
 app.kubernetes.io/name: prometheus-adapter
 app.kubernetes.io/part-of: kube-prometheus
-app.kubernetes.io/version: 0.9.0
+app.kubernetes.io/version: 0.9.1
 name: resource-metrics-server-resources
 rules:
 - apiGroups:
@@ -64,6 +64,6 @@ metadata:
 app.kubernetes.io/component: metrics-adapter
 app.kubernetes.io/name: prometheus-adapter
 app.kubernetes.io/part-of: kube-prometheus
-app.kubernetes.io/version: 0.9.0
+app.kubernetes.io/version: 0.9.1
 name: adapter-config
 namespace: monitoring
@@ -5,7 +5,7 @@ metadata:
 app.kubernetes.io/component: metrics-adapter
 app.kubernetes.io/name: prometheus-adapter
 app.kubernetes.io/part-of: kube-prometheus
-app.kubernetes.io/version: 0.9.0
+app.kubernetes.io/version: 0.9.1
 name: prometheus-adapter
 namespace: monitoring
 spec:
@@ -25,7 +25,7 @@ spec:
 app.kubernetes.io/component: metrics-adapter
 app.kubernetes.io/name: prometheus-adapter
 app.kubernetes.io/part-of: kube-prometheus
-app.kubernetes.io/version: 0.9.0
+app.kubernetes.io/version: 0.9.1
 spec:
 containers:
 - args:
@@ -36,7 +36,7 @@ spec:
 - --prometheus-url=http://prometheus-k8s.monitoring.svc.cluster.local:9090/
 - --secure-port=6443
 - --tls-cipher-suites=TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA,TLS_RSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA
-image: k8s.gcr.io/prometheus-adapter/prometheus-adapter:v0.9.0
+image: k8s.gcr.io/prometheus-adapter/prometheus-adapter:v0.9.1
 name: prometheus-adapter
 ports:
 - containerPort: 6443
@@ -1,11 +1,11 @@
-apiVersion: policy/v1beta1
+apiVersion: policy/v1
 kind: PodDisruptionBudget
 metadata:
 labels:
 app.kubernetes.io/component: metrics-adapter
 app.kubernetes.io/name: prometheus-adapter
 app.kubernetes.io/part-of: kube-prometheus
-app.kubernetes.io/version: 0.9.0
+app.kubernetes.io/version: 0.9.1
 name: prometheus-adapter
 namespace: monitoring
 spec:
@@ -5,7 +5,7 @@ metadata:
 app.kubernetes.io/component: metrics-adapter
 app.kubernetes.io/name: prometheus-adapter
 app.kubernetes.io/part-of: kube-prometheus
-app.kubernetes.io/version: 0.9.0
+app.kubernetes.io/version: 0.9.1
 name: resource-metrics-auth-reader
 namespace: kube-system
 roleRef:
@@ -5,7 +5,7 @@ metadata:
 app.kubernetes.io/component: metrics-adapter
 app.kubernetes.io/name: prometheus-adapter
 app.kubernetes.io/part-of: kube-prometheus
-app.kubernetes.io/version: 0.9.0
+app.kubernetes.io/version: 0.9.1
 name: prometheus-adapter
 namespace: monitoring
 spec:
@@ -5,6 +5,6 @@ metadata:
 app.kubernetes.io/component: metrics-adapter
 app.kubernetes.io/name: prometheus-adapter
 app.kubernetes.io/part-of: kube-prometheus
-app.kubernetes.io/version: 0.9.0
+app.kubernetes.io/version: 0.9.1
 name: prometheus-adapter
 namespace: monitoring
@@ -5,13 +5,18 @@ metadata:
 app.kubernetes.io/component: metrics-adapter
 app.kubernetes.io/name: prometheus-adapter
 app.kubernetes.io/part-of: kube-prometheus
-app.kubernetes.io/version: 0.9.0
+app.kubernetes.io/version: 0.9.1
 name: prometheus-adapter
 namespace: monitoring
 spec:
 endpoints:
 - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
 interval: 30s
+metricRelabelings:
+- action: drop
+regex: (apiserver_client_certificate_.*|apiserver_envelope_.*|apiserver_flowcontrol_.*|apiserver_storage_.*|apiserver_webhooks_.*|workqueue_.*)
+sourceLabels:
+- __name__
 port: https
 scheme: https
 tlsConfig:
@@ -5,7 +5,7 @@ metadata:
 app.kubernetes.io/component: prometheus
 app.kubernetes.io/name: prometheus
 app.kubernetes.io/part-of: kube-prometheus
-app.kubernetes.io/version: 2.29.2
+app.kubernetes.io/version: 2.30.3
 name: prometheus-k8s
 rules:
 - apiGroups:
@@ -5,7 +5,7 @@ metadata:
 app.kubernetes.io/component: prometheus
 app.kubernetes.io/name: prometheus
 app.kubernetes.io/part-of: kube-prometheus
-app.kubernetes.io/version: 2.29.2
+app.kubernetes.io/version: 2.30.3
 name: prometheus-k8s
 roleRef:
 apiGroup: rbac.authorization.k8s.io
@@ -5,7 +5,7 @@ metadata:
|
|||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/part-of: kube-prometheus
|
app.kubernetes.io/part-of: kube-prometheus
|
||||||
app.kubernetes.io/version: 0.50.0
|
app.kubernetes.io/version: 0.51.2
|
||||||
prometheus: k8s
|
prometheus: k8s
|
||||||
role: alert-rules
|
role: alert-rules
|
||||||
name: prometheus-operator-rules
|
name: prometheus-operator-rules
|
||||||
@@ -16,7 +16,8 @@ spec:
|
|||||||
rules:
|
rules:
|
||||||
- alert: PrometheusOperatorListErrors
|
- alert: PrometheusOperatorListErrors
|
||||||
annotations:
|
annotations:
|
||||||
description: Errors while performing List operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.
|
description: Errors while performing List operations in controller {{$labels.controller}}
|
||||||
|
in {{$labels.namespace}} namespace.
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorlisterrors
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorlisterrors
|
||||||
summary: Errors while performing list operations in controller.
|
summary: Errors while performing list operations in controller.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -26,7 +27,8 @@ spec:
|
|||||||
severity: warning
|
severity: warning
|
||||||
- alert: PrometheusOperatorWatchErrors
|
- alert: PrometheusOperatorWatchErrors
|
||||||
annotations:
|
annotations:
|
||||||
description: Errors while performing watch operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.
|
description: Errors while performing watch operations in controller {{$labels.controller}}
|
||||||
|
in {{$labels.namespace}} namespace.
|
||||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorwatcherrors
|
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorwatcherrors
|
||||||
summary: Errors while performing watch operations in controller.
|
summary: Errors while performing watch operations in controller.
|
||||||
expr: |
|
expr: |
|
||||||
@@ -36,7 +38,8 @@ spec:
|
|||||||
severity: warning
|
severity: warning
|
  - alert: PrometheusOperatorSyncFailed
  annotations:
- description: Controller {{ $labels.controller }} in {{ $labels.namespace }} namespace fails to reconcile {{ $value }} objects.
+ description: Controller {{ $labels.controller }} in {{ $labels.namespace }}
+ namespace fails to reconcile {{ $value }} objects.
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorsyncfailed
  summary: Last controller reconciliation failed
  expr: |
@@ -46,7 +49,9 @@ spec:
  severity: warning
  - alert: PrometheusOperatorReconcileErrors
  annotations:
- description: '{{ $value | humanizePercentage }} of reconciling operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.'
+ description: '{{ $value | humanizePercentage }} of reconciling operations
+ failed for {{ $labels.controller }} controller in {{ $labels.namespace }}
+ namespace.'
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorreconcileerrors
  summary: Errors while reconciling controller.
  expr: |
@@ -56,7 +61,8 @@ spec:
  severity: warning
  - alert: PrometheusOperatorNodeLookupErrors
  annotations:
- description: Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.
+ description: Errors while reconciling Prometheus in {{ $labels.namespace }}
+ Namespace.
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatornodelookuperrors
  summary: Errors while reconciling Prometheus.
  expr: |
@@ -66,7 +72,8 @@ spec:
  severity: warning
  - alert: PrometheusOperatorNotReady
  annotations:
- description: Prometheus operator in {{ $labels.namespace }} namespace isn't ready to reconcile {{ $labels.controller }} resources.
+ description: Prometheus operator in {{ $labels.namespace }} namespace isn't
+ ready to reconcile {{ $labels.controller }} resources.
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatornotready
  summary: Prometheus operator not ready
  expr: |
@@ -76,7 +83,9 @@ spec:
  severity: warning
  - alert: PrometheusOperatorRejectedResources
  annotations:
- description: Prometheus operator in {{ $labels.namespace }} namespace rejected {{ printf "%0.0f" $value }} {{ $labels.controller }}/{{ $labels.resource }} resources.
+ description: Prometheus operator in {{ $labels.namespace }} namespace rejected
+ {{ printf "%0.0f" $value }} {{ $labels.controller }}/{{ $labels.resource
+ }} resources.
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/prometheusoperatorrejectedresources
  summary: Resources rejected by Prometheus operator
  expr: |
@@ -84,3 +93,17 @@ spec:
  for: 5m
  labels:
  severity: warning
+ - name: config-reloaders
+ rules:
+ - alert: ConfigReloaderSidecarErrors
+ annotations:
+ description: |-
+ Errors encountered while the {{$labels.pod}} config-reloader sidecar attempts to sync config in {{$labels.namespace}} namespace.
+ As a result, configuration for service running in {{$labels.pod}} may be stale and cannot be updated anymore.
+ runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/configreloadersidecarerrors
+ summary: config-reloader sidecar has not had a successful reload for 10m
+ expr: |
+ max_over_time(reloader_last_reload_successful{namespace=~".+"}[5m]) == 0
+ for: 10m
+ labels:
+ severity: warning
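
The last hunk above appends a new config-reloaders alerting group. Since the rendered diff flattens YAML indentation, the following sketch shows the added group reassembled as a standalone PrometheusRule manifest; the rule content is taken verbatim from the added lines, while the metadata wrapper (name, namespace) is illustrative only, because in kube-prometheus the group is appended to the generated prometheus-operator PrometheusRule object:

apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  # Illustrative wrapper only; not part of the diff above.
  name: config-reloaders-example
  namespace: monitoring
spec:
  groups:
  - name: config-reloaders
    rules:
    - alert: ConfigReloaderSidecarErrors
      annotations:
        description: |-
          Errors encountered while the {{$labels.pod}} config-reloader sidecar attempts to sync config in {{$labels.namespace}} namespace.
          As a result, configuration for service running in {{$labels.pod}} may be stale and cannot be updated anymore.
        runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus-operator/configreloadersidecarerrors
        summary: config-reloader sidecar has not had a successful reload for 10m
      expr: |
        max_over_time(reloader_last_reload_successful{namespace=~".+"}[5m]) == 0
      for: 10m
      labels:
        severity: warning

The expression reads the reloader_last_reload_successful gauge exported by each config-reloader sidecar; if its maximum over a 5-minute window is 0 (no successful reload in that window) and the condition persists for another 10 minutes (for: 10m), the alert fires with warning severity.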

@@ -5,7 +5,7 @@ metadata:
  app.kubernetes.io/component: controller
  app.kubernetes.io/name: prometheus-operator
  app.kubernetes.io/part-of: kube-prometheus
- app.kubernetes.io/version: 0.50.0
+ app.kubernetes.io/version: 0.51.2
  name: prometheus-operator
  namespace: monitoring
  spec:
@@ -21,4 +21,4 @@ spec:
  app.kubernetes.io/component: controller
  app.kubernetes.io/name: prometheus-operator
  app.kubernetes.io/part-of: kube-prometheus
- app.kubernetes.io/version: 0.50.0
+ app.kubernetes.io/version: 0.51.2

@@ -1,11 +1,11 @@
- apiVersion: policy/v1beta1
+ apiVersion: policy/v1
  kind: PodDisruptionBudget
  metadata:
  labels:
  app.kubernetes.io/component: prometheus
  app.kubernetes.io/name: prometheus
  app.kubernetes.io/part-of: kube-prometheus
- app.kubernetes.io/version: 2.29.2
+ app.kubernetes.io/version: 2.30.3
  name: prometheus-k8s
  namespace: monitoring
  spec:
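
This hunk migrates the PodDisruptionBudget from the deprecated policy/v1beta1 API to policy/v1 (available since Kubernetes 1.21, with the beta API removed in 1.25) and bumps the version label. Only the fields shown in the hunk are confirmed by the diff; the sketch below fills in a plausible spec, assuming the minAvailable: 1 budget and the pod selector that kube-prometheus typically generates for the prometheus-k8s pods:

apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  labels:
    app.kubernetes.io/component: prometheus
    app.kubernetes.io/name: prometheus
    app.kubernetes.io/part-of: kube-prometheus
    app.kubernetes.io/version: 2.30.3
  name: prometheus-k8s
  namespace: monitoring
spec:
  # Assumed: the spec is outside the hunk; these are the values kube-prometheus
  # typically generates for the prometheus-k8s pods.
  minAvailable: 1
  selector:
    matchLabels:
      app.kubernetes.io/component: prometheus
      app.kubernetes.io/name: prometheus
      app.kubernetes.io/part-of: kube-prometheus
      prometheus: k8s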

@@ -5,7 +5,7 @@ metadata:
  app.kubernetes.io/component: prometheus
  app.kubernetes.io/name: prometheus
  app.kubernetes.io/part-of: kube-prometheus
- app.kubernetes.io/version: 2.29.2
+ app.kubernetes.io/version: 2.30.3
  prometheus: k8s
  name: k8s
  namespace: monitoring
@@ -18,7 +18,7 @@ spec:
  port: web
  enableFeatures: []
  externalLabels: {}
- image: quay.io/prometheus/prometheus:v2.29.2
+ image: quay.io/prometheus/prometheus:v2.30.3
  nodeSelector:
  kubernetes.io/os: linux
  podMetadata:
@@ -26,7 +26,7 @@ spec:
  app.kubernetes.io/component: prometheus
  app.kubernetes.io/name: prometheus
  app.kubernetes.io/part-of: kube-prometheus
- app.kubernetes.io/version: 2.29.2
+ app.kubernetes.io/version: 2.30.3
  podMonitorNamespaceSelector: {}
  podMonitorSelector: {}
  probeNamespaceSelector: {}
@@ -44,4 +44,4 @@ spec:
  serviceAccountName: prometheus-k8s
  serviceMonitorNamespaceSelector: {}
  serviceMonitorSelector: {}
- version: 2.29.2
+ version: 2.30.3
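
Across these hunks the Prometheus custom resource moves from 2.29.2 to 2.30.3 in the version label, the image tag and the version field. The operator uses spec.version when generating version-dependent configuration and spec.image for the container that actually runs, so the generator keeps the two in sync. A minimal sketch of just the version-related fields after the bump (all other spec fields from the hunks above are omitted):

apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
  labels:
    app.kubernetes.io/version: 2.30.3
  name: k8s
  namespace: monitoring
spec:
  # Only the version-related fields are shown; selectors, nodeSelector,
  # serviceAccountName and the rest stay as in the hunks above.
  image: quay.io/prometheus/prometheus:v2.30.3
  version: 2.30.3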

@@ -5,7 +5,7 @@ metadata:
  app.kubernetes.io/component: prometheus
  app.kubernetes.io/name: prometheus
  app.kubernetes.io/part-of: kube-prometheus
- app.kubernetes.io/version: 2.29.2
+ app.kubernetes.io/version: 2.30.3
  prometheus: k8s
  role: alert-rules
  name: prometheus-k8s-prometheus-rules
@@ -16,7 +16,8 @@ spec:
  rules:
  - alert: PrometheusBadConfig
  annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to reload its configuration.
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to
+ reload its configuration.
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusbadconfig
  summary: Failed Prometheus configuration reload.
  expr: |
@@ -28,9 +29,11 @@ spec:
  severity: critical
  - alert: PrometheusNotificationQueueRunningFull
  annotations:
- description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}} is running full.
+ description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}}
+ is running full.
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotificationqueuerunningfull
- summary: Prometheus alert notification queue predicted to run full in less than 30m.
+ summary: Prometheus alert notification queue predicted to run full in less
+ than 30m.
  expr: |
  # Without min_over_time, failed scrapes could create false negatives, see
  # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
@@ -44,9 +47,11 @@ spec:
  severity: warning
  - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
  annotations:
- description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.'
+ description: '{{ printf "%.1f" $value }}% errors while sending alerts from
+ Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.'
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstosomealertmanagers
- summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.
+ summary: Prometheus has encountered more than 1% errors sending alerts to
+ a specific Alertmanager.
  expr: |
  (
  rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
@@ -60,7 +65,8 @@ spec:
  severity: warning
  - alert: PrometheusNotConnectedToAlertmanagers
  annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected to any Alertmanagers.
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected
+ to any Alertmanagers.
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotconnectedtoalertmanagers
  summary: Prometheus is not connected to any Alertmanagers.
  expr: |
@@ -72,7 +78,8 @@ spec:
  severity: warning
  - alert: PrometheusTSDBReloadsFailing
  annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} reload failures over the last 3h.
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected
+ {{$value | humanize}} reload failures over the last 3h.
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbreloadsfailing
  summary: Prometheus has issues reloading blocks from disk.
  expr: |
@@ -82,7 +89,8 @@ spec:
  severity: warning
  - alert: PrometheusTSDBCompactionsFailing
  annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} compaction failures over the last 3h.
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected
+ {{$value | humanize}} compaction failures over the last 3h.
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustsdbcompactionsfailing
  summary: Prometheus has issues compacting blocks.
  expr: |
@@ -92,7 +100,8 @@ spec:
  severity: warning
  - alert: PrometheusNotIngestingSamples
  annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting samples.
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting
+ samples.
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusnotingestingsamples
  summary: Prometheus is not ingesting samples.
  expr: |
@@ -110,7 +119,9 @@ spec:
  severity: warning
  - alert: PrometheusDuplicateTimestamps
  annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with different values but duplicated timestamp.
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping
+ {{ printf "%.4g" $value }} samples/s with different values but duplicated
+ timestamp.
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusduplicatetimestamps
  summary: Prometheus is dropping samples with duplicate timestamps.
  expr: |
@@ -120,7 +131,8 @@ spec:
  severity: warning
  - alert: PrometheusOutOfOrderTimestamps
  annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order.
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping
+ {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order.
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusoutofordertimestamps
  summary: Prometheus drops samples with out-of-order timestamps.
  expr: |
@@ -130,7 +142,9 @@ spec:
  severity: warning
  - alert: PrometheusRemoteStorageFailures
  annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }}
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send
+ {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{
+ $labels.url }}
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotestoragefailures
  summary: Prometheus fails to send samples to remote storage.
  expr: |
@@ -150,7 +164,9 @@ spec:
  severity: critical
  - alert: PrometheusRemoteWriteBehind
  annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}.
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
+ is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url
+ }}.
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritebehind
  summary: Prometheus remote write is behind.
  expr: |
@@ -167,9 +183,14 @@ spec:
  severity: critical
  - alert: PrometheusRemoteWriteDesiredShards
  annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}` $labels.instance | query | first | value }}.
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
+ desired shards calculation wants to run {{ $value }} shards for queue {{
+ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{
+ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}`
+ $labels.instance | query | first | value }}.
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusremotewritedesiredshards
- summary: Prometheus remote write desired shards calculation wants to run more than configured max shards.
+ summary: Prometheus remote write desired shards calculation wants to run more
+ than configured max shards.
  expr: |
  # Without max_over_time, failed scrapes could create false negatives, see
  # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
@@ -183,7 +204,8 @@ spec:
  severity: warning
  - alert: PrometheusRuleFailures
  annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to evaluate {{ printf "%.0f" $value }} rules in the last 5m.
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to
+ evaluate {{ printf "%.0f" $value }} rules in the last 5m.
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusrulefailures
  summary: Prometheus is failing rule evaluations.
  expr: |
@@ -193,7 +215,8 @@ spec:
  severity: critical
  - alert: PrometheusMissingRuleEvaluations
  annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{ printf "%.0f" $value }} rule group evaluations in the last 5m.
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{
+ printf "%.0f" $value }} rule group evaluations in the last 5m.
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheusmissingruleevaluations
  summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
  expr: |
@@ -203,9 +226,12 @@ spec:
  severity: warning
  - alert: PrometheusTargetLimitHit
  annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because the number of targets exceeded the configured target_limit.
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped
+ {{ printf "%.0f" $value }} targets because the number of targets exceeded
+ the configured target_limit.
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetlimithit
- summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit.
+ summary: Prometheus has dropped targets because some scrape configs have exceeded
+ the targets limit.
  expr: |
  increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
  for: 15m
@@ -213,9 +239,12 @@ spec:
  severity: warning
  - alert: PrometheusLabelLimitHit
  annotations:
- description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because some samples exceeded the configured label_limit, label_name_length_limit or label_value_length_limit.
+ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped
+ {{ printf "%.0f" $value }} targets because some samples exceeded the configured
+ label_limit, label_name_length_limit or label_value_length_limit.
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuslabellimithit
- summary: Prometheus has dropped targets because some scrape configs have exceeded the labels limit.
+ summary: Prometheus has dropped targets because some scrape configs have exceeded
+ the labels limit.
  expr: |
  increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
  for: 15m
@@ -223,7 +252,8 @@ spec:
  severity: warning
  - alert: PrometheusTargetSyncFailure
  annotations:
- description: '{{ printf "%.0f" $value }} targets in Prometheus {{$labels.namespace}}/{{$labels.pod}} have failed to sync because invalid configuration was supplied.'
+ description: '{{ printf "%.0f" $value }} targets in Prometheus {{$labels.namespace}}/{{$labels.pod}}
+ have failed to sync because invalid configuration was supplied.'
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheustargetsyncfailure
  summary: Prometheus has failed to sync targets.
  expr: |
@@ -233,7 +263,8 @@ spec:
  severity: critical
  - alert: PrometheusErrorSendingAlertsToAnyAlertmanager
  annotations:
- description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
+ description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts
+ from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
  runbook_url: https://runbooks.prometheus-operator.dev/runbooks/prometheus/prometheuserrorsendingalertstoanyalertmanager
  summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
  expr: |

@@ -5,7 +5,7 @@ metadata:
  app.kubernetes.io/component: prometheus
  app.kubernetes.io/name: prometheus
  app.kubernetes.io/part-of: kube-prometheus
- app.kubernetes.io/version: 2.29.2
+ app.kubernetes.io/version: 2.30.3
  name: prometheus-k8s-config
  namespace: monitoring
  roleRef:

@@ -7,7 +7,7 @@ items:
  app.kubernetes.io/component: prometheus
  app.kubernetes.io/name: prometheus
  app.kubernetes.io/part-of: kube-prometheus
- app.kubernetes.io/version: 2.29.2
+ app.kubernetes.io/version: 2.30.3
  name: prometheus-k8s
  namespace: default
  roleRef:
@@ -25,7 +25,7 @@ items:
  app.kubernetes.io/component: prometheus
  app.kubernetes.io/name: prometheus
  app.kubernetes.io/part-of: kube-prometheus
- app.kubernetes.io/version: 2.29.2
+ app.kubernetes.io/version: 2.30.3
  name: prometheus-k8s
  namespace: kube-system
  roleRef:
@@ -43,7 +43,7 @@ items:
  app.kubernetes.io/component: prometheus
  app.kubernetes.io/name: prometheus
  app.kubernetes.io/part-of: kube-prometheus
- app.kubernetes.io/version: 2.29.2
+ app.kubernetes.io/version: 2.30.3
  name: prometheus-k8s
  namespace: monitoring
  roleRef:

@@ -5,7 +5,7 @@ metadata:
  app.kubernetes.io/component: prometheus
  app.kubernetes.io/name: prometheus
  app.kubernetes.io/part-of: kube-prometheus
- app.kubernetes.io/version: 2.29.2
+ app.kubernetes.io/version: 2.30.3
  name: prometheus-k8s-config
  namespace: monitoring
  rules:

@@ -7,7 +7,7 @@ items:
  app.kubernetes.io/component: prometheus
  app.kubernetes.io/name: prometheus
  app.kubernetes.io/part-of: kube-prometheus
- app.kubernetes.io/version: 2.29.2
+ app.kubernetes.io/version: 2.30.3
  name: prometheus-k8s
  namespace: default
  rules:
@@ -44,7 +44,7 @@ items:
  app.kubernetes.io/component: prometheus
  app.kubernetes.io/name: prometheus
  app.kubernetes.io/part-of: kube-prometheus
- app.kubernetes.io/version: 2.29.2
+ app.kubernetes.io/version: 2.30.3
  name: prometheus-k8s
  namespace: kube-system
  rules:
@@ -81,7 +81,7 @@ items:
  app.kubernetes.io/component: prometheus
  app.kubernetes.io/name: prometheus
  app.kubernetes.io/part-of: kube-prometheus
- app.kubernetes.io/version: 2.29.2
+ app.kubernetes.io/version: 2.30.3
  name: prometheus-k8s
  namespace: monitoring
  rules:

@@ -5,7 +5,7 @@ metadata:
  app.kubernetes.io/component: prometheus
  app.kubernetes.io/name: prometheus
  app.kubernetes.io/part-of: kube-prometheus
- app.kubernetes.io/version: 2.29.2
+ app.kubernetes.io/version: 2.30.3
  prometheus: k8s
  name: prometheus-k8s
  namespace: monitoring
@@ -14,8 +14,10 @@ spec:
  - name: web
  port: 9090
  targetPort: web
+ - name: reloader-web
+ port: 8080
+ targetPort: reloader-web
  selector:
- app: prometheus
  app.kubernetes.io/component: prometheus
  app.kubernetes.io/name: prometheus
  app.kubernetes.io/part-of: kube-prometheus
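
The prometheus-k8s Service picks up a second port, reloader-web on 8080, which exposes the config-reloader sidecar's HTTP endpoint, and drops the legacy app: prometheus selector label in favour of the app.kubernetes.io/* labels. A sketch of the resulting Service, reassembled from this hunk (fields outside the hunk, including any further selector labels, are not shown):

apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/version: 2.30.3
  name: prometheus-k8s
  namespace: monitoring
spec:
  ports:
  - name: web
    port: 9090
    targetPort: web
  # New port fronting the config-reloader sidecar.
  - name: reloader-web
    port: 8080
    targetPort: reloader-web
  selector:
    # The legacy app: prometheus label is gone; only the labels visible in
    # the hunk are listed here, the generated manifest may carry more.
    app.kubernetes.io/component: prometheus
    app.kubernetes.io/name: prometheus
    app.kubernetes.io/part-of: kube-prometheus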

@@ -5,6 +5,6 @@ metadata:
  app.kubernetes.io/component: prometheus
  app.kubernetes.io/name: prometheus
  app.kubernetes.io/part-of: kube-prometheus
- app.kubernetes.io/version: 2.29.2
+ app.kubernetes.io/version: 2.30.3
  name: prometheus-k8s
  namespace: monitoring

@@ -5,13 +5,15 @@ metadata:
  app.kubernetes.io/component: prometheus
  app.kubernetes.io/name: prometheus
  app.kubernetes.io/part-of: kube-prometheus
- app.kubernetes.io/version: 2.29.2
+ app.kubernetes.io/version: 2.30.3
  name: prometheus-k8s
  namespace: monitoring
  spec:
  endpoints:
  - interval: 30s
  port: web
+ - interval: 30s
+ port: reloader-web
  selector:
  matchLabels:
  app.kubernetes.io/component: prometheus
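
Matching the Service change, the prometheus-k8s ServiceMonitor gains an endpoint for the new reloader-web port; this is what makes the reloader_last_reload_successful series, used by the ConfigReloaderSidecarErrors alert added earlier in this diff, available for the Prometheus pods themselves. A sketch of the resulting ServiceMonitor, with the parts not visible in the hunk abbreviated:

apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    app.kubernetes.io/version: 2.30.3
  name: prometheus-k8s
  namespace: monitoring
spec:
  endpoints:
  - interval: 30s
    port: web
  # New endpoint scraping the config-reloader sidecar metrics.
  - interval: 30s
    port: reloader-web
  selector:
    matchLabels:
      # Only the first matchLabel is visible in the hunk; the generated
      # manifest matches the full prometheus-k8s label set.
      app.kubernetes.io/component: prometheus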

(Two further file diffs were suppressed by the web UI because they are too large, and some files are not shown because too many files changed in this diff.)