Compare commits
26 Commits
release-0.
...
release-0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1f1140b2eb | ||
|
|
f0b43ca7fc | ||
|
|
3ae6b4022f | ||
|
|
1dcc2116e9 | ||
|
|
6e69256e2a | ||
|
|
83412cc25e | ||
|
|
6e09b9d0ab | ||
|
|
b5fbe81d9a | ||
|
|
92ff6e72ca | ||
|
|
486fa0797b | ||
|
|
0a96ba0d92 | ||
|
|
850326d6e0 | ||
|
|
98c8346efe | ||
|
|
84b2aaaabe | ||
|
|
17b28b9381 | ||
|
|
56bd1160c0 | ||
|
|
3caeca9803 | ||
|
|
3624e83af6 | ||
|
|
db694f9965 | ||
|
|
efa1b8c189 | ||
|
|
767b0d953f | ||
|
|
69fffc34c6 | ||
|
|
2a119837eb | ||
|
|
6c28f1458a | ||
|
|
c065734d30 | ||
|
|
da94c33d4f |
54
.github/workflows/ci.yaml
vendored
Normal file
54
.github/workflows/ci.yaml
vendored
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
name: ci
|
||||||
|
on:
|
||||||
|
- push
|
||||||
|
- pull_request
|
||||||
|
env:
|
||||||
|
golang-version: '1.15'
|
||||||
|
kind-version: 'v0.11.1'
|
||||||
|
jobs:
|
||||||
|
generate:
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
os:
|
||||||
|
- macos-latest
|
||||||
|
- ubuntu-latest
|
||||||
|
name: Generate
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- uses: actions/setup-go@v2
|
||||||
|
with:
|
||||||
|
go-version: ${{ env.golang-version }}
|
||||||
|
- run: make --always-make generate && git diff --exit-code
|
||||||
|
unit-tests:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
name: Unit tests
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- run: make --always-make test
|
||||||
|
e2e-tests:
|
||||||
|
name: E2E tests
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
kind-image:
|
||||||
|
- 'kindest/node:v1.18.19'
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- name: Start KinD
|
||||||
|
uses: engineerd/setup-kind@v0.5.0
|
||||||
|
with:
|
||||||
|
version: ${{ env.kind-version }}
|
||||||
|
image: ${{ matrix.kind-image }}
|
||||||
|
wait: 300s
|
||||||
|
- name: Wait for cluster to finish bootstraping
|
||||||
|
run: kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout=300s
|
||||||
|
- name: Create kube-prometheus stack
|
||||||
|
run: |
|
||||||
|
kubectl create -f manifests/setup
|
||||||
|
until kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done
|
||||||
|
kubectl create -f manifests/
|
||||||
|
- name: Run tests
|
||||||
|
run: |
|
||||||
|
export KUBECONFIG="${HOME}/.kube/config"
|
||||||
|
make test-e2e
|
||||||
21
.travis.yml
21
.travis.yml
@@ -1,21 +0,0 @@
|
|||||||
sudo: required
|
|
||||||
dist: xenial
|
|
||||||
language: go
|
|
||||||
|
|
||||||
go:
|
|
||||||
- "1.13.x"
|
|
||||||
go_import_path: github.com/coreos/kube-prometheus
|
|
||||||
|
|
||||||
cache:
|
|
||||||
directories:
|
|
||||||
- $GOCACHE
|
|
||||||
- $GOPATH/pkg/mod
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
include:
|
|
||||||
- name: Check generated files
|
|
||||||
script: make --always-make generate && git diff --exit-code
|
|
||||||
- name: Run tests
|
|
||||||
script: make --always-make test
|
|
||||||
- name: Run e2e tests
|
|
||||||
script: ./tests/e2e/travis-e2e.sh
|
|
||||||
4
Makefile
4
Makefile
@@ -33,6 +33,10 @@ vendor: $(JB_BIN) jsonnetfile.json jsonnetfile.lock.json
|
|||||||
rm -rf vendor
|
rm -rf vendor
|
||||||
$(JB_BIN) install
|
$(JB_BIN) install
|
||||||
|
|
||||||
|
.PHONY: update
|
||||||
|
update: $(JB_BIN)
|
||||||
|
$(JB_BIN) update
|
||||||
|
|
||||||
.PHONY: fmt
|
.PHONY: fmt
|
||||||
fmt: $(JSONNETFMT_BIN)
|
fmt: $(JSONNETFMT_BIN)
|
||||||
find . -name 'vendor' -prune -o -name '*.libsonnet' -o -name '*.jsonnet' -print | \
|
find . -name 'vendor' -prune -o -name '*.libsonnet' -o -name '*.jsonnet' -print | \
|
||||||
|
|||||||
17
README.md
17
README.md
@@ -76,7 +76,7 @@ This adapter is an Extension API Server and Kubernetes needs to be have this fea
|
|||||||
To try out this stack, start [minikube](https://github.com/kubernetes/minikube) with the following command:
|
To try out this stack, start [minikube](https://github.com/kubernetes/minikube) with the following command:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
$ minikube delete && minikube start --kubernetes-version=v1.17.3 --memory=6g --bootstrapper=kubeadm --extra-config=kubelet.authentication-token-webhook=true --extra-config=kubelet.authorization-mode=Webhook --extra-config=scheduler.address=0.0.0.0 --extra-config=controller-manager.address=0.0.0.0
|
$ minikube delete && minikube start --kubernetes-version=v1.18.1 --memory=6g --bootstrapper=kubeadm --extra-config=kubelet.authentication-token-webhook=true --extra-config=kubelet.authorization-mode=Webhook --extra-config=scheduler.address=0.0.0.0 --extra-config=controller-manager.address=0.0.0.0
|
||||||
```
|
```
|
||||||
|
|
||||||
The kube-prometheus stack includes a resource metrics API server, so the metrics-server addon is not necessary. Ensure the metrics-server addon is disabled on minikube:
|
The kube-prometheus stack includes a resource metrics API server, so the metrics-server addon is not necessary. Ensure the metrics-server addon is disabled on minikube:
|
||||||
@@ -89,15 +89,18 @@ $ minikube addons disable metrics-server
|
|||||||
|
|
||||||
### Kubernetes compatibility matrix
|
### Kubernetes compatibility matrix
|
||||||
|
|
||||||
| kube-prometheus stack | Kubernetes 1.14 | Kubernetes 1.15 | Kubernetes 1.16 | Kubernetes 1.17 |
|
The following versions are supported and known to work, as we test against them in their respective branches. Note that other versions might work as well!
|
||||||
|-----------------------|-----------------|-----------------|-----------------|-----------------|
|
|
||||||
| `release-0.3` | ✔ | ✔ | ✔ | ✔ |
|
| kube-prometheus stack | Kubernetes 1.14 | Kubernetes 1.15 | Kubernetes 1.16 | Kubernetes 1.17 | Kubernetes 1.18 |
|
||||||
| `release-0.4` | ✗ | ✗ | ✔ | ✔ |
|
|-----------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
|
||||||
| `HEAD` | ✗ | ✗ | ✗ | ✔ |
|
| `release-0.3` | ✔ | ✔ | ✔ | ✔ | ✗ |
|
||||||
|
| `release-0.4` | ✗ | ✗ | ✔ | ✔ | ✗ |
|
||||||
|
| `release-0.5` | ✗ | ✗ | ✗ | ✗ | ✔ |
|
||||||
|
| `HEAD` | ✗ | ✗ | ✗ | ✗ | ✔ |
|
||||||
|
|
||||||
## Quickstart
|
## Quickstart
|
||||||
|
|
||||||
>Note: For versions before Kubernetes v1.17.0 refer to the [Kubernetes compatibility matrix](#kubernetes-compatibility-matrix) in order to choose a compatible branch.
|
>Note: For versions before Kubernetes v1.18.z refer to the [Kubernetes compatibility matrix](#kubernetes-compatibility-matrix) in order to choose a compatible branch.
|
||||||
|
|
||||||
This project is intended to be used as a library (i.e. the intent is not for you to create your own modified copy of this repository).
|
This project is intended to be used as a library (i.e. the intent is not for you to create your own modified copy of this repository).
|
||||||
|
|
||||||
|
|||||||
@@ -7,10 +7,15 @@
|
|||||||
{
|
{
|
||||||
alert: 'AlertmanagerConfigInconsistent',
|
alert: 'AlertmanagerConfigInconsistent',
|
||||||
annotations: {
|
annotations: {
|
||||||
message: 'The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.',
|
message: |||
|
||||||
|
The configuration of the instances of the Alertmanager cluster `{{ $labels.namespace }}/{{ $labels.service }}` are out of sync.
|
||||||
|
{{ range printf "alertmanager_config_hash{namespace=\"%s\",service=\"%s\"}" $labels.namespace $labels.service | query }}
|
||||||
|
Configuration hash for pod {{ .Labels.pod }} is "{{ printf "%.f" .Value }}"
|
||||||
|
{{ end }}
|
||||||
|
|||,
|
||||||
},
|
},
|
||||||
expr: |||
|
expr: |||
|
||||||
count_values("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{%(prometheusOperatorSelector)s,controller="alertmanager"}) by (name, job, namespace, controller), "service", "alertmanager-$1", "name", "(.*)") != 1
|
count by(namespace,service) (count_values by(namespace,service) ("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s})) != 1
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
'for': '5m',
|
'for': '5m',
|
||||||
labels: {
|
labels: {
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
"subdir": "grafana"
|
"subdir": "grafana"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "master"
|
"version": "release-0.1"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
@@ -17,7 +17,7 @@
|
|||||||
"subdir": "Documentation/etcd-mixin"
|
"subdir": "Documentation/etcd-mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "master"
|
"version": "release-3.4"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
@@ -35,7 +35,7 @@
|
|||||||
"subdir": ""
|
"subdir": ""
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "master",
|
"version": "0d2f82676817bbf9e4acf6495b2090205f323b9f",
|
||||||
"name": "ksonnet"
|
"name": "ksonnet"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -45,7 +45,7 @@
|
|||||||
"subdir": ""
|
"subdir": ""
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "master"
|
"version": "release-0.4"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
@@ -54,7 +54,7 @@
|
|||||||
"subdir": "jsonnet/kube-state-metrics"
|
"subdir": "jsonnet/kube-state-metrics"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "master"
|
"version": "release-1.9"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
@@ -63,7 +63,7 @@
|
|||||||
"subdir": "jsonnet/kube-state-metrics-mixin"
|
"subdir": "jsonnet/kube-state-metrics-mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "master"
|
"version": "release-1.9"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
@@ -72,7 +72,7 @@
|
|||||||
"subdir": "docs/node-mixin"
|
"subdir": "docs/node-mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "master"
|
"version": "fa4edd700ebc1b3614bcd953c215d3f2ab2e0b35"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
@@ -81,7 +81,7 @@
|
|||||||
"subdir": "documentation/prometheus-mixin"
|
"subdir": "documentation/prometheus-mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "master",
|
"version": "cd12f0873c3eb2031f7ba9b2e169449aa1012e3f",
|
||||||
"name": "prometheus"
|
"name": "prometheus"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -18,8 +18,8 @@
|
|||||||
"subdir": "Documentation/etcd-mixin"
|
"subdir": "Documentation/etcd-mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "1166b1f195efae31439c7b3c913b4ef02e7df889",
|
"version": "41061e56ad9d654fea2ee02c851d2a74e0a8a593",
|
||||||
"sum": "Ko3qhNfC2vN/houLh6C0Ryacjv70gl0DVPGU/PQ4OD0="
|
"sum": "bkp18AxkOUYnVC15Gh9EoIi+mMAn0IT3hMzb8mlzpSw="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
@@ -28,8 +28,8 @@
|
|||||||
"subdir": "jsonnet/prometheus-operator"
|
"subdir": "jsonnet/prometheus-operator"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "00cbd4911f931380cf9e19d771d7ebae1ec0a807",
|
"version": "beaa1a519e21c8230bab86a15c04bf7e0a9267c1",
|
||||||
"sum": "PfB8G2nfy3e/BrXS1ayymsRRFJvQLWT+oY5aqLS0tE8="
|
"sum": "EOswLhTM7mx9GG0I7Ry1N3yeLB4IMzDhlYRG67mRnPg="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
@@ -38,8 +38,8 @@
|
|||||||
"subdir": "grafonnet"
|
"subdir": "grafonnet"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "906768d46973e022594d3f03d82c5a51d86de2cc",
|
"version": "3082bfca110166cd69533fa3c0875fdb1b68c329",
|
||||||
"sum": "J3Vp0EVbxTObr6KydLXsi4Rc0ssNVAEuwLc0NQ+4wqU="
|
"sum": "4/sUV0Kk+o8I+wlYxL9R6EPhL/NiLfYHk+NXlU64RUk="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
@@ -48,8 +48,8 @@
|
|||||||
"subdir": "grafana-builder"
|
"subdir": "grafana-builder"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "cb6bc2780a39afbbf9d4ee64fec8d1152023aee9",
|
"version": "0d13e5ba1b3a4c29015738c203d92ea39f71ebe2",
|
||||||
"sum": "slxrtftVDiTlQK22ertdfrg4Epnq97gdrLI63ftUfaE="
|
"sum": "GRf2GvwEU4jhXV+JOonXSZ4wdDv8mnHBPCQ6TUVd+g8="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
@@ -69,8 +69,8 @@
|
|||||||
"subdir": ""
|
"subdir": ""
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "3cc34f995c31ed6e1e92024fed1912d63569c39f",
|
"version": "f8c8c111a6e1d07170ae18f8878c091afe15c796",
|
||||||
"sum": "r5Fg4KgiBtsFPCCHtM3Cb4CEgnizLyK97srDNAcjr+Y="
|
"sum": "D9/c2FmIjPWevvNHf9agqXru4k5BKInOPE+jNRYyaaQ="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
@@ -79,8 +79,8 @@
|
|||||||
"subdir": "lib/promgrafonnet"
|
"subdir": "lib/promgrafonnet"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "3cc34f995c31ed6e1e92024fed1912d63569c39f",
|
"version": "7120319a5b5c45e8dd2e79f0ad60e2284c6d6f1b",
|
||||||
"sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc="
|
"sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
@@ -89,8 +89,8 @@
|
|||||||
"subdir": "jsonnet/kube-state-metrics"
|
"subdir": "jsonnet/kube-state-metrics"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "52fe3a268bd78c8f32a03361e28fdf23c41512c5",
|
"version": "e72315512a38653b19dcfe4429f93eadedc0ea96",
|
||||||
"sum": "cJjGZaLBjcIGrLHZLjRPU9c3KL+ep9rZTb9dbALSKqA="
|
"sum": "zD/pbQLnQq+5hegEelaheHS8mn1h09GTktFO74iwlBI="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
@@ -99,7 +99,7 @@
|
|||||||
"subdir": "jsonnet/kube-state-metrics-mixin"
|
"subdir": "jsonnet/kube-state-metrics-mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "52fe3a268bd78c8f32a03361e28fdf23c41512c5",
|
"version": "e72315512a38653b19dcfe4429f93eadedc0ea96",
|
||||||
"sum": "E1GGavnf9PCWBm4WVrxWnc0FIj72UcbcweqGioWrOdU="
|
"sum": "E1GGavnf9PCWBm4WVrxWnc0FIj72UcbcweqGioWrOdU="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -109,7 +109,7 @@
|
|||||||
"subdir": "docs/node-mixin"
|
"subdir": "docs/node-mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "d4d2e1db98152ab6c94dc9a12a997950e0be2416",
|
"version": "fa4edd700ebc1b3614bcd953c215d3f2ab2e0b35",
|
||||||
"sum": "ZwrC0+4o1xD6+oPBu1p+rBXLlf6pMBD9rT8ygyl2aW0="
|
"sum": "ZwrC0+4o1xD6+oPBu1p+rBXLlf6pMBD9rT8ygyl2aW0="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -119,7 +119,7 @@
|
|||||||
"subdir": "documentation/prometheus-mixin"
|
"subdir": "documentation/prometheus-mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "209d4bb8a1491f4535cc6d991681e7dc03bb1d56",
|
"version": "cd12f0873c3eb2031f7ba9b2e169449aa1012e3f",
|
||||||
"sum": "kRb3XBTe/AALDcaTFfyuiKqzhxtLvihBkVkvJ5cUd/I=",
|
"sum": "kRb3XBTe/AALDcaTFfyuiKqzhxtLvihBkVkvJ5cUd/I=",
|
||||||
"name": "prometheus"
|
"name": "prometheus"
|
||||||
},
|
},
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -3,7 +3,7 @@ kind: ClusterRole
|
|||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
app.kubernetes.io/version: 1.9.5
|
app.kubernetes.io/version: v1.9.5
|
||||||
name: kube-state-metrics
|
name: kube-state-metrics
|
||||||
rules:
|
rules:
|
||||||
- apiGroups:
|
- apiGroups:
|
||||||
@@ -108,10 +108,3 @@ rules:
|
|||||||
verbs:
|
verbs:
|
||||||
- list
|
- list
|
||||||
- watch
|
- watch
|
||||||
- apiGroups:
|
|
||||||
- coordination.k8s.io
|
|
||||||
resources:
|
|
||||||
- leases
|
|
||||||
verbs:
|
|
||||||
- list
|
|
||||||
- watch
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ kind: ClusterRoleBinding
|
|||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
app.kubernetes.io/version: 1.9.5
|
app.kubernetes.io/version: v1.9.5
|
||||||
name: kube-state-metrics
|
name: kube-state-metrics
|
||||||
roleRef:
|
roleRef:
|
||||||
apiGroup: rbac.authorization.k8s.io
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ kind: Deployment
|
|||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
app.kubernetes.io/version: 1.9.5
|
app.kubernetes.io/version: v1.9.5
|
||||||
name: kube-state-metrics
|
name: kube-state-metrics
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
spec:
|
spec:
|
||||||
@@ -15,7 +15,7 @@ spec:
|
|||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
app.kubernetes.io/version: 1.9.5
|
app.kubernetes.io/version: v1.9.5
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- args:
|
- args:
|
||||||
@@ -25,8 +25,6 @@ spec:
|
|||||||
- --telemetry-port=8082
|
- --telemetry-port=8082
|
||||||
image: quay.io/coreos/kube-state-metrics:v1.9.5
|
image: quay.io/coreos/kube-state-metrics:v1.9.5
|
||||||
name: kube-state-metrics
|
name: kube-state-metrics
|
||||||
securityContext:
|
|
||||||
runAsUser: 65534
|
|
||||||
- args:
|
- args:
|
||||||
- --logtostderr
|
- --logtostderr
|
||||||
- --secure-listen-address=:8443
|
- --secure-listen-address=:8443
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ kind: Service
|
|||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
app.kubernetes.io/version: 1.9.5
|
app.kubernetes.io/version: v1.9.5
|
||||||
name: kube-state-metrics
|
name: kube-state-metrics
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
spec:
|
spec:
|
||||||
|
|||||||
@@ -3,6 +3,6 @@ kind: ServiceAccount
|
|||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/name: kube-state-metrics
|
app.kubernetes.io/name: kube-state-metrics
|
||||||
app.kubernetes.io/version: 1.9.5
|
app.kubernetes.io/version: v1.9.5
|
||||||
name: kube-state-metrics
|
name: kube-state-metrics
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ metadata:
|
|||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.38.1
|
app.kubernetes.io/version: v0.38.3
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
spec:
|
spec:
|
||||||
@@ -19,4 +19,4 @@ spec:
|
|||||||
matchLabels:
|
matchLabels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.38.1
|
app.kubernetes.io/version: v0.38.3
|
||||||
|
|||||||
@@ -74,7 +74,7 @@ spec:
|
|||||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d]))
|
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d]))
|
||||||
-
|
-
|
||||||
(
|
(
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[1d])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1d])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1d])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1d])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1d]))
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1d]))
|
||||||
)
|
)
|
||||||
@@ -95,7 +95,7 @@ spec:
|
|||||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h]))
|
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h]))
|
||||||
-
|
-
|
||||||
(
|
(
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[1h])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1h])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1h])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1h])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1h]))
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1h]))
|
||||||
)
|
)
|
||||||
@@ -116,7 +116,7 @@ spec:
|
|||||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h]))
|
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h]))
|
||||||
-
|
-
|
||||||
(
|
(
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[2h])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[2h])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[2h])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[2h])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[2h]))
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[2h]))
|
||||||
)
|
)
|
||||||
@@ -137,7 +137,7 @@ spec:
|
|||||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m]))
|
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m]))
|
||||||
-
|
-
|
||||||
(
|
(
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[30m])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30m])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30m])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30m])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30m]))
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30m]))
|
||||||
)
|
)
|
||||||
@@ -158,7 +158,7 @@ spec:
|
|||||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d]))
|
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d]))
|
||||||
-
|
-
|
||||||
(
|
(
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[3d])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[3d])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[3d])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[3d])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[3d]))
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[3d]))
|
||||||
)
|
)
|
||||||
@@ -179,7 +179,7 @@ spec:
|
|||||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m]))
|
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m]))
|
||||||
-
|
-
|
||||||
(
|
(
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[5m])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[5m])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[5m])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[5m])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[5m]))
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[5m]))
|
||||||
)
|
)
|
||||||
@@ -200,7 +200,7 @@ spec:
|
|||||||
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h]))
|
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h]))
|
||||||
-
|
-
|
||||||
(
|
(
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[6h])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[6h])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[6h])) +
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[6h])) +
|
||||||
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[6h]))
|
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[6h]))
|
||||||
)
|
)
|
||||||
@@ -326,81 +326,6 @@ spec:
|
|||||||
labels:
|
labels:
|
||||||
verb: write
|
verb: write
|
||||||
record: apiserver_request:burnrate6h
|
record: apiserver_request:burnrate6h
|
||||||
- expr: |
|
|
||||||
1 - (
|
|
||||||
(
|
|
||||||
# write too slow
|
|
||||||
sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
|
|
||||||
-
|
|
||||||
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
|
|
||||||
) +
|
|
||||||
(
|
|
||||||
# read too slow
|
|
||||||
sum(increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET"}[30d]))
|
|
||||||
-
|
|
||||||
(
|
|
||||||
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="resource",le="0.1"}[30d])) +
|
|
||||||
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) +
|
|
||||||
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
|
|
||||||
)
|
|
||||||
) +
|
|
||||||
# errors
|
|
||||||
sum(code:apiserver_request_total:increase30d{code=~"5.."})
|
|
||||||
)
|
|
||||||
/
|
|
||||||
sum(code:apiserver_request_total:increase30d)
|
|
||||||
labels:
|
|
||||||
verb: all
|
|
||||||
record: apiserver_request:availability30d
|
|
||||||
- expr: |
|
|
||||||
1 - (
|
|
||||||
sum(increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30d]))
|
|
||||||
-
|
|
||||||
(
|
|
||||||
# too slow
|
|
||||||
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="resource",le="0.1"}[30d])) +
|
|
||||||
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) +
|
|
||||||
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
|
|
||||||
)
|
|
||||||
+
|
|
||||||
# errors
|
|
||||||
sum(code:apiserver_request_total:increase30d{verb="read",code=~"5.."})
|
|
||||||
)
|
|
||||||
/
|
|
||||||
sum(code:apiserver_request_total:increase30d{verb="read"})
|
|
||||||
labels:
|
|
||||||
verb: read
|
|
||||||
record: apiserver_request:availability30d
|
|
||||||
- expr: |
|
|
||||||
1 - (
|
|
||||||
(
|
|
||||||
# too slow
|
|
||||||
sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
|
|
||||||
-
|
|
||||||
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
|
|
||||||
)
|
|
||||||
+
|
|
||||||
# errors
|
|
||||||
sum(code:apiserver_request_total:increase30d{verb="write",code=~"5.."})
|
|
||||||
)
|
|
||||||
/
|
|
||||||
sum(code:apiserver_request_total:increase30d{verb="write"})
|
|
||||||
labels:
|
|
||||||
verb: write
|
|
||||||
record: apiserver_request:availability30d
|
|
||||||
- expr: |
|
|
||||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver"}[30d]))
|
|
||||||
record: code_verb:apiserver_request_total:increase30d
|
|
||||||
- expr: |
|
|
||||||
sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
|
|
||||||
labels:
|
|
||||||
verb: read
|
|
||||||
record: code:apiserver_request_total:increase30d
|
|
||||||
- expr: |
|
|
||||||
sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
|
|
||||||
labels:
|
|
||||||
verb: write
|
|
||||||
record: code:apiserver_request_total:increase30d
|
|
||||||
- expr: |
|
- expr: |
|
||||||
sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
|
sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
|
||||||
labels:
|
labels:
|
||||||
@@ -443,6 +368,153 @@ spec:
|
|||||||
labels:
|
labels:
|
||||||
quantile: "0.5"
|
quantile: "0.5"
|
||||||
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
|
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
|
||||||
|
- interval: 3m
|
||||||
|
name: kube-apiserver-availability.rules
|
||||||
|
rules:
|
||||||
|
- expr: |
|
||||||
|
1 - (
|
||||||
|
(
|
||||||
|
# write too slow
|
||||||
|
sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
|
||||||
|
-
|
||||||
|
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
|
||||||
|
) +
|
||||||
|
(
|
||||||
|
# read too slow
|
||||||
|
sum(increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET"}[30d]))
|
||||||
|
-
|
||||||
|
(
|
||||||
|
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) +
|
||||||
|
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) +
|
||||||
|
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
|
||||||
|
)
|
||||||
|
) +
|
||||||
|
# errors
|
||||||
|
sum(code:apiserver_request_total:increase30d{code=~"5.."} or vector(0))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
sum(code:apiserver_request_total:increase30d)
|
||||||
|
labels:
|
||||||
|
verb: all
|
||||||
|
record: apiserver_request:availability30d
|
||||||
|
- expr: |
|
||||||
|
1 - (
|
||||||
|
sum(increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30d]))
|
||||||
|
-
|
||||||
|
(
|
||||||
|
# too slow
|
||||||
|
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) +
|
||||||
|
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) +
|
||||||
|
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
|
||||||
|
)
|
||||||
|
+
|
||||||
|
# errors
|
||||||
|
sum(code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
sum(code:apiserver_request_total:increase30d{verb="read"})
|
||||||
|
labels:
|
||||||
|
verb: read
|
||||||
|
record: apiserver_request:availability30d
|
||||||
|
- expr: |
|
||||||
|
1 - (
|
||||||
|
(
|
||||||
|
# too slow
|
||||||
|
sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
|
||||||
|
-
|
||||||
|
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
|
||||||
|
)
|
||||||
|
+
|
||||||
|
# errors
|
||||||
|
sum(code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0))
|
||||||
|
)
|
||||||
|
/
|
||||||
|
sum(code:apiserver_request_total:increase30d{verb="write"})
|
||||||
|
labels:
|
||||||
|
verb: write
|
||||||
|
record: apiserver_request:availability30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[30d]))
|
||||||
|
record: code_verb:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
|
||||||
|
labels:
|
||||||
|
verb: read
|
||||||
|
record: code:apiserver_request_total:increase30d
|
||||||
|
- expr: |
|
||||||
|
sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
|
||||||
|
labels:
|
||||||
|
verb: write
|
||||||
|
record: code:apiserver_request_total:increase30d
|
||||||
- name: k8s.rules
|
- name: k8s.rules
|
||||||
rules:
|
rules:
|
||||||
- expr: |
|
- expr: |
|
||||||
@@ -452,31 +524,31 @@ spec:
|
|||||||
sum by (cluster, namespace, pod, container) (
|
sum by (cluster, namespace, pod, container) (
|
||||||
rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])
|
rate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!="", container!="POD"}[5m])
|
||||||
) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (
|
) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (
|
||||||
1, max by(cluster, namespace, pod, node) (kube_pod_info)
|
1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
|
||||||
)
|
)
|
||||||
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
|
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
|
||||||
- expr: |
|
- expr: |
|
||||||
container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
||||||
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||||
max by(namespace, pod, node) (kube_pod_info)
|
max by(namespace, pod, node) (kube_pod_info{node!=""})
|
||||||
)
|
)
|
||||||
record: node_namespace_pod_container:container_memory_working_set_bytes
|
record: node_namespace_pod_container:container_memory_working_set_bytes
|
||||||
- expr: |
|
- expr: |
|
||||||
container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
||||||
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||||
max by(namespace, pod, node) (kube_pod_info)
|
max by(namespace, pod, node) (kube_pod_info{node!=""})
|
||||||
)
|
)
|
||||||
record: node_namespace_pod_container:container_memory_rss
|
record: node_namespace_pod_container:container_memory_rss
|
||||||
- expr: |
|
- expr: |
|
||||||
container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
||||||
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||||
max by(namespace, pod, node) (kube_pod_info)
|
max by(namespace, pod, node) (kube_pod_info{node!=""})
|
||||||
)
|
)
|
||||||
record: node_namespace_pod_container:container_memory_cache
|
record: node_namespace_pod_container:container_memory_cache
|
||||||
- expr: |
|
- expr: |
|
||||||
container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
|
||||||
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
* on (namespace, pod) group_left(node) topk by(namespace, pod) (1,
|
||||||
max by(namespace, pod, node) (kube_pod_info)
|
max by(namespace, pod, node) (kube_pod_info{node!=""})
|
||||||
)
|
)
|
||||||
record: node_namespace_pod_container:container_memory_swap
|
record: node_namespace_pod_container:container_memory_swap
|
||||||
- expr: |
|
- expr: |
|
||||||
@@ -591,12 +663,12 @@ spec:
|
|||||||
- name: node.rules
|
- name: node.rules
|
||||||
rules:
|
rules:
|
||||||
- expr: |
|
- expr: |
|
||||||
sum(min(kube_pod_info) by (cluster, node))
|
sum(min(kube_pod_info{node!=""}) by (cluster, node))
|
||||||
record: ':kube_pod_info_node_count:'
|
record: ':kube_pod_info_node_count:'
|
||||||
- expr: |
|
- expr: |
|
||||||
topk by(namespace, pod) (1,
|
topk by(namespace, pod) (1,
|
||||||
max by (node, namespace, pod) (
|
max by (node, namespace, pod) (
|
||||||
label_replace(kube_pod_info{job="kube-state-metrics"}, "pod", "$1", "pod", "(.*)")
|
label_replace(kube_pod_info{job="kube-state-metrics",node!=""}, "pod", "$1", "pod", "(.*)")
|
||||||
))
|
))
|
||||||
record: 'node_namespace_pod:kube_pod_info:'
|
record: 'node_namespace_pod:kube_pod_info:'
|
||||||
- expr: |
|
- expr: |
|
||||||
@@ -896,20 +968,26 @@ spec:
|
|||||||
}}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
|
}}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
|
||||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping
|
||||||
expr: |
|
expr: |
|
||||||
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[15m]) * 60 * 5 > 0
|
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[5m]) * 60 * 5 > 0
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: warning
|
||||||
- alert: KubePodNotReady
|
- alert: KubePodNotReady
|
||||||
annotations:
|
annotations:
|
||||||
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
|
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
|
||||||
state for longer than 15 minutes.
|
state for longer than 15 minutes.
|
||||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
|
||||||
expr: |
|
expr: |
|
||||||
sum by (namespace, pod) (max by(namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"}) * on(namespace, pod) group_left(owner_kind) max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!="Job"})) > 0
|
sum by (namespace, pod) (
|
||||||
|
max by(namespace, pod) (
|
||||||
|
kube_pod_status_phase{job="kube-state-metrics", phase=~"Pending|Unknown"}
|
||||||
|
) * on(namespace, pod) group_left(owner_kind) topk by(namespace, pod) (
|
||||||
|
1, max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!="Job"})
|
||||||
|
)
|
||||||
|
) > 0
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: warning
|
||||||
- alert: KubeDeploymentGenerationMismatch
|
- alert: KubeDeploymentGenerationMismatch
|
||||||
annotations:
|
annotations:
|
||||||
message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
|
message: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
|
||||||
@@ -922,7 +1000,7 @@ spec:
|
|||||||
kube_deployment_metadata_generation{job="kube-state-metrics"}
|
kube_deployment_metadata_generation{job="kube-state-metrics"}
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: warning
|
||||||
- alert: KubeDeploymentReplicasMismatch
|
- alert: KubeDeploymentReplicasMismatch
|
||||||
annotations:
|
annotations:
|
||||||
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not
|
message: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not
|
||||||
@@ -940,7 +1018,7 @@ spec:
|
|||||||
)
|
)
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: warning
|
||||||
- alert: KubeStatefulSetReplicasMismatch
|
- alert: KubeStatefulSetReplicasMismatch
|
||||||
annotations:
|
annotations:
|
||||||
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has
|
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has
|
||||||
@@ -958,7 +1036,7 @@ spec:
|
|||||||
)
|
)
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: warning
|
||||||
- alert: KubeStatefulSetGenerationMismatch
|
- alert: KubeStatefulSetGenerationMismatch
|
||||||
annotations:
|
annotations:
|
||||||
message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
|
message: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
|
||||||
@@ -971,7 +1049,7 @@ spec:
|
|||||||
kube_statefulset_metadata_generation{job="kube-state-metrics"}
|
kube_statefulset_metadata_generation{job="kube-state-metrics"}
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: warning
|
||||||
- alert: KubeStatefulSetUpdateNotRolledOut
|
- alert: KubeStatefulSetUpdateNotRolledOut
|
||||||
annotations:
|
annotations:
|
||||||
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update
|
message: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update
|
||||||
@@ -991,7 +1069,7 @@ spec:
|
|||||||
)
|
)
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: warning
|
||||||
- alert: KubeDaemonSetRolloutStuck
|
- alert: KubeDaemonSetRolloutStuck
|
||||||
annotations:
|
annotations:
|
||||||
message: Only {{ $value | humanizePercentage }} of the desired Pods of DaemonSet
|
message: Only {{ $value | humanizePercentage }} of the desired Pods of DaemonSet
|
||||||
@@ -1003,7 +1081,7 @@ spec:
|
|||||||
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} < 1.00
|
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} < 1.00
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: warning
|
||||||
- alert: KubeContainerWaiting
|
- alert: KubeContainerWaiting
|
||||||
annotations:
|
annotations:
|
||||||
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}}
|
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}}
|
||||||
@@ -1049,11 +1127,11 @@ spec:
|
|||||||
- alert: KubeJobCompletion
|
- alert: KubeJobCompletion
|
||||||
annotations:
|
annotations:
|
||||||
message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more
|
message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more
|
||||||
than one hour to complete.
|
than 12 hours to complete.
|
||||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion
|
||||||
expr: |
|
expr: |
|
||||||
kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0
|
kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0
|
||||||
for: 1h
|
for: 12h
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
- alert: KubeJobFailed
|
- alert: KubeJobFailed
|
||||||
@@ -1147,16 +1225,44 @@ spec:
|
|||||||
for: 5m
|
for: 5m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
- alert: KubeQuotaExceeded
|
- alert: KubeQuotaAlmostFull
|
||||||
annotations:
|
annotations:
|
||||||
message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
|
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
|
||||||
}} of its {{ $labels.resource }} quota.
|
}} of its {{ $labels.resource }} quota.
|
||||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaalmostfull
|
||||||
|
summary: Namespace quota is going to be full.
|
||||||
expr: |
|
expr: |
|
||||||
kube_resourcequota{job="kube-state-metrics", type="used"}
|
kube_resourcequota{job="kube-state-metrics", type="used"}
|
||||||
/ ignoring(instance, job, type)
|
/ ignoring(instance, job, type)
|
||||||
(kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
|
(kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
|
||||||
> 0.90
|
> 0.9 < 1
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: info
|
||||||
|
- alert: KubeQuotaFullyUsed
|
||||||
|
annotations:
|
||||||
|
message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
|
||||||
|
}} of its {{ $labels.resource }} quota.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotafullyused
|
||||||
|
expr: |
|
||||||
|
kube_resourcequota{job="kube-state-metrics", type="used"}
|
||||||
|
/ ignoring(instance, job, type)
|
||||||
|
(kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
|
||||||
|
== 1
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: info
|
||||||
|
- alert: KubeQuotaExceeded
|
||||||
|
annotations:
|
||||||
|
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
|
||||||
|
}} of its {{ $labels.resource }} quota.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded
|
||||||
|
summary: Namespace quota has exceeded the limits.
|
||||||
|
expr: |
|
||||||
|
kube_resourcequota{job="kube-state-metrics", type="used"}
|
||||||
|
/ ignoring(instance, job, type)
|
||||||
|
(kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
|
||||||
|
> 1
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
@@ -1254,7 +1360,9 @@ spec:
|
|||||||
sum(apiserver_request:burnrate5m) > (14.40 * 0.01000)
|
sum(apiserver_request:burnrate5m) > (14.40 * 0.01000)
|
||||||
for: 2m
|
for: 2m
|
||||||
labels:
|
labels:
|
||||||
|
long: 1h
|
||||||
severity: critical
|
severity: critical
|
||||||
|
short: 5m
|
||||||
- alert: KubeAPIErrorBudgetBurn
|
- alert: KubeAPIErrorBudgetBurn
|
||||||
annotations:
|
annotations:
|
||||||
message: The API server is burning too much error budget
|
message: The API server is burning too much error budget
|
||||||
@@ -1265,7 +1373,9 @@ spec:
|
|||||||
sum(apiserver_request:burnrate30m) > (6.00 * 0.01000)
|
sum(apiserver_request:burnrate30m) > (6.00 * 0.01000)
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
|
long: 6h
|
||||||
severity: critical
|
severity: critical
|
||||||
|
short: 30m
|
||||||
- alert: KubeAPIErrorBudgetBurn
|
- alert: KubeAPIErrorBudgetBurn
|
||||||
annotations:
|
annotations:
|
||||||
message: The API server is burning too much error budget
|
message: The API server is burning too much error budget
|
||||||
@@ -1276,7 +1386,9 @@ spec:
|
|||||||
sum(apiserver_request:burnrate2h) > (3.00 * 0.01000)
|
sum(apiserver_request:burnrate2h) > (3.00 * 0.01000)
|
||||||
for: 1h
|
for: 1h
|
||||||
labels:
|
labels:
|
||||||
|
long: 1d
|
||||||
severity: warning
|
severity: warning
|
||||||
|
short: 2h
|
||||||
- alert: KubeAPIErrorBudgetBurn
|
- alert: KubeAPIErrorBudgetBurn
|
||||||
annotations:
|
annotations:
|
||||||
message: The API server is burning too much error budget
|
message: The API server is burning too much error budget
|
||||||
@@ -1287,7 +1399,9 @@ spec:
|
|||||||
sum(apiserver_request:burnrate6h) > (1.00 * 0.01000)
|
sum(apiserver_request:burnrate6h) > (1.00 * 0.01000)
|
||||||
for: 3h
|
for: 3h
|
||||||
labels:
|
labels:
|
||||||
|
long: 3d
|
||||||
severity: warning
|
severity: warning
|
||||||
|
short: 6h
|
||||||
- name: kubernetes-system-apiserver
|
- name: kubernetes-system-apiserver
|
||||||
rules:
|
rules:
|
||||||
- alert: KubeAPILatencyHigh
|
- alert: KubeAPILatencyHigh
|
||||||
@@ -1296,6 +1410,10 @@ spec:
|
|||||||
{{ $labels.verb }} {{ $labels.resource }}.
|
{{ $labels.verb }} {{ $labels.resource }}.
|
||||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
|
||||||
expr: |
|
expr: |
|
||||||
|
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"}
|
||||||
|
>
|
||||||
|
1
|
||||||
|
and on (verb,resource)
|
||||||
(
|
(
|
||||||
cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"}
|
cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"}
|
||||||
>
|
>
|
||||||
@@ -1307,10 +1425,6 @@ spec:
|
|||||||
)
|
)
|
||||||
) > on (verb) group_left()
|
) > on (verb) group_left()
|
||||||
1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
|
1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
|
||||||
and on (verb,resource)
|
|
||||||
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"}
|
|
||||||
>
|
|
||||||
1
|
|
||||||
for: 5m
|
for: 5m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
@@ -1676,10 +1790,13 @@ spec:
|
|||||||
rules:
|
rules:
|
||||||
- alert: AlertmanagerConfigInconsistent
|
- alert: AlertmanagerConfigInconsistent
|
||||||
annotations:
|
annotations:
|
||||||
message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}`
|
message: |
|
||||||
are out of sync.
|
The configuration of the instances of the Alertmanager cluster `{{ $labels.namespace }}/{{ $labels.service }}` are out of sync.
|
||||||
|
{{ range printf "alertmanager_config_hash{namespace=\"%s\",service=\"%s\"}" $labels.namespace $labels.service | query }}
|
||||||
|
Configuration hash for pod {{ .Labels.pod }} is "{{ printf "%.f" .Value }}"
|
||||||
|
{{ end }}
|
||||||
expr: |
|
expr: |
|
||||||
count_values("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{job="prometheus-operator",namespace="monitoring",controller="alertmanager"}) by (name, job, namespace, controller), "service", "alertmanager-$1", "name", "(.*)") != 1
|
count by(namespace,service) (count_values by(namespace,service) ("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"})) != 1
|
||||||
for: 5m
|
for: 5m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
|
|||||||
@@ -2783,6 +2783,13 @@ spec:
|
|||||||
selectors of replication controllers and services. More info:
|
selectors of replication controllers and services. More info:
|
||||||
http://kubernetes.io/docs/user-guide/labels'
|
http://kubernetes.io/docs/user-guide/labels'
|
||||||
type: object
|
type: object
|
||||||
|
name:
|
||||||
|
description: 'Name must be unique within a namespace. Is required
|
||||||
|
when creating resources, although some resources may allow a client
|
||||||
|
to request the generation of an appropriate name automatically.
|
||||||
|
Name is primarily intended for creation idempotence and configuration
|
||||||
|
definition. Cannot be updated. More info: http://kubernetes.io/docs/user-guide/identifiers#names'
|
||||||
|
type: string
|
||||||
type: object
|
type: object
|
||||||
portName:
|
portName:
|
||||||
description: Port name used for the pods and governing service. This
|
description: Port name used for the pods and governing service. This
|
||||||
@@ -3002,7 +3009,34 @@ spec:
|
|||||||
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||||
type: string
|
type: string
|
||||||
metadata:
|
metadata:
|
||||||
description: 'Standard object''s metadata. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata'
|
description: EmbeddedMetadata contains metadata relevant to
|
||||||
|
an EmbeddedResource.
|
||||||
|
properties:
|
||||||
|
annotations:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
description: 'Annotations is an unstructured key value map
|
||||||
|
stored with a resource that may be set by external tools
|
||||||
|
to store and retrieve arbitrary metadata. They are not
|
||||||
|
queryable and should be preserved when modifying objects.
|
||||||
|
More info: http://kubernetes.io/docs/user-guide/annotations'
|
||||||
|
type: object
|
||||||
|
labels:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
description: 'Map of string keys and values that can be
|
||||||
|
used to organize and categorize (scope and select) objects.
|
||||||
|
May match selectors of replication controllers and services.
|
||||||
|
More info: http://kubernetes.io/docs/user-guide/labels'
|
||||||
|
type: object
|
||||||
|
name:
|
||||||
|
description: 'Name must be unique within a namespace. Is
|
||||||
|
required when creating resources, although some resources
|
||||||
|
may allow a client to request the generation of an appropriate
|
||||||
|
name automatically. Name is primarily intended for creation
|
||||||
|
idempotence and configuration definition. Cannot be updated.
|
||||||
|
More info: http://kubernetes.io/docs/user-guide/identifiers#names'
|
||||||
|
type: string
|
||||||
type: object
|
type: object
|
||||||
spec:
|
spec:
|
||||||
description: 'Spec defines the desired characteristics of a
|
description: 'Spec defines the desired characteristics of a
|
||||||
|
|||||||
@@ -3266,6 +3266,13 @@ spec:
|
|||||||
selectors of replication controllers and services. More info:
|
selectors of replication controllers and services. More info:
|
||||||
http://kubernetes.io/docs/user-guide/labels'
|
http://kubernetes.io/docs/user-guide/labels'
|
||||||
type: object
|
type: object
|
||||||
|
name:
|
||||||
|
description: 'Name must be unique within a namespace. Is required
|
||||||
|
when creating resources, although some resources may allow a client
|
||||||
|
to request the generation of an appropriate name automatically.
|
||||||
|
Name is primarily intended for creation idempotence and configuration
|
||||||
|
definition. Cannot be updated. More info: http://kubernetes.io/docs/user-guide/identifiers#names'
|
||||||
|
type: string
|
||||||
type: object
|
type: object
|
||||||
podMonitorNamespaceSelector:
|
podMonitorNamespaceSelector:
|
||||||
description: Namespaces to be selected for PodMonitor discovery. If
|
description: Namespaces to be selected for PodMonitor discovery. If
|
||||||
@@ -4201,7 +4208,9 @@ spec:
|
|||||||
type: object
|
type: object
|
||||||
type: object
|
type: object
|
||||||
serviceMonitorSelector:
|
serviceMonitorSelector:
|
||||||
description: ServiceMonitors to be selected for target discovery.
|
description: ServiceMonitors to be selected for target discovery. *Deprecated:*
|
||||||
|
if neither this nor podMonitorSelector are specified, configuration
|
||||||
|
is unmanaged.
|
||||||
properties:
|
properties:
|
||||||
matchExpressions:
|
matchExpressions:
|
||||||
description: matchExpressions is a list of label selector requirements.
|
description: matchExpressions is a list of label selector requirements.
|
||||||
@@ -4288,7 +4297,34 @@ spec:
|
|||||||
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||||
type: string
|
type: string
|
||||||
metadata:
|
metadata:
|
||||||
description: 'Standard object''s metadata. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata'
|
description: EmbeddedMetadata contains metadata relevant to
|
||||||
|
an EmbeddedResource.
|
||||||
|
properties:
|
||||||
|
annotations:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
description: 'Annotations is an unstructured key value map
|
||||||
|
stored with a resource that may be set by external tools
|
||||||
|
to store and retrieve arbitrary metadata. They are not
|
||||||
|
queryable and should be preserved when modifying objects.
|
||||||
|
More info: http://kubernetes.io/docs/user-guide/annotations'
|
||||||
|
type: object
|
||||||
|
labels:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
description: 'Map of string keys and values that can be
|
||||||
|
used to organize and categorize (scope and select) objects.
|
||||||
|
May match selectors of replication controllers and services.
|
||||||
|
More info: http://kubernetes.io/docs/user-guide/labels'
|
||||||
|
type: object
|
||||||
|
name:
|
||||||
|
description: 'Name must be unique within a namespace. Is
|
||||||
|
required when creating resources, although some resources
|
||||||
|
may allow a client to request the generation of an appropriate
|
||||||
|
name automatically. Name is primarily intended for creation
|
||||||
|
idempotence and configuration definition. Cannot be updated.
|
||||||
|
More info: http://kubernetes.io/docs/user-guide/identifiers#names'
|
||||||
|
type: string
|
||||||
type: object
|
type: object
|
||||||
spec:
|
spec:
|
||||||
description: 'Spec defines the desired characteristics of a
|
description: 'Spec defines the desired characteristics of a
|
||||||
|
|||||||
@@ -2941,6 +2941,13 @@ spec:
|
|||||||
selectors of replication controllers and services. More info:
|
selectors of replication controllers and services. More info:
|
||||||
http://kubernetes.io/docs/user-guide/labels'
|
http://kubernetes.io/docs/user-guide/labels'
|
||||||
type: object
|
type: object
|
||||||
|
name:
|
||||||
|
description: 'Name must be unique within a namespace. Is required
|
||||||
|
when creating resources, although some resources may allow a client
|
||||||
|
to request the generation of an appropriate name automatically.
|
||||||
|
Name is primarily intended for creation idempotence and configuration
|
||||||
|
definition. Cannot be updated. More info: http://kubernetes.io/docs/user-guide/identifiers#names'
|
||||||
|
type: string
|
||||||
type: object
|
type: object
|
||||||
portName:
|
portName:
|
||||||
description: Port name used for the pods and governing service. This
|
description: Port name used for the pods and governing service. This
|
||||||
@@ -3256,7 +3263,34 @@ spec:
|
|||||||
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||||
type: string
|
type: string
|
||||||
metadata:
|
metadata:
|
||||||
description: 'Standard object''s metadata. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata'
|
description: EmbeddedMetadata contains metadata relevant to
|
||||||
|
an EmbeddedResource.
|
||||||
|
properties:
|
||||||
|
annotations:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
description: 'Annotations is an unstructured key value map
|
||||||
|
stored with a resource that may be set by external tools
|
||||||
|
to store and retrieve arbitrary metadata. They are not
|
||||||
|
queryable and should be preserved when modifying objects.
|
||||||
|
More info: http://kubernetes.io/docs/user-guide/annotations'
|
||||||
|
type: object
|
||||||
|
labels:
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
description: 'Map of string keys and values that can be
|
||||||
|
used to organize and categorize (scope and select) objects.
|
||||||
|
May match selectors of replication controllers and services.
|
||||||
|
More info: http://kubernetes.io/docs/user-guide/labels'
|
||||||
|
type: object
|
||||||
|
name:
|
||||||
|
description: 'Name must be unique within a namespace. Is
|
||||||
|
required when creating resources, although some resources
|
||||||
|
may allow a client to request the generation of an appropriate
|
||||||
|
name automatically. Name is primarily intended for creation
|
||||||
|
idempotence and configuration definition. Cannot be updated.
|
||||||
|
More info: http://kubernetes.io/docs/user-guide/identifiers#names'
|
||||||
|
type: string
|
||||||
type: object
|
type: object
|
||||||
spec:
|
spec:
|
||||||
description: 'Spec defines the desired characteristics of a
|
description: 'Spec defines the desired characteristics of a
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ metadata:
|
|||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.38.1
|
app.kubernetes.io/version: v0.38.3
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
rules:
|
rules:
|
||||||
- apiGroups:
|
- apiGroups:
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ metadata:
|
|||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.38.1
|
app.kubernetes.io/version: v0.38.3
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
roleRef:
|
roleRef:
|
||||||
apiGroup: rbac.authorization.k8s.io
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ metadata:
|
|||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.38.1
|
app.kubernetes.io/version: v0.38.3
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
spec:
|
spec:
|
||||||
@@ -18,15 +18,15 @@ spec:
|
|||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.38.1
|
app.kubernetes.io/version: v0.38.3
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- args:
|
- args:
|
||||||
- --kubelet-service=kube-system/kubelet
|
- --kubelet-service=kube-system/kubelet
|
||||||
- --logtostderr=true
|
- --logtostderr=true
|
||||||
- --config-reloader-image=jimmidyson/configmap-reload:v0.3.0
|
- --config-reloader-image=jimmidyson/configmap-reload:v0.3.0
|
||||||
- --prometheus-config-reloader=quay.io/coreos/prometheus-config-reloader:v0.38.1
|
- --prometheus-config-reloader=quay.io/prometheus-operator/prometheus-config-reloader:v0.38.3
|
||||||
image: quay.io/coreos/prometheus-operator:v0.38.1
|
image: quay.io/prometheus-operator/prometheus-operator:v0.38.3
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8080
|
- containerPort: 8080
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ metadata:
|
|||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.38.1
|
app.kubernetes.io/version: v0.38.3
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
spec:
|
spec:
|
||||||
|
|||||||
@@ -4,6 +4,6 @@ metadata:
|
|||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/component: controller
|
app.kubernetes.io/component: controller
|
||||||
app.kubernetes.io/name: prometheus-operator
|
app.kubernetes.io/name: prometheus-operator
|
||||||
app.kubernetes.io/version: v0.38.1
|
app.kubernetes.io/version: v0.38.3
|
||||||
name: prometheus-operator
|
name: prometheus-operator
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
minikube delete
|
minikube delete
|
||||||
minikube addons disable metrics-server
|
minikube addons disable metrics-server
|
||||||
minikube start \
|
minikube start \
|
||||||
--kubernetes-version=v1.16.0 \
|
--kubernetes-version=v1.18.1 \
|
||||||
--memory=6g \
|
--memory=6g \
|
||||||
--bootstrapper=kubeadm \
|
--bootstrapper=kubeadm \
|
||||||
--extra-config=kubelet.authentication-token-webhook=true \
|
--extra-config=kubelet.authentication-token-webhook=true \
|
||||||
|
|||||||
@@ -13,16 +13,27 @@ chmod +x kubectl
|
|||||||
curl -Lo kind https://github.com/kubernetes-sigs/kind/releases/download/v0.6.1/kind-linux-amd64
|
curl -Lo kind https://github.com/kubernetes-sigs/kind/releases/download/v0.6.1/kind-linux-amd64
|
||||||
chmod +x kind
|
chmod +x kind
|
||||||
|
|
||||||
./kind create cluster --image=kindest/node:v1.17.0
|
run_e2e_tests() {
|
||||||
export KUBECONFIG="$(./kind get kubeconfig-path)"
|
cluster_version=$1
|
||||||
|
|
||||||
# create namespace, permissions, and CRDs
|
./kind create cluster --image=kindest/node:$cluster_version
|
||||||
./kubectl create -f manifests/setup
|
export KUBECONFIG="$(./kind get kubeconfig-path)"
|
||||||
|
|
||||||
# wait for CRD creation to complete
|
# create namespace, permissions, and CRDs
|
||||||
until ./kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done
|
./kubectl create -f manifests/setup
|
||||||
|
|
||||||
# create monitoring components
|
# wait for CRD creation to complete
|
||||||
./kubectl create -f manifests/
|
until ./kubectl get servicemonitors --all-namespaces ; do date; sleep 1; echo ""; done
|
||||||
|
|
||||||
make test-e2e
|
# create monitoring components
|
||||||
|
./kubectl create -f manifests/
|
||||||
|
|
||||||
|
make test-e2e
|
||||||
|
./kind delete cluster
|
||||||
|
}
|
||||||
|
cluster_compatible_versions=("v1.18.0")
|
||||||
|
|
||||||
|
for cluster_version in "${cluster_compatible_versions[@]}"
|
||||||
|
do
|
||||||
|
run_e2e_tests $cluster_version
|
||||||
|
done
|
||||||
|
|||||||
Reference in New Issue
Block a user