correctly discover the Alertmanager cluster and ServiceMonitors
This commit is contained in:
@@ -24,6 +24,7 @@ which manages Prometheus servers and their configuration in a cluster. With a si
|
|||||||
* A Prometheus configuration covering monitoring of all Kubernetes core components and exporters
|
* A Prometheus configuration covering monitoring of all Kubernetes core components and exporters
|
||||||
* A default set of alerting rules on the cluster components' health
|
* A default set of alerting rules on the cluster components' health
|
||||||
* A Grafana instance serving dashboards on cluster metrics
|
* A Grafana instance serving dashboards on cluster metrics
|
||||||
|
* A three-node, highly available Alertmanager cluster
|
||||||
|
|
||||||
Simply run:
|
Simply run:
|
||||||
|
|
||||||
@@ -35,6 +36,7 @@ hack/cluster-monitoring/deploy
|
|||||||
After all pods are ready, you can reach:
|
After all pods are ready, you can reach:
|
||||||
|
|
||||||
* Prometheus UI on node port `30900`
|
* Prometheus UI on node port `30900`
|
||||||
|
* Alertmanager UI on node port `30903`
|
||||||
* Grafana on node port `30902`
|
* Grafana on node port `30902`
|
||||||
|
|
||||||
To tear it all down again, run:
|
To tear it all down again, run:
|
||||||
@@ -57,7 +59,9 @@ hack/example-service-monitoring/deploy
|
|||||||
```
|
```
|
||||||
|
|
||||||
After all pods are ready, you can reach the Prometheus server on node port `30100` and observe
|
After all pods are ready, you can reach the Prometheus server on node port `30100` and observe
|
||||||
how it monitors the service as specified.
|
how it monitors the service as specified. Same as before, this Prometheus server automatically
|
||||||
|
discovers the Alertmanager cluster deployed in the [Monitoring Kubernetes](#monitoring-kubernetes)
|
||||||
|
section.
|
||||||
|
|
||||||
Teardown:
|
Teardown:
|
||||||
|
|
||||||
|
@@ -19,3 +19,4 @@ until kctl get prometheus; do sleep 1; done
|
|||||||
kctl apply -f manifests/exporters
|
kctl apply -f manifests/exporters
|
||||||
kctl apply -f manifests/grafana
|
kctl apply -f manifests/grafana
|
||||||
kctl apply -f manifests/prometheus
|
kctl apply -f manifests/prometheus
|
||||||
|
kctl apply -f manifests/alertmanager
|
||||||
|
@@ -11,6 +11,7 @@ kctl() {
|
|||||||
kctl delete -f manifests/exporters
|
kctl delete -f manifests/exporters
|
||||||
kctl delete -f manifests/grafana
|
kctl delete -f manifests/grafana
|
||||||
kctl delete -f manifests/prometheus
|
kctl delete -f manifests/prometheus
|
||||||
|
kctl delete -f manifests/alertmanager
|
||||||
|
|
||||||
# Hack: wait a bit to let the controller delete the deployed Prometheus server.
|
# Hack: wait a bit to let the controller delete the deployed Prometheus server.
|
||||||
sleep 5
|
sleep 5
|
||||||
|
@@ -7,10 +7,9 @@ metadata:
|
|||||||
prometheus: frontend
|
prometheus: frontend
|
||||||
spec:
|
spec:
|
||||||
version: v1.4.1
|
version: v1.4.1
|
||||||
serviceMonitors:
|
serviceMonitorSelector:
|
||||||
- selector:
|
matchLabels:
|
||||||
matchLabels:
|
tier: frontend
|
||||||
tier: frontend
|
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
# 2Gi is default, but won't schedule if you don't have a node with >2Gi
|
# 2Gi is default, but won't schedule if you don't have a node with >2Gi
|
||||||
|
@@ -1,6 +1,25 @@
|
|||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
data:
|
data:
|
||||||
prometheus.yaml: |
|
prometheus.yaml: |
|
||||||
|
alerting:
|
||||||
|
alertmanagers:
|
||||||
|
- kubernetes_sd_configs:
|
||||||
|
- role: endpoints
|
||||||
|
relabel_configs:
|
||||||
|
- action: keep
|
||||||
|
regex: alertmanager-main
|
||||||
|
source_labels:
|
||||||
|
- __meta_kubernetes_service_name
|
||||||
|
- action: keep
|
||||||
|
regex: monitoring
|
||||||
|
source_labels:
|
||||||
|
- __meta_kubernetes_namespace
|
||||||
|
- action: keep
|
||||||
|
regex: web
|
||||||
|
source_labels:
|
||||||
|
- __meta_kubernetes_endpoint_port_name
|
||||||
|
scheme: http
|
||||||
|
|
||||||
global:
|
global:
|
||||||
scrape_interval: 15s
|
scrape_interval: 15s
|
||||||
evaluation_interval: 15s
|
evaluation_interval: 15s
|
||||||
|
@@ -13,8 +13,3 @@ spec:
|
|||||||
# production use. This value is mainly meant for demonstration/testing
|
# production use. This value is mainly meant for demonstration/testing
|
||||||
# purposes.
|
# purposes.
|
||||||
memory: 400Mi
|
memory: 400Mi
|
||||||
alerting:
|
|
||||||
alertmanagers:
|
|
||||||
- namespace: monitoring
|
|
||||||
name: alertmanager-main
|
|
||||||
port: web
|
|
||||||
|
Reference in New Issue
Block a user