diff --git a/assets/prometheus/prometheus.yaml b/assets/prometheus/prometheus.yaml index d48d5648..08df4789 100644 --- a/assets/prometheus/prometheus.yaml +++ b/assets/prometheus/prometheus.yaml @@ -1,3 +1,22 @@ +alerting: + alertmanagers: + - kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - action: keep + regex: alertmanager-main + source_labels: + - __meta_kubernetes_service_name + - action: keep + regex: monitoring + source_labels: + - __meta_kubernetes_namespace + - action: keep + regex: web + source_labels: + - __meta_kubernetes_endpoint_port_name + scheme: http + global: scrape_interval: 15s evaluation_interval: 15s diff --git a/assets/prometheus/rules/kubernetes.rules b/assets/prometheus/rules/kubernetes.rules index 216c0ccd..c0dddb92 100644 --- a/assets/prometheus/rules/kubernetes.rules +++ b/assets/prometheus/rules/kubernetes.rules @@ -286,18 +286,6 @@ ALERT K8SControllerManagerDown description = "There is no running K8S controller manager. Deployments and replication controllers are not making progress.", } -ALERT K8SMoreThanOneController - IF count by (job,cluster) (up{job=~"kube-scheduler|kube-controller-manager"}) > 1 - FOR 5m - LABELS { - service = "k8s", - severity = "critical", - } - ANNOTATIONS { - summary = "More than one controller node is active", - description = "There is more than one {{ $labels.job }} managing the cluster. Cluster behaviour is undefined.", - } - ALERT K8SConntrackTableFull IF 100*node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 50 FOR 10m diff --git a/manifests/prometheus/prometheus-k8s-rules.yaml b/manifests/prometheus/prometheus-k8s-rules.yaml index c01a7f3e..6e83500e 100644 --- a/manifests/prometheus/prometheus-k8s-rules.yaml +++ b/manifests/prometheus/prometheus-k8s-rules.yaml @@ -341,18 +341,6 @@ data: description = "There is no running K8S controller manager. Deployments and replication controllers are not making progress.", } - ALERT K8SMoreThanOneController - IF count by (job,cluster) (up{job=~"kube-scheduler|kube-controller-manager"}) > 1 - FOR 5m - LABELS { - service = "k8s", - severity = "critical", - } - ANNOTATIONS { - summary = "More than one controller node is active", - description = "There is more than one {{ $labels.job }} managing the cluster. Cluster behaviour is undefined.", - } - ALERT K8SConntrackTableFull IF 100*node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 50 FOR 10m