diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json index b7f9308e..75e29de0 100644 --- a/jsonnetfile.lock.json +++ b/jsonnetfile.lock.json @@ -18,7 +18,7 @@ "subdir": "Documentation/etcd-mixin" } }, - "version": "1af6d61a1ce56aa8825e694934bfe86e24113e6e", + "version": "205a656cc58b32ab701c74297ce440bd517a680f", "sum": "NhOkJWkO7ZO2DSE8Fvipcs7Hh2/GOCS0WjPPZU8OiaQ=" }, { @@ -28,8 +28,8 @@ "subdir": "jsonnet/prometheus-operator" } }, - "version": "96094ad1ab039950537df448b95bbcc04c57bfc4", - "sum": "ReamRYoS2C39Of7KtXGqkSWdfHw5Fy/Ix6ujOmBLFAg=" + "version": "a1eb8fd04f3cd2bfb11aee8943e0b3469ff2f199", + "sum": "nM1eDP5vftqAeQSmVYzSBAh+lG0SN6zu46QiocQiVhk=" }, { "source": { @@ -38,8 +38,8 @@ "subdir": "grafonnet" } }, - "version": "3336c69715f8f7a4d637582504c9fabd9d9ca081", - "sum": "w6zS28Rjs9EzRN/WoLLIdi028BvumxDTyLefYVoql2k=" + "version": "cc1626a1b4dee45c99b78ddd9714dfd5f5d7816e", + "sum": "nkgrtMYPCq/YB4r3mKyToepaLhicwWnxDdGIodPpzz0=" }, { "source": { @@ -48,8 +48,8 @@ "subdir": "grafana-builder" } }, - "version": "f62b65014b2c443b234af31e4e1754278e66cef9", - "sum": "N65Fv0M2JvFE3GN8ZxP5xh1U5a314ey8geLAioJLzF8=" + "version": "ab0e5d8a62180b52ace18b53bc57c87c2b4f1044", + "sum": "mD0zEP9FVFXeag7EaeS5OvUr2A9D6DQhGemoNn6+PLc=" }, { "source": { @@ -69,8 +69,8 @@ "subdir": "" } }, - "version": "dc563cbb03da396d23bc49f33d4f7ae28db514a4", - "sum": "ZBRziwnNo3LPC4XhIjpWahz+gT+w3i2+klIcHx2r7d0=" + "version": "a161500608ac2ca8908f2c318bd929ecd5e20415", + "sum": "mCzVPYGcvqge5epnrWZ8013OzXmu84g3j73EYsUcsm0=" }, { "source": { @@ -79,8 +79,8 @@ "subdir": "lib/promgrafonnet" } }, - "version": "dc563cbb03da396d23bc49f33d4f7ae28db514a4", - "sum": "VhgBM39yv0f4bKv8VfGg4FXkg573evGDRalip9ypKbc=" + "version": "5c07e1de2c8ea4f8fdcc1985712daa5e751a327f", + "sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps=" }, { "source": { @@ -89,7 +89,7 @@ "subdir": "jsonnet/kube-state-metrics" } }, - "version": "eef2b125b5f09d0cc5245c0db897a27c9b74ca9b", + "version": "89aaf6c524ee891140c4c8f2a05b1b16f5847309", "sum": "zD/pbQLnQq+5hegEelaheHS8mn1h09GTktFO74iwlBI=" }, { @@ -99,7 +99,7 @@ "subdir": "jsonnet/kube-state-metrics-mixin" } }, - "version": "eef2b125b5f09d0cc5245c0db897a27c9b74ca9b", + "version": "89aaf6c524ee891140c4c8f2a05b1b16f5847309", "sum": "E1GGavnf9PCWBm4WVrxWnc0FIj72UcbcweqGioWrOdU=" }, { @@ -109,8 +109,8 @@ "subdir": "docs/node-mixin" } }, - "version": "503e4fc8486c0082d6bd8c53fad646bcfafeedf6", - "sum": "3jFV2qsc/GZe2GADswTYqxxP2zGOiANTj73W/VNFGqc=" + "version": "ff2ff3410f4ea8195e51f5fb8d84151684f91b3f", + "sum": "znDrZiHvvascm7Xuj3lTASIOfwX4Vmx7PELmKKw4YiI=" }, { "source": { @@ -119,7 +119,7 @@ "subdir": "documentation/prometheus-mixin" } }, - "version": "e5a06b483527d4fe0704b8fa3a2b475b661c526f", + "version": "983ebb4a513302315a8117932ab832815f85e3d2", "sum": "TBq4SL7YsPInARbJqwz25JaBvvAegcnRCsuz3K9niWc=", "name": "prometheus" }, diff --git a/manifests/grafana-dashboardDefinitions.yaml b/manifests/grafana-dashboardDefinitions.yaml index 54db1580..1406fa3c 100644 --- a/manifests/grafana-dashboardDefinitions.yaml +++ b/manifests/grafana-dashboardDefinitions.yaml @@ -2030,6 +2030,9 @@ items: "id": 5, "lines": true, "linewidth": 1, + "links": [ + + ], "minSpan": 24, "nullPointMode": "null as zero", "renderer": "flot", @@ -19273,6 +19276,9 @@ items: "id": 5, "lines": true, "linewidth": 1, + "links": [ + + ], "minSpan": 24, "nullPointMode": "null as zero", "renderer": "flot", @@ -20644,6 +20650,9 @@ items: "id": 5, "lines": true, "linewidth": 1, + "links": [ + + ], "minSpan": 24, "nullPointMode": "null as zero", "renderer": "flot", diff --git a/manifests/prometheus-operator-serviceMonitor.yaml b/manifests/prometheus-operator-serviceMonitor.yaml index a8b977de..52ee0e31 100644 --- a/manifests/prometheus-operator-serviceMonitor.yaml +++ b/manifests/prometheus-operator-serviceMonitor.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.42.0 + app.kubernetes.io/version: v0.42.1 name: prometheus-operator namespace: monitoring spec: @@ -19,4 +19,4 @@ spec: matchLabels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.42.0 + app.kubernetes.io/version: v0.42.1 diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index 0c1eb32d..37aa9f0a 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -1020,6 +1020,28 @@ spec: for: 10m labels: severity: warning + - alert: NodeRAIDDegraded + annotations: + description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is + in degraded state due to one or more disks failures. Number of spare drives + is insufficient to fix issue automatically. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-noderaiddegraded + summary: RAID Array is degraded + expr: | + node_md_disks_required - ignoring (state) (node_md_disks{state="active"}) > 0 + for: 15m + labels: + severity: critical + - alert: NodeRAIDDiskFailure + annotations: + description: At least one device in RAID array on {{ $labels.instance }} failed. + Array '{{ $labels.device }}' needs attention and possibly a disk swap. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-noderaiddiskfailure + summary: Failed device in RAID array + expr: | + node_md_disks{state="fail"} > 0 + labels: + severity: warning - name: kubernetes-apps rules: - alert: KubePodCrashLooping @@ -1301,6 +1323,20 @@ spec: for: 5m labels: severity: warning + - alert: KubeQuotaAlmostFull + annotations: + description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage + }} of its {{ $labels.resource }} quota. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaalmostfull + summary: Namespace quota is going to be full. + expr: | + kube_resourcequota{job="kube-state-metrics", type="used"} + / ignoring(instance, job, type) + (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) + > 0.9 < 1 + for: 15m + labels: + severity: info - alert: KubeQuotaFullyUsed annotations: message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage @@ -1310,10 +1346,24 @@ spec: kube_resourcequota{job="kube-state-metrics", type="used"} / ignoring(instance, job, type) (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) - >= 1 + == 1 for: 15m labels: severity: info + - alert: KubeQuotaExceeded + annotations: + description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage + }} of its {{ $labels.resource }} quota. + runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded + summary: Namespace quota has exceeded the limits. + expr: | + kube_resourcequota{job="kube-state-metrics", type="used"} + / ignoring(instance, job, type) + (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0) + > 1 + for: 15m + labels: + severity: warning - alert: CPUThrottlingHigh annotations: message: '{{ $value | humanizePercentage }} throttling of CPU in namespace diff --git a/manifests/setup/prometheus-operator-clusterRole.yaml b/manifests/setup/prometheus-operator-clusterRole.yaml index a3de43b7..ed895874 100644 --- a/manifests/setup/prometheus-operator-clusterRole.yaml +++ b/manifests/setup/prometheus-operator-clusterRole.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.42.0 + app.kubernetes.io/version: v0.42.1 name: prometheus-operator rules: - apiGroups: diff --git a/manifests/setup/prometheus-operator-clusterRoleBinding.yaml b/manifests/setup/prometheus-operator-clusterRoleBinding.yaml index 9001430c..38e98265 100644 --- a/manifests/setup/prometheus-operator-clusterRoleBinding.yaml +++ b/manifests/setup/prometheus-operator-clusterRoleBinding.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.42.0 + app.kubernetes.io/version: v0.42.1 name: prometheus-operator roleRef: apiGroup: rbac.authorization.k8s.io diff --git a/manifests/setup/prometheus-operator-deployment.yaml b/manifests/setup/prometheus-operator-deployment.yaml index 56de8d59..08cd3a77 100644 --- a/manifests/setup/prometheus-operator-deployment.yaml +++ b/manifests/setup/prometheus-operator-deployment.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.42.0 + app.kubernetes.io/version: v0.42.1 name: prometheus-operator namespace: monitoring spec: @@ -18,15 +18,15 @@ spec: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.42.0 + app.kubernetes.io/version: v0.42.1 spec: containers: - args: - --kubelet-service=kube-system/kubelet - --logtostderr=true - --config-reloader-image=jimmidyson/configmap-reload:v0.4.0 - - --prometheus-config-reloader=quay.io/prometheus-operator/prometheus-config-reloader:v0.42.0 - image: quay.io/prometheus-operator/prometheus-operator:v0.42.0 + - --prometheus-config-reloader=quay.io/prometheus-operator/prometheus-config-reloader:v0.42.1 + image: quay.io/prometheus-operator/prometheus-operator:v0.42.1 name: prometheus-operator ports: - containerPort: 8080 diff --git a/manifests/setup/prometheus-operator-service.yaml b/manifests/setup/prometheus-operator-service.yaml index 91d1bbe3..a1543b11 100644 --- a/manifests/setup/prometheus-operator-service.yaml +++ b/manifests/setup/prometheus-operator-service.yaml @@ -4,7 +4,7 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.42.0 + app.kubernetes.io/version: v0.42.1 name: prometheus-operator namespace: monitoring spec: diff --git a/manifests/setup/prometheus-operator-serviceAccount.yaml b/manifests/setup/prometheus-operator-serviceAccount.yaml index 2a98d4db..37f53feb 100644 --- a/manifests/setup/prometheus-operator-serviceAccount.yaml +++ b/manifests/setup/prometheus-operator-serviceAccount.yaml @@ -4,6 +4,6 @@ metadata: labels: app.kubernetes.io/component: controller app.kubernetes.io/name: prometheus-operator - app.kubernetes.io/version: v0.42.0 + app.kubernetes.io/version: v0.42.1 name: prometheus-operator namespace: monitoring