contrib/kube-prometheus: Generate
This commit is contained in:
@@ -255,7 +255,7 @@ These are the available fields with their respective default values:
|
|||||||
namespace: "default",
|
namespace: "default",
|
||||||
|
|
||||||
versions+:: {
|
versions+:: {
|
||||||
alertmanager: "v0.15.2",
|
alertmanager: "v0.15.3",
|
||||||
nodeExporter: "v0.16.0",
|
nodeExporter: "v0.16.0",
|
||||||
kubeStateMetrics: "v1.3.1",
|
kubeStateMetrics: "v1.3.1",
|
||||||
kubeRbacProxy: "v0.3.1",
|
kubeRbacProxy: "v0.3.1",
|
||||||
@@ -377,9 +377,9 @@ $ jsonnet -J vendor -S --tla-str repository=internal-registry.com/organization s
|
|||||||
docker pull quay.io/coreos/addon-resizer:1.0
|
docker pull quay.io/coreos/addon-resizer:1.0
|
||||||
docker tag quay.io/coreos/addon-resizer:1.0 internal-registry.com/organization/addon-resizer:1.0
|
docker tag quay.io/coreos/addon-resizer:1.0 internal-registry.com/organization/addon-resizer:1.0
|
||||||
docker push internal-registry.com/organization/addon-resizer:1.0
|
docker push internal-registry.com/organization/addon-resizer:1.0
|
||||||
docker pull quay.io/prometheus/alertmanager:v0.15.2
|
docker pull quay.io/prometheus/alertmanager:v0.15.3
|
||||||
docker tag quay.io/prometheus/alertmanager:v0.15.2 internal-registry.com/organization/alertmanager:v0.15.2
|
docker tag quay.io/prometheus/alertmanager:v0.15.3 internal-registry.com/organization/alertmanager:v0.15.3
|
||||||
docker push internal-registry.com/organization/alertmanager:v0.15.2
|
docker push internal-registry.com/organization/alertmanager:v0.15.3
|
||||||
...
|
...
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@@ -8,7 +8,7 @@
|
|||||||
"subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus"
|
"subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "5185231304f688cf127bf235a4dfdf9f4f9e7821"
|
"version": "c9350aab06b47bcf8410b597ba50b4addf21ee3d"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "ksonnet",
|
"name": "ksonnet",
|
||||||
@@ -28,7 +28,7 @@
|
|||||||
"subdir": ""
|
"subdir": ""
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "1595151b85934d55ea6969a781039d66f82b22d5"
|
"version": "f7ca48cca5d9cadc9a2203b8c0b3bb3eb85f3294"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "grafonnet",
|
"name": "grafonnet",
|
||||||
@@ -38,7 +38,7 @@
|
|||||||
"subdir": "grafonnet"
|
"subdir": "grafonnet"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "1ed195577cd8a406d4811dd6818e939169b686a7"
|
"version": "d407225c5a2e087eb68843528aab2be0507c73b8"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "grafana-builder",
|
"name": "grafana-builder",
|
||||||
@@ -48,7 +48,7 @@
|
|||||||
"subdir": "grafana-builder"
|
"subdir": "grafana-builder"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "04e51ce1caeaa4c9aed4c446c9922388a13f6cb1"
|
"version": "90fbdbf08cf0d4bdc78ab52151041da36a7b0abc"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "grafana",
|
"name": "grafana",
|
||||||
@@ -78,7 +78,7 @@
|
|||||||
"subdir": "Documentation/etcd-mixin"
|
"subdir": "Documentation/etcd-mixin"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "83304cfc808cf6303d48c45a696f169fae422e68"
|
"version": "ee9dcbca0d89dc563c9e6bc725fab0c6f21d689b"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
@@ -11,4 +11,4 @@ spec:
|
|||||||
beta.kubernetes.io/os: linux
|
beta.kubernetes.io/os: linux
|
||||||
replicas: 3
|
replicas: 3
|
||||||
serviceAccountName: alertmanager-main
|
serviceAccountName: alertmanager-main
|
||||||
version: v0.15.2
|
version: v0.15.3
|
||||||
|
@@ -5279,7 +5279,8 @@ items:
|
|||||||
"expr": "avg(sum by (cpu) (irate(node_cpu_seconds_total{job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[2m]))) * 100\n",
|
"expr": "avg(sum by (cpu) (irate(node_cpu_seconds_total{job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[2m]))) * 100\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": ""
|
"legendFormat": "",
|
||||||
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"thresholds": "80, 90",
|
"thresholds": "80, 90",
|
||||||
@@ -5484,7 +5485,8 @@ items:
|
|||||||
"expr": "max(\n (\n (\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}\n )\n / node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n ) * 100)\n",
|
"expr": "max(\n (\n (\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}\n )\n / node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n ) * 100)\n",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": ""
|
"legendFormat": "",
|
||||||
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"thresholds": "80, 90",
|
"thresholds": "80, 90",
|
||||||
@@ -6580,7 +6582,8 @@ items:
|
|||||||
"expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", namespace=\"$namespace\", pod_name=\u007e\"$statefulset.*\"}[3m]))",
|
"expr": "sum(rate(container_cpu_usage_seconds_total{job=\"kubelet\", namespace=\"$namespace\", pod_name=\u007e\"$statefulset.*\"}[3m]))",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": ""
|
"legendFormat": "",
|
||||||
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"thresholds": "",
|
"thresholds": "",
|
||||||
@@ -6659,7 +6662,8 @@ items:
|
|||||||
"expr": "sum(container_memory_usage_bytes{job=\"kubelet\", namespace=\"$namespace\", pod_name=\u007e\"$statefulset.*\"}) / 1024^3",
|
"expr": "sum(container_memory_usage_bytes{job=\"kubelet\", namespace=\"$namespace\", pod_name=\u007e\"$statefulset.*\"}) / 1024^3",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": ""
|
"legendFormat": "",
|
||||||
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"thresholds": "",
|
"thresholds": "",
|
||||||
@@ -6738,7 +6742,8 @@ items:
|
|||||||
"expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", namespace=\"$namespace\", pod_name=\u007e\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{namespace=\"$namespace\",pod_name=\u007e\"$statefulset.*\"}[3m]))",
|
"expr": "sum(rate(container_network_transmit_bytes_total{job=\"kubelet\", namespace=\"$namespace\", pod_name=\u007e\"$statefulset.*\"}[3m])) + sum(rate(container_network_receive_bytes_total{namespace=\"$namespace\",pod_name=\u007e\"$statefulset.*\"}[3m]))",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": ""
|
"legendFormat": "",
|
||||||
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"thresholds": "",
|
"thresholds": "",
|
||||||
@@ -6832,7 +6837,8 @@ items:
|
|||||||
"expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
|
"expr": "max(kube_statefulset_replicas{job=\"kube-state-metrics\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": ""
|
"legendFormat": "",
|
||||||
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"thresholds": "",
|
"thresholds": "",
|
||||||
@@ -6912,7 +6918,8 @@ items:
|
|||||||
"expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
|
"expr": "min(kube_statefulset_status_replicas_current{job=\"kube-state-metrics\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": ""
|
"legendFormat": "",
|
||||||
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"thresholds": "",
|
"thresholds": "",
|
||||||
@@ -6992,7 +6999,8 @@ items:
|
|||||||
"expr": "max(kube_statefulset_status_observed_generation{job=\"kube-state-metrics\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
|
"expr": "max(kube_statefulset_status_observed_generation{job=\"kube-state-metrics\", namespace=\"$namespace\", statefulset=\"$statefulset\"}) without (instance, pod)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": ""
|
"legendFormat": "",
|
||||||
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"thresholds": "",
|
"thresholds": "",
|
||||||
@@ -7072,7 +7080,8 @@ items:
|
|||||||
"expr": "max(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", statefulset=\"$statefulset\", namespace=\"$namespace\"}) without (instance, pod)",
|
"expr": "max(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", statefulset=\"$statefulset\", namespace=\"$namespace\"}) without (instance, pod)",
|
||||||
"format": "time_series",
|
"format": "time_series",
|
||||||
"intervalFactor": 2,
|
"intervalFactor": 2,
|
||||||
"legendFormat": ""
|
"legendFormat": "",
|
||||||
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"thresholds": "",
|
"thresholds": "",
|
||||||
|
@@ -223,22 +223,22 @@ spec:
|
|||||||
)
|
)
|
||||||
record: node:node_disk_utilisation:avg_irate
|
record: node:node_disk_utilisation:avg_irate
|
||||||
- expr: |
|
- expr: |
|
||||||
avg(irate(node_disk_io_time_weighted_seconds_total_seconds_total{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]) / 1e3)
|
avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]) / 1e3)
|
||||||
record: :node_disk_saturation:avg_irate
|
record: :node_disk_saturation:avg_irate
|
||||||
- expr: |
|
- expr: |
|
||||||
avg by (node) (
|
avg by (node) (
|
||||||
irate(node_disk_io_time_weighted_seconds_total_seconds_total{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]) / 1e3
|
irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]) / 1e3
|
||||||
* on (namespace, pod) group_left(node)
|
* on (namespace, pod) group_left(node)
|
||||||
node_namespace_pod:kube_pod_info:
|
node_namespace_pod:kube_pod_info:
|
||||||
)
|
)
|
||||||
record: node:node_disk_saturation:avg_irate
|
record: node:node_disk_saturation:avg_irate
|
||||||
- expr: |
|
- expr: |
|
||||||
max by (namespace, pod, device) ((node_filesystem_size{fstype=~"ext[234]|btrfs|xfs|zfs"}
|
max by (namespace, pod, device) ((node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"}
|
||||||
- node_filesystem_avail{fstype=~"ext[234]|btrfs|xfs|zfs"})
|
- node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
|
||||||
/ node_filesystem_size{fstype=~"ext[234]|btrfs|xfs|zfs"})
|
/ node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
|
||||||
record: 'node:node_filesystem_usage:'
|
record: 'node:node_filesystem_usage:'
|
||||||
- expr: |
|
- expr: |
|
||||||
max by (namespace, pod, device) (node_filesystem_avail{fstype=~"ext[234]|btrfs|xfs|zfs"} / node_filesystem_size{fstype=~"ext[234]|btrfs|xfs|zfs"})
|
max by (namespace, pod, device) (node_filesystem_avail_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"} / node_filesystem_size_bytes{fstype=~"ext[234]|btrfs|xfs|zfs"})
|
||||||
record: 'node:node_filesystem_avail:'
|
record: 'node:node_filesystem_avail:'
|
||||||
- expr: |
|
- expr: |
|
||||||
sum(irate(node_network_receive_bytes_total{job="node-exporter",device="eth0"}[1m])) +
|
sum(irate(node_network_receive_bytes_total{job="node-exporter",device="eth0"}[1m])) +
|
||||||
@@ -829,6 +829,16 @@ spec:
|
|||||||
for: 5m
|
for: 5m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
|
- alert: KubePersistentVolumeErrors
|
||||||
|
annotations:
|
||||||
|
message: The persistent volume {{ $labels.persistentvolume }} has status {{
|
||||||
|
$labels.phase }}.
|
||||||
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeerrors
|
||||||
|
expr: |
|
||||||
|
kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} > 0
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
- name: kubernetes-system
|
- name: kubernetes-system
|
||||||
rules:
|
rules:
|
||||||
- alert: KubeNodeNotReady
|
- alert: KubeNodeNotReady
|
||||||
|
Reference in New Issue
Block a user