Compare commits
47 Commits
fix-versio
...
release-0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
69d247555d | ||
|
|
622febc63b | ||
|
|
f9e5898bec | ||
|
|
10396baa75 | ||
|
|
70aa6619e6 | ||
|
|
02ed3e2519 | ||
|
|
6da3452a1c | ||
|
|
013adb7f6c | ||
|
|
abd442d24f | ||
|
|
58006b585a | ||
|
|
5f02e2741c | ||
|
|
297af9d438 | ||
|
|
f1a0d42944 | ||
|
|
d7a363961b | ||
|
|
01e631621d | ||
|
|
019c15fc6d | ||
|
|
4a40a2a11c | ||
|
|
ccb46bfb1b | ||
|
|
74594f2170 | ||
|
|
ffced1bd3e | ||
|
|
710f6aa24d | ||
|
|
d666e4baa0 | ||
|
|
b983b579d3 | ||
|
|
0d4d1a004b | ||
|
|
efb33b36d1 | ||
|
|
557dbd1a09 | ||
|
|
4dfa6f6bc8 | ||
|
|
e8b58191b5 | ||
|
|
ed87db34b6 | ||
|
|
f5096d0fc4 | ||
|
|
e37d34ba2d | ||
|
|
4bef6d2736 | ||
|
|
448aac54e5 | ||
|
|
c9d5a64833 | ||
|
|
0b3659d5c1 | ||
|
|
2d1ffd6459 | ||
|
|
bd2e788432 | ||
|
|
439914c74c | ||
|
|
305857a390 | ||
|
|
f24727f378 | ||
|
|
023951137c | ||
|
|
2c34d3dff6 | ||
|
|
9d2e395361 | ||
|
|
bc9892e53e | ||
|
|
7760c2b801 | ||
|
|
ba330fcd6a | ||
|
|
37f2852388 |
12
.github/workflows/ci.yaml
vendored
12
.github/workflows/ci.yaml
vendored
@@ -4,7 +4,7 @@ on:
|
||||
- pull_request
|
||||
env:
|
||||
golang-version: '1.15'
|
||||
kind-version: 'v0.9.0'
|
||||
kind-version: 'v0.11.0'
|
||||
jobs:
|
||||
generate:
|
||||
runs-on: ${{ matrix.os }}
|
||||
@@ -53,7 +53,7 @@ jobs:
|
||||
matrix:
|
||||
kind-image:
|
||||
- 'kindest/node:v1.20.0'
|
||||
# - 'kindest/node:v1.21.0' #TODO(paulfantom): enable as soon as image is available
|
||||
- 'kindest/node:v1.21.1'
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
@@ -63,13 +63,9 @@ jobs:
|
||||
with:
|
||||
version: ${{ env.kind-version }}
|
||||
image: ${{ matrix.kind-image }}
|
||||
wait: 300s
|
||||
- name: Wait for cluster to finish bootstraping
|
||||
run: |
|
||||
until [ "$(kubectl get pods --all-namespaces --no-headers | grep -cEv '([0-9]+)/\1')" -eq 0 ]; do
|
||||
sleep 5s
|
||||
done
|
||||
kubectl cluster-info
|
||||
kubectl get pods -A
|
||||
run: kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout=300s
|
||||
- name: Create kube-prometheus stack
|
||||
run: |
|
||||
kubectl create -f manifests/setup
|
||||
|
||||
4
Makefile
4
Makefile
@@ -36,6 +36,10 @@ vendor: $(JB_BIN) jsonnetfile.json jsonnetfile.lock.json
|
||||
crdschemas: vendor
|
||||
./scripts/generate-schemas.sh
|
||||
|
||||
.PHONY: update
|
||||
update: $(JB_BIN)
|
||||
$(JB_BIN) update
|
||||
|
||||
.PHONY: validate
|
||||
validate: crdschemas manifests $(KUBECONFORM_BIN)
|
||||
# Follow-up on https://github.com/instrumenta/kubernetes-json-schema/issues/26 if validations start failing
|
||||
|
||||
@@ -44,7 +44,96 @@
|
||||
// Drop all other metrics which are deprecated in kubernetes.
|
||||
{
|
||||
sourceLabels: ['__name__'],
|
||||
regex: '(admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|Av
ailableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)',
|
||||
regex: '(' + std.join('|',
|
||||
[
|
||||
'admission_quota_controller_adds',
|
||||
'admission_quota_controller_depth',
|
||||
'admission_quota_controller_longest_running_processor_microseconds',
|
||||
'admission_quota_controller_queue_latency',
|
||||
'admission_quota_controller_unfinished_work_seconds',
|
||||
'admission_quota_controller_work_duration',
|
||||
'APIServiceOpenAPIAggregationControllerQueue1_adds',
|
||||
'APIServiceOpenAPIAggregationControllerQueue1_depth',
|
||||
'APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds',
|
||||
'APIServiceOpenAPIAggregationControllerQueue1_queue_latency',
|
||||
'APIServiceOpenAPIAggregationControllerQueue1_retries',
|
||||
'APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds',
|
||||
'APIServiceOpenAPIAggregationControllerQueue1_work_duration',
|
||||
'APIServiceRegistrationController_adds',
|
||||
'APIServiceRegistrationController_depth',
|
||||
'APIServiceRegistrationController_longest_running_processor_microseconds',
|
||||
'APIServiceRegistrationController_queue_latency',
|
||||
'APIServiceRegistrationController_retries',
|
||||
'APIServiceRegistrationController_unfinished_work_seconds',
|
||||
'APIServiceRegistrationController_work_duration',
|
||||
'autoregister_adds',
|
||||
'autoregister_depth',
|
||||
'autoregister_longest_running_processor_microseconds',
|
||||
'autoregister_queue_latency',
|
||||
'autoregister_retries',
|
||||
'autoregister_unfinished_work_seconds',
|
||||
'autoregister_work_duration',
|
||||
'AvailableConditionController_adds',
|
||||
'AvailableConditionController_depth',
|
||||
'AvailableConditionController_longest_running_processor_microseconds',
|
||||
'AvailableConditionController_queue_latency',
|
||||
'AvailableConditionController_retries',
|
||||
'AvailableConditionController_unfinished_work_seconds',
|
||||
'AvailableConditionController_work_duration',
|
||||
'crd_autoregistration_controller_adds',
|
||||
'crd_autoregistration_controller_depth',
|
||||
'crd_autoregistration_controller_longest_running_processor_microseconds',
|
||||
'crd_autoregistration_controller_queue_latency',
|
||||
'crd_autoregistration_controller_retries',
|
||||
'crd_autoregistration_controller_unfinished_work_seconds',
|
||||
'crd_autoregistration_controller_work_duration',
|
||||
'crdEstablishing_adds',
|
||||
'crdEstablishing_depth',
|
||||
'crdEstablishing_longest_running_processor_microseconds',
|
||||
'crdEstablishing_queue_latency',
|
||||
'crdEstablishing_retries',
|
||||
'crdEstablishing_unfinished_work_seconds',
|
||||
'crdEstablishing_work_duration',
|
||||
'crd_finalizer_adds',
|
||||
'crd_finalizer_depth',
|
||||
'crd_finalizer_longest_running_processor_microseconds',
|
||||
'crd_finalizer_queue_latency',
|
||||
'crd_finalizer_retries',
|
||||
'crd_finalizer_unfinished_work_seconds',
|
||||
'crd_finalizer_work_duration',
|
||||
'crd_naming_condition_controller_adds',
|
||||
'crd_naming_condition_controller_depth',
|
||||
'crd_naming_condition_controller_longest_running_processor_microseconds',
|
||||
'crd_naming_condition_controller_queue_latency',
|
||||
'crd_naming_condition_controller_retries',
|
||||
'crd_naming_condition_controller_unfinished_work_seconds',
|
||||
'crd_naming_condition_controller_work_duration',
|
||||
'crd_openapi_controller_adds',
|
||||
'crd_openapi_controller_depth',
|
||||
'crd_openapi_controller_longest_running_processor_microseconds',
|
||||
'crd_openapi_controller_queue_latency',
|
||||
'crd_openapi_controller_retries',
|
||||
'crd_openapi_controller_unfinished_work_seconds',
|
||||
'crd_openapi_controller_work_duration',
|
||||
'DiscoveryController_adds',
|
||||
'DiscoveryController_depth',
|
||||
'DiscoveryController_longest_running_processor_microseconds',
|
||||
'DiscoveryController_queue_latency',
|
||||
'DiscoveryController_retries',
|
||||
'DiscoveryController_unfinished_work_seconds',
|
||||
'DiscoveryController_work_duration',
|
||||
'kubeproxy_sync_proxy_rules_latency_microseconds',
|
||||
'non_structural_schema_condition_controller_adds',
|
||||
'non_structural_schema_condition_controller_depth',
|
||||
'non_structural_schema_condition_controller_longest_running_processor_microseconds',
|
||||
'non_structural_schema_condition_controller_queue_latency',
|
||||
'non_structural_schema_condition_controller_retries',
|
||||
'non_structural_schema_condition_controller_unfinished_work_seconds',
|
||||
'non_structural_schema_condition_controller_work_duration',
|
||||
'rest_client_request_latency_seconds',
|
||||
'storage_operation_errors_total',
|
||||
'storage_operation_status_count',
|
||||
]) + ')',
|
||||
action: 'drop',
|
||||
},
|
||||
]
|
||||
|
||||
@@ -16,8 +16,8 @@ local addArgs(args, name, containers) = std.map(
|
||||
containers: addArgs(
|
||||
[|||
|
||||
--metric-denylist=
|
||||
kube_*_created,
|
||||
kube_*_metadata_resource_version,
|
||||
kube_.+_created,
|
||||
kube_.+_metadata_resource_version,
|
||||
kube_replicaset_metadata_generation,
|
||||
kube_replicaset_status_observed_generation,
|
||||
kube_pod_restart_policy,
|
||||
|
||||
@@ -20,7 +20,8 @@ local defaults = {
|
||||
for labelName in std.objectFields(defaults.commonLabels)
|
||||
if !std.setMember(labelName, ['app.kubernetes.io/version'])
|
||||
},
|
||||
configmapReloaderImage: 'jimmidyson/configmap-reload:v0.5.0',
|
||||
configmapReloaderImage: error 'must provide version',
|
||||
kubeRbacProxyImage: error 'must provide kubeRbacProxyImage',
|
||||
|
||||
port: 9115,
|
||||
internalPort: 19115,
|
||||
@@ -204,6 +205,7 @@ function(params) {
|
||||
ports: [
|
||||
{ name: 'https', containerPort: bb._config.port },
|
||||
],
|
||||
image: bb._config.kubeRbacProxyImage,
|
||||
});
|
||||
|
||||
{
|
||||
|
||||
@@ -3,8 +3,7 @@ local defaults = {
|
||||
name: 'grafana',
|
||||
namespace: error 'must provide namespace',
|
||||
version: error 'must provide version',
|
||||
// image: error 'must provide image',
|
||||
imageRepos: 'grafana/grafana',
|
||||
image: error 'must provide image',
|
||||
resources: {
|
||||
requests: { cpu: '100m', memory: '100Mi' },
|
||||
limits: { cpu: '200m', memory: '200Mi' },
|
||||
@@ -44,7 +43,7 @@ function(params) {
|
||||
grafana: g._config.version,
|
||||
},
|
||||
imageRepos+:: {
|
||||
grafana: g._config.imageRepos,
|
||||
grafana: std.split(g._config.image, ':')[0],
|
||||
},
|
||||
prometheus+:: {
|
||||
name: g._config.prometheusName,
|
||||
|
||||
@@ -120,6 +120,23 @@ function(params) {
|
||||
regex: 'container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)',
|
||||
action: 'drop',
|
||||
},
|
||||
// Drop cAdvisor metrics with no (pod, namespace) labels while preserving ability to monitor system services resource usage (cardinality estimation)
|
||||
{
|
||||
sourceLabels: ['__name__', 'pod', 'namespace'],
|
||||
action: 'drop',
|
||||
regex: '(' + std.join('|',
|
||||
[
|
||||
'container_fs_.*', // add filesystem read/write data (nodes*disks*services*4)
|
||||
'container_spec_.*', // everything related to cgroup specification and thus static data (nodes*services*5)
|
||||
'container_blkio_device_usage_total', // useful for containers, but not for system services (nodes*disks*services*operations*2)
|
||||
'container_file_descriptors', // file descriptors limits and global numbers are exposed via (nodes*services)
|
||||
'container_sockets', // used sockets in cgroup. Usually not important for system services (nodes*services)
|
||||
'container_threads_max', // max number of threads in cgroup. Usually for system services it is not limited (nodes*services)
|
||||
'container_threads', // used threads in cgroup. Usually not important for system services (nodes*services)
|
||||
'container_start_time_seconds', // container start. Possibly not needed for system services (nodes*services)
|
||||
'container_last_seen', // not needed as system services are always running (nodes*services)
|
||||
]) + ');;',
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
local defaults = {
|
||||
namespace: error 'must provide namespace',
|
||||
image: 'quay.io/brancz/kube-rbac-proxy:v0.8.0',
|
||||
image: error 'must provide image',
|
||||
ports: error 'must provide ports',
|
||||
secureListenAddress: error 'must provide secureListenAddress',
|
||||
upstream: error 'must provide upstream',
|
||||
|
||||
@@ -6,6 +6,7 @@ local defaults = {
|
||||
namespace: error 'must provide namespace',
|
||||
version: error 'must provide version',
|
||||
image: error 'must provide version',
|
||||
kubeRbacProxyImage: error 'must provide kubeRbacProxyImage',
|
||||
resources: {
|
||||
requests: { cpu: '10m', memory: '190Mi' },
|
||||
limits: { cpu: '100m', memory: '250Mi' },
|
||||
@@ -95,6 +96,7 @@ function(params) (import 'github.com/kubernetes/kube-state-metrics/jsonnet/kube-
|
||||
limits+: { cpu: '40m' },
|
||||
requests+: { cpu: '20m' },
|
||||
},
|
||||
image: ksm._config.kubeRbacProxyImage,
|
||||
}),
|
||||
|
||||
local kubeRbacProxySelf = krp({
|
||||
@@ -104,6 +106,7 @@ function(params) (import 'github.com/kubernetes/kube-state-metrics/jsonnet/kube-
|
||||
ports: [
|
||||
{ name: 'https-self', containerPort: 9443 },
|
||||
],
|
||||
image: ksm._config.kubeRbacProxyImage,
|
||||
}),
|
||||
|
||||
deployment+: {
|
||||
|
||||
@@ -6,6 +6,7 @@ local defaults = {
|
||||
namespace: error 'must provide namespace',
|
||||
version: error 'must provide version',
|
||||
image: error 'must provide version',
|
||||
kubeRbacProxyImage: error 'must provide kubeRbacProxyImage',
|
||||
resources: {
|
||||
requests: { cpu: '102m', memory: '180Mi' },
|
||||
limits: { cpu: '250m', memory: '180Mi' },
|
||||
@@ -174,8 +175,11 @@ function(params) {
|
||||
'--no-collector.wifi',
|
||||
'--no-collector.hwmon',
|
||||
'--collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)',
|
||||
'--collector.netclass.ignored-devices=^(veth.*)$',
|
||||
'--collector.netdev.device-exclude=^(veth.*)$',
|
||||
// NOTE: ignore veth network interface associated with containers.
|
||||
// OVN renames veth.* to <rand-hex>@if<X> where X is /sys/class/net/<if>/ifindex
|
||||
// thus [a-f0-9] regex below
|
||||
'--collector.netclass.ignored-devices=^(veth.*|[a-f0-9]{15})$',
|
||||
'--collector.netdev.device-exclude=^(veth.*|[a-f0-9]{15})$',
|
||||
],
|
||||
volumeMounts: [
|
||||
{ name: 'sys', mountPath: '/host/sys', mountPropagation: 'HostToContainer', readOnly: true },
|
||||
@@ -200,6 +204,7 @@ function(params) {
|
||||
ports: [
|
||||
{ name: 'https', containerPort: ne._config.port, hostPort: ne._config.port },
|
||||
],
|
||||
image: ne._config.kubeRbacProxyImage,
|
||||
}) + {
|
||||
env: [
|
||||
{ name: 'IP', valueFrom: { fieldRef: { fieldPath: 'status.podIP' } } },
|
||||
|
||||
@@ -300,4 +300,20 @@ function(params) {
|
||||
namespace: pa._config.namespace,
|
||||
}],
|
||||
},
|
||||
|
||||
[if (defaults + params).replicas > 1 then 'podDisruptionBudget']: {
|
||||
apiVersion: 'policy/v1beta1',
|
||||
kind: 'PodDisruptionBudget',
|
||||
metadata: {
|
||||
name: pa._config.name,
|
||||
namespace: pa._config.namespace,
|
||||
labels: pa._config.commonLabels,
|
||||
},
|
||||
spec: {
|
||||
minAvailable: 1,
|
||||
selector: {
|
||||
matchLabels: pa._config.selectorLabels,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ local defaults = {
|
||||
namespace: error 'must provide namespace',
|
||||
version: error 'must provide version',
|
||||
image: error 'must provide image',
|
||||
kubeRbacProxyImage: error 'must provide kubeRbacProxyImage',
|
||||
configReloaderImage: error 'must provide config reloader image',
|
||||
resources: {
|
||||
limits: { cpu: '200m', memory: '200Mi' },
|
||||
@@ -114,6 +115,7 @@ function(params)
|
||||
ports: [
|
||||
{ name: 'https', containerPort: 8443 },
|
||||
],
|
||||
image: po._config.kubeRbacProxyImage,
|
||||
}),
|
||||
|
||||
deployment+: {
|
||||
|
||||
@@ -91,7 +91,7 @@
|
||||
"subdir": "doc/alertmanager-mixin"
|
||||
}
|
||||
},
|
||||
"version": "release-0.21",
|
||||
"version": "99f64e944b1043c790784cf5373c8fb349816fc4",
|
||||
"name": "alertmanager"
|
||||
},
|
||||
{
|
||||
|
||||
@@ -30,6 +30,8 @@ local platformPatch = import './platforms/platforms.libsonnet';
|
||||
prometheus: error 'must provide version',
|
||||
prometheusAdapter: error 'must provide version',
|
||||
prometheusOperator: error 'must provide version',
|
||||
kubeRbacProxy: error 'must provide version',
|
||||
configmapReload: error 'must provide version',
|
||||
} + (import 'versions.json'),
|
||||
images: {
|
||||
alertmanager: 'quay.io/prometheus/alertmanager:v' + $.values.common.versions.alertmanager,
|
||||
@@ -41,6 +43,8 @@ local platformPatch = import './platforms/platforms.libsonnet';
|
||||
prometheusAdapter: 'directxman12/k8s-prometheus-adapter:v' + $.values.common.versions.prometheusAdapter,
|
||||
prometheusOperator: 'quay.io/prometheus-operator/prometheus-operator:v' + $.values.common.versions.prometheusOperator,
|
||||
prometheusOperatorReloader: 'quay.io/prometheus-operator/prometheus-config-reloader:v' + $.values.common.versions.prometheusOperator,
|
||||
kubeRbacProxy: 'quay.io/brancz/kube-rbac-proxy:v' + $.values.common.versions.kubeRbacProxy,
|
||||
configmapReload: 'jimmidyson/configmap-reload:v' + $.values.common.versions.configmapReload,
|
||||
},
|
||||
},
|
||||
alertmanager: {
|
||||
@@ -54,6 +58,8 @@ local platformPatch = import './platforms/platforms.libsonnet';
|
||||
namespace: $.values.common.namespace,
|
||||
version: $.values.common.versions.blackboxExporter,
|
||||
image: $.values.common.images.blackboxExporter,
|
||||
kubeRbacProxyImage: $.values.common.images.kubeRbacProxy,
|
||||
configmapReloaderImage: $.values.common.images.configmapReload,
|
||||
},
|
||||
grafana: {
|
||||
namespace: $.values.common.namespace,
|
||||
@@ -68,12 +74,14 @@ local platformPatch = import './platforms/platforms.libsonnet';
|
||||
version: $.values.common.versions.kubeStateMetrics,
|
||||
image: $.values.common.images.kubeStateMetrics,
|
||||
mixin+: { ruleLabels: $.values.common.ruleLabels },
|
||||
kubeRbacProxyImage: $.values.common.images.kubeRbacProxy,
|
||||
},
|
||||
nodeExporter: {
|
||||
namespace: $.values.common.namespace,
|
||||
version: $.values.common.versions.nodeExporter,
|
||||
image: $.values.common.images.nodeExporter,
|
||||
mixin+: { ruleLabels: $.values.common.ruleLabels },
|
||||
kubeRbacProxyImage: $.values.common.images.kubeRbacProxy,
|
||||
},
|
||||
prometheus: {
|
||||
namespace: $.values.common.namespace,
|
||||
@@ -98,6 +106,7 @@ local platformPatch = import './platforms/platforms.libsonnet';
|
||||
'app.kubernetes.io/part-of': 'kube-prometheus',
|
||||
},
|
||||
mixin+: { ruleLabels: $.values.common.ruleLabels },
|
||||
kubeRbacProxyImage: $.values.common.images.kubeRbacProxy,
|
||||
},
|
||||
kubernetesControlPlane: {
|
||||
namespace: $.values.common.namespace,
|
||||
|
||||
@@ -18,22 +18,22 @@ local service(name, namespace, labels, selector, ports) = {
|
||||
kubeControllerManagerPrometheusDiscoveryService: service(
|
||||
'kube-controller-manager-prometheus-discovery',
|
||||
'kube-system',
|
||||
{ 'app.kubernetes.io/name': 'kube-controller-manager' },
|
||||
{ 'app.kubernetes.io/name': 'kube-controller-manager' },
|
||||
{ 'k8s-app': 'kube-controller-manager', 'app.kubernetes.io/name': 'kube-controller-manager' },
|
||||
{ 'k8s-app': 'kube-controller-manager' },
|
||||
[{ name: 'https-metrics', port: 10257, targetPort: 10257 }]
|
||||
),
|
||||
kubeSchedulerPrometheusDiscoveryService: service(
|
||||
'kube-scheduler-prometheus-discovery',
|
||||
'kube-system',
|
||||
{ 'app.kubernetes.io/name': 'kube-scheduler' },
|
||||
{ 'app.kubernetes.io/name': 'kube-scheduler' },
|
||||
{ 'k8s-app': 'kube-controller-manager', 'app.kubernetes.io/name': 'kube-scheduler' },
|
||||
{ 'k8s-app': 'kube-scheduler' },
|
||||
[{ name: 'https-metrics', port: 10259, targetPort: 10259 }]
|
||||
),
|
||||
kubeDnsPrometheusDiscoveryService: service(
|
||||
'kube-dns-prometheus-discovery',
|
||||
'kube-system',
|
||||
{ 'app.kubernetes.io/name': 'kube-dns' },
|
||||
{ 'app.kubernetes.io/name': 'kube-dns' },
|
||||
{ 'k8s-app': 'kube-controller-manager', 'app.kubernetes.io/name': 'kube-dns' },
|
||||
{ 'k8s-app': 'kube-dns' },
|
||||
[{ name: 'metrics', port: 10055, targetPort: 10055 }, { name: 'http-metrics-dnsmasq', port: 10054, targetPort: 10054 }]
|
||||
),
|
||||
},
|
||||
|
||||
@@ -6,5 +6,7 @@
|
||||
"nodeExporter": "1.1.2",
|
||||
"prometheus": "2.26.0",
|
||||
"prometheusAdapter": "0.8.4",
|
||||
"prometheusOperator": "0.47.0"
|
||||
}
|
||||
"prometheusOperator": "0.47.0",
|
||||
"kubeRbacProxy": "0.8.0",
|
||||
"configmapReload": "0.5.0"
|
||||
}
|
||||
@@ -18,7 +18,7 @@
|
||||
"subdir": "contrib/mixin"
|
||||
}
|
||||
},
|
||||
"version": "57a092b45d0eae6c9e600e62513ffcd2f1f25a92",
|
||||
"version": "562d645ac923388ff5b8d270b0536764d34b0e0f",
|
||||
"sum": "W/Azptf1PoqjyMwJON96UY69MFugDA4IAYiKURscryc="
|
||||
},
|
||||
{
|
||||
@@ -28,8 +28,8 @@
|
||||
"subdir": "grafonnet"
|
||||
}
|
||||
},
|
||||
"version": "daad85cf3fad3580e58029414630e29956aefe21",
|
||||
"sum": "zkOBVXtNSGlOdbm5TRCbEik7c/Jk+btbJqaE9qW8j3Y="
|
||||
"version": "6db00c292d3a1c71661fc875f90e0ec7caa538c2",
|
||||
"sum": "gF8foHByYcB25jcUOBqP6jxk0OPifQMjPvKY0HaCk6w="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@@ -38,8 +38,8 @@
|
||||
"subdir": "grafana-builder"
|
||||
}
|
||||
},
|
||||
"version": "89299b1c5e93952622801795353d496fb337f44e",
|
||||
"sum": "GRf2GvwEU4jhXV+JOonXSZ4wdDv8mnHBPCQ6TUVd+g8="
|
||||
"version": "98c3060877aa178f6bdfc6ac618fbe0043fc3de7",
|
||||
"sum": "0KkygBQd/AFzUvVzezE4qF/uDYgrwUXVpZfINBti0oc="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@@ -59,8 +59,8 @@
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "ec3e85f45b5691d54a02ab38ed654c3c9f736fe5",
|
||||
"sum": "6KgRTpd101espi7a7CDpkqN0yaIPmENxxlAXqGcCWhk="
|
||||
"version": "7d3bb79a4983052d421264a7e0f3c9b0d4a22268",
|
||||
"sum": "DFo3YX4xc6GJTSZDaG5XRE/ixY/5GZJwdyqBkvons4M="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@@ -69,7 +69,7 @@
|
||||
"subdir": "lib/promgrafonnet"
|
||||
}
|
||||
},
|
||||
"version": "faa0561a823cbd3b726aaefffcf6ee317547041a",
|
||||
"version": "0f0f3dc472ff2a8cdc6a6c6f938a2c450cb493ec",
|
||||
"sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps="
|
||||
},
|
||||
{
|
||||
@@ -79,7 +79,7 @@
|
||||
"subdir": "jsonnet/kube-state-metrics"
|
||||
}
|
||||
},
|
||||
"version": "93255df07113f87dcdec0726b4f4db4e6344df26",
|
||||
"version": "b1889aa1561ee269f628e2b9659155e7714dbbf0",
|
||||
"sum": "S5qI+PJUdNeYOv76jH5nxwYS9N6U7CRxvyuB1wI4cTE="
|
||||
},
|
||||
{
|
||||
@@ -89,7 +89,7 @@
|
||||
"subdir": "jsonnet/kube-state-metrics-mixin"
|
||||
}
|
||||
},
|
||||
"version": "93255df07113f87dcdec0726b4f4db4e6344df26",
|
||||
"version": "b1889aa1561ee269f628e2b9659155e7714dbbf0",
|
||||
"sum": "Yf8mNAHrV1YWzrdV8Ry5dJ8YblepTGw3C0Zp10XIYLo="
|
||||
},
|
||||
{
|
||||
@@ -99,7 +99,7 @@
|
||||
"subdir": "jsonnet/mixin"
|
||||
}
|
||||
},
|
||||
"version": "a4f5928b074e75addb76a27c5ebfe78314fcd6d1",
|
||||
"version": "b7ca32169844f0b5143f3e5e318fc05fa025df18",
|
||||
"sum": "6reUygVmQrLEWQzTKcH8ceDbvM+2ztK3z2VBR2K2l+U=",
|
||||
"name": "prometheus-operator-mixin"
|
||||
},
|
||||
@@ -110,7 +110,7 @@
|
||||
"subdir": "jsonnet/prometheus-operator"
|
||||
}
|
||||
},
|
||||
"version": "64d466d7730165c0d260f187e2e9742bc0295bf2",
|
||||
"version": "b7ca32169844f0b5143f3e5e318fc05fa025df18",
|
||||
"sum": "MRwyChXdKG3anL2OWpbUu3qWc97w9J6YsjUWjLFQyB0="
|
||||
},
|
||||
{
|
||||
@@ -120,8 +120,8 @@
|
||||
"subdir": "doc/alertmanager-mixin"
|
||||
}
|
||||
},
|
||||
"version": "22ac6dff21901bfce14545da59b37a1aaca0db3a",
|
||||
"sum": "VP1vn/WTGLZaBgGhGMUO81qNTc/fnp5KtzVjcaxad6Q=",
|
||||
"version": "99f64e944b1043c790784cf5373c8fb349816fc4",
|
||||
"sum": "V8jcZQ1Qrlm7AQ6wjbuQQsacPb0NvrcZovKyplmzW5w=",
|
||||
"name": "alertmanager"
|
||||
},
|
||||
{
|
||||
@@ -131,8 +131,8 @@
|
||||
"subdir": "docs/node-mixin"
|
||||
}
|
||||
},
|
||||
"version": "46cdf618c9419ea62dd1a87331def97a0f18c837",
|
||||
"sum": "sxI7cBEy34JSbB0gHy9xC/ErtFsRzl9eJPsWVqd+XSY="
|
||||
"version": "b597c1244d7bef49e6f3359c87a56dd7707f6719",
|
||||
"sum": "cZTNXQMUCLB5FGYpMn845dcqGdkcYt58qCqOFIV/BoQ="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@@ -141,7 +141,7 @@
|
||||
"subdir": "documentation/prometheus-mixin"
|
||||
}
|
||||
},
|
||||
"version": "3cafc58827d1ebd1a67749f88be4218f0bab3d8d",
|
||||
"version": "6eeded0fdf760e81af75d9c44ce539ab77da4505",
|
||||
"sum": "VK0c3sQ3ksiM6JQsAVfWmL5NbzGv9llMfXFNXfFdJ+A=",
|
||||
"name": "prometheus"
|
||||
},
|
||||
@@ -152,7 +152,7 @@
|
||||
"subdir": "mixin"
|
||||
}
|
||||
},
|
||||
"version": "ba6c5c4726ff52807c7383c68f2159b1af7980bb",
|
||||
"version": "09b36547e5ed61a32a309648a8913bd02c08d3cc",
|
||||
"sum": "XP3uq7xcfKHsnWsz1v992csZhhZR3jQma6hFOfSViTs=",
|
||||
"name": "thanos-mixin"
|
||||
},
|
||||
|
||||
@@ -51,6 +51,7 @@ resources:
|
||||
- ./manifests/prometheus-adapter-clusterRoleServerResources.yaml
|
||||
- ./manifests/prometheus-adapter-configMap.yaml
|
||||
- ./manifests/prometheus-adapter-deployment.yaml
|
||||
- ./manifests/prometheus-adapter-podDisruptionBudget.yaml
|
||||
- ./manifests/prometheus-adapter-roleBindingAuthReader.yaml
|
||||
- ./manifests/prometheus-adapter-service.yaml
|
||||
- ./manifests/prometheus-adapter-serviceAccount.yaml
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -14,11 +14,11 @@ spec:
|
||||
rules:
|
||||
- alert: KubePodCrashLooping
|
||||
annotations:
|
||||
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is restarting {{ printf "%.2f" $value }} times / 10 minutes.
|
||||
description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is in waiting state (reason: "CrashLoopBackOff").'
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubepodcrashlooping
|
||||
summary: Pod is crash looping.
|
||||
expr: |
|
||||
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[10m]) * 60 * 5 > 0
|
||||
max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", job="kube-state-metrics"}[5m]) >= 1
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
@@ -214,19 +214,19 @@ spec:
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubehpareplicasmismatch
|
||||
summary: HPA has not matched descired number of replicas.
|
||||
expr: |
|
||||
(kube_hpa_status_desired_replicas{job="kube-state-metrics"}
|
||||
(kube_horizontalpodautoscaler_status_desired_replicas{job="kube-state-metrics"}
|
||||
!=
|
||||
kube_hpa_status_current_replicas{job="kube-state-metrics"})
|
||||
kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics"})
|
||||
and
|
||||
(kube_hpa_status_current_replicas{job="kube-state-metrics"}
|
||||
(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics"}
|
||||
>
|
||||
kube_hpa_spec_min_replicas{job="kube-state-metrics"})
|
||||
kube_horizontalpodautoscaler_spec_min_replicas{job="kube-state-metrics"})
|
||||
and
|
||||
(kube_hpa_status_current_replicas{job="kube-state-metrics"}
|
||||
(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics"}
|
||||
<
|
||||
kube_hpa_spec_max_replicas{job="kube-state-metrics"})
|
||||
kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics"})
|
||||
and
|
||||
changes(kube_hpa_status_current_replicas{job="kube-state-metrics"}[15m]) == 0
|
||||
changes(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics"}[15m]) == 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
@@ -236,9 +236,9 @@ spec:
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubehpamaxedout
|
||||
summary: HPA is running at max replicas
|
||||
expr: |
|
||||
kube_hpa_status_current_replicas{job="kube-state-metrics"}
|
||||
kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics"}
|
||||
==
|
||||
kube_hpa_spec_max_replicas{job="kube-state-metrics"}
|
||||
kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics"}
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
@@ -360,10 +360,13 @@ spec:
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubepersistentvolumefillingup
|
||||
summary: PersistentVolume is filling up.
|
||||
expr: |
|
||||
kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}
|
||||
/
|
||||
kubelet_volume_stats_capacity_bytes{job="kubelet", metrics_path="/metrics"}
|
||||
< 0.03
|
||||
(
|
||||
kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}
|
||||
/
|
||||
kubelet_volume_stats_capacity_bytes{job="kubelet", metrics_path="/metrics"}
|
||||
) < 0.03
|
||||
and
|
||||
kubelet_volume_stats_used_bytes{job="kubelet", metrics_path="/metrics"} > 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
@@ -379,6 +382,8 @@ spec:
|
||||
kubelet_volume_stats_capacity_bytes{job="kubelet", metrics_path="/metrics"}
|
||||
) < 0.15
|
||||
and
|
||||
kubelet_volume_stats_used_bytes{job="kubelet", metrics_path="/metrics"} > 0
|
||||
and
|
||||
predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
|
||||
for: 1h
|
||||
labels:
|
||||
|
||||
@@ -39,7 +39,7 @@ spec:
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|Ava
ilableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
|
||||
regex: (admission_quota_controller_adds|admission_quota_controller_depth|admission_quota_controller_longest_running_processor_microseconds|admission_quota_controller_queue_latency|admission_quota_controller_unfinished_work_seconds|admission_quota_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|APIServiceOpenAPIAggregationControllerQueue1_depth|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_retries|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_adds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|APIServiceRegistrationController_queue_latency|APIServiceRegistrationController_retries|APIServiceRegistrationController_unfinished_work_seconds|APIServiceRegistrationController_work_duration|autoregister_adds|autoregister_depth|autoregister_longest_running_processor_microseconds|autoregister_queue_latency|autoregister_retries|autoregister_unfinished_work_seconds|autoregister_work_duration|AvailableConditionController_adds|AvailableConditionController_depth|AvailableConditionController_longest_running_processor_microseconds|AvailableConditionController_queue_latency|AvailableConditionController_retries|AvailableConditionController_unfinished_work_seconds|AvailableConditionController_work_duration|crd_autoregistration_controller_adds|crd_autoregistration_controller_depth|crd_autoregistration_controller_longest_running_processor_microseconds|crd_autoregistration_controller_queue_latency|crd_autoregistration_controller_retries|crd_autoregistration_controller_unfinished_work_seconds|crd_autoregistration_controller_work_duration|crdEstablishing_adds|crdEstablishing_depth|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_
queue_latency|crdEstablishing_retries|crdEstablishing_unfinished_work_seconds|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_finalizer_longest_running_processor_microseconds|crd_finalizer_queue_latency|crd_finalizer_retries|crd_finalizer_unfinished_work_seconds|crd_finalizer_work_duration|crd_naming_condition_controller_adds|crd_naming_condition_controller_depth|crd_naming_condition_controller_longest_running_processor_microseconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|crd_naming_condition_controller_unfinished_work_seconds|crd_naming_condition_controller_work_duration|crd_openapi_controller_adds|crd_openapi_controller_depth|crd_openapi_controller_longest_running_processor_microseconds|crd_openapi_controller_queue_latency|crd_openapi_controller_retries|crd_openapi_controller_unfinished_work_seconds|crd_openapi_controller_work_duration|DiscoveryController_adds|DiscoveryController_depth|DiscoveryController_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_retries|DiscoveryController_unfinished_work_seconds|DiscoveryController_work_duration|kubeproxy_sync_proxy_rules_latency_microseconds|non_structural_schema_condition_controller_adds|non_structural_schema_condition_controller_depth|non_structural_schema_condition_controller_longest_running_processor_microseconds|non_structural_schema_condition_controller_queue_latency|non_structural_schema_condition_controller_retries|non_structural_schema_condition_controller_unfinished_work_seconds|non_structural_schema_condition_controller_work_duration|rest_client_request_latency_seconds|storage_operation_errors_total|storage_operation_status_count)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
|
||||
@@ -39,7 +39,7 @@ spec:
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|Ava
ilableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
|
||||
regex: (admission_quota_controller_adds|admission_quota_controller_depth|admission_quota_controller_longest_running_processor_microseconds|admission_quota_controller_queue_latency|admission_quota_controller_unfinished_work_seconds|admission_quota_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|APIServiceOpenAPIAggregationControllerQueue1_depth|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_retries|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_adds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|APIServiceRegistrationController_queue_latency|APIServiceRegistrationController_retries|APIServiceRegistrationController_unfinished_work_seconds|APIServiceRegistrationController_work_duration|autoregister_adds|autoregister_depth|autoregister_longest_running_processor_microseconds|autoregister_queue_latency|autoregister_retries|autoregister_unfinished_work_seconds|autoregister_work_duration|AvailableConditionController_adds|AvailableConditionController_depth|AvailableConditionController_longest_running_processor_microseconds|AvailableConditionController_queue_latency|AvailableConditionController_retries|AvailableConditionController_unfinished_work_seconds|AvailableConditionController_work_duration|crd_autoregistration_controller_adds|crd_autoregistration_controller_depth|crd_autoregistration_controller_longest_running_processor_microseconds|crd_autoregistration_controller_queue_latency|crd_autoregistration_controller_retries|crd_autoregistration_controller_unfinished_work_seconds|crd_autoregistration_controller_work_duration|crdEstablishing_adds|crdEstablishing_depth|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_
queue_latency|crdEstablishing_retries|crdEstablishing_unfinished_work_seconds|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_finalizer_longest_running_processor_microseconds|crd_finalizer_queue_latency|crd_finalizer_retries|crd_finalizer_unfinished_work_seconds|crd_finalizer_work_duration|crd_naming_condition_controller_adds|crd_naming_condition_controller_depth|crd_naming_condition_controller_longest_running_processor_microseconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|crd_naming_condition_controller_unfinished_work_seconds|crd_naming_condition_controller_work_duration|crd_openapi_controller_adds|crd_openapi_controller_depth|crd_openapi_controller_longest_running_processor_microseconds|crd_openapi_controller_queue_latency|crd_openapi_controller_retries|crd_openapi_controller_unfinished_work_seconds|crd_openapi_controller_work_duration|DiscoveryController_adds|DiscoveryController_depth|DiscoveryController_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_retries|DiscoveryController_unfinished_work_seconds|DiscoveryController_work_duration|kubeproxy_sync_proxy_rules_latency_microseconds|non_structural_schema_condition_controller_adds|non_structural_schema_condition_controller_depth|non_structural_schema_condition_controller_longest_running_processor_microseconds|non_structural_schema_condition_controller_queue_latency|non_structural_schema_condition_controller_retries|non_structural_schema_condition_controller_unfinished_work_seconds|non_structural_schema_condition_controller_work_duration|rest_client_request_latency_seconds|storage_operation_errors_total|storage_operation_status_count)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
|
||||
@@ -40,7 +40,7 @@ spec:
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: (admission_quota_controller_adds|crd_autoregistration_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|AvailableConditionController_retries|crd_openapi_controller_unfinished_work_seconds|APIServiceRegistrationController_retries|admission_quota_controller_longest_running_processor_microseconds|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_unfinished_work_seconds|crd_openapi_controller_adds|crd_autoregistration_controller_retries|crd_finalizer_queue_latency|AvailableConditionController_work_duration|non_structural_schema_condition_controller_depth|crd_autoregistration_controller_unfinished_work_seconds|AvailableConditionController_adds|DiscoveryController_longest_running_processor_microseconds|autoregister_queue_latency|crd_autoregistration_controller_adds|non_structural_schema_condition_controller_work_duration|APIServiceRegistrationController_adds|crd_finalizer_work_duration|crd_naming_condition_controller_unfinished_work_seconds|crd_openapi_controller_longest_running_processor_microseconds|DiscoveryController_adds|crd_autoregistration_controller_longest_running_processor_microseconds|autoregister_unfinished_work_seconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|non_structural_schema_condition_controller_queue_latency|crd_naming_condition_controller_depth|AvailableConditionController_longest_running_processor_microseconds|crdEstablishing_depth|crd_finalizer_longest_running_processor_microseconds|crd_naming_condition_controller_adds|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_unfinished_work_seconds|crd_openapi_controller_depth|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|DiscoveryController_work_duration|autoregister_adds|crd_autoregistration_controller_queue_latency|crd_finalizer_retries|Ava
ilableConditionController_unfinished_work_seconds|autoregister_longest_running_processor_microseconds|non_structural_schema_condition_controller_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_depth|AvailableConditionController_depth|DiscoveryController_retries|admission_quota_controller_depth|crdEstablishing_adds|APIServiceOpenAPIAggregationControllerQueue1_retries|crdEstablishing_queue_latency|non_structural_schema_condition_controller_longest_running_processor_microseconds|autoregister_work_duration|crd_openapi_controller_retries|APIServiceRegistrationController_work_duration|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_openapi_controller_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_queue_latency|crd_autoregistration_controller_depth|AvailableConditionController_queue_latency|admission_quota_controller_queue_latency|crd_naming_condition_controller_work_duration|crd_openapi_controller_work_duration|DiscoveryController_depth|crd_naming_condition_controller_longest_running_processor_microseconds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|crd_finalizer_unfinished_work_seconds|crdEstablishing_retries|admission_quota_controller_unfinished_work_seconds|non_structural_schema_condition_controller_adds|APIServiceRegistrationController_unfinished_work_seconds|admission_quota_controller_work_duration|autoregister_depth|autoregister_retries|kubeproxy_sync_proxy_rules_latency_microseconds|rest_client_request_latency_seconds|non_structural_schema_condition_controller_retries)
|
||||
regex: (admission_quota_controller_adds|admission_quota_controller_depth|admission_quota_controller_longest_running_processor_microseconds|admission_quota_controller_queue_latency|admission_quota_controller_unfinished_work_seconds|admission_quota_controller_work_duration|APIServiceOpenAPIAggregationControllerQueue1_adds|APIServiceOpenAPIAggregationControllerQueue1_depth|APIServiceOpenAPIAggregationControllerQueue1_longest_running_processor_microseconds|APIServiceOpenAPIAggregationControllerQueue1_queue_latency|APIServiceOpenAPIAggregationControllerQueue1_retries|APIServiceOpenAPIAggregationControllerQueue1_unfinished_work_seconds|APIServiceOpenAPIAggregationControllerQueue1_work_duration|APIServiceRegistrationController_adds|APIServiceRegistrationController_depth|APIServiceRegistrationController_longest_running_processor_microseconds|APIServiceRegistrationController_queue_latency|APIServiceRegistrationController_retries|APIServiceRegistrationController_unfinished_work_seconds|APIServiceRegistrationController_work_duration|autoregister_adds|autoregister_depth|autoregister_longest_running_processor_microseconds|autoregister_queue_latency|autoregister_retries|autoregister_unfinished_work_seconds|autoregister_work_duration|AvailableConditionController_adds|AvailableConditionController_depth|AvailableConditionController_longest_running_processor_microseconds|AvailableConditionController_queue_latency|AvailableConditionController_retries|AvailableConditionController_unfinished_work_seconds|AvailableConditionController_work_duration|crd_autoregistration_controller_adds|crd_autoregistration_controller_depth|crd_autoregistration_controller_longest_running_processor_microseconds|crd_autoregistration_controller_queue_latency|crd_autoregistration_controller_retries|crd_autoregistration_controller_unfinished_work_seconds|crd_autoregistration_controller_work_duration|crdEstablishing_adds|crdEstablishing_depth|crdEstablishing_longest_running_processor_microseconds|crdEstablishing_
queue_latency|crdEstablishing_retries|crdEstablishing_unfinished_work_seconds|crdEstablishing_work_duration|crd_finalizer_adds|crd_finalizer_depth|crd_finalizer_longest_running_processor_microseconds|crd_finalizer_queue_latency|crd_finalizer_retries|crd_finalizer_unfinished_work_seconds|crd_finalizer_work_duration|crd_naming_condition_controller_adds|crd_naming_condition_controller_depth|crd_naming_condition_controller_longest_running_processor_microseconds|crd_naming_condition_controller_queue_latency|crd_naming_condition_controller_retries|crd_naming_condition_controller_unfinished_work_seconds|crd_naming_condition_controller_work_duration|crd_openapi_controller_adds|crd_openapi_controller_depth|crd_openapi_controller_longest_running_processor_microseconds|crd_openapi_controller_queue_latency|crd_openapi_controller_retries|crd_openapi_controller_unfinished_work_seconds|crd_openapi_controller_work_duration|DiscoveryController_adds|DiscoveryController_depth|DiscoveryController_longest_running_processor_microseconds|DiscoveryController_queue_latency|DiscoveryController_retries|DiscoveryController_unfinished_work_seconds|DiscoveryController_work_duration|kubeproxy_sync_proxy_rules_latency_microseconds|non_structural_schema_condition_controller_adds|non_structural_schema_condition_controller_depth|non_structural_schema_condition_controller_longest_running_processor_microseconds|non_structural_schema_condition_controller_queue_latency|non_structural_schema_condition_controller_retries|non_structural_schema_condition_controller_unfinished_work_seconds|non_structural_schema_condition_controller_work_duration|rest_client_request_latency_seconds|storage_operation_errors_total|storage_operation_status_count)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
port: https-metrics
|
||||
@@ -60,6 +60,12 @@ spec:
|
||||
regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: (container_fs_.*|container_spec_.*|container_blkio_device_usage_total|container_file_descriptors|container_sockets|container_threads_max|container_threads|container_start_time_seconds|container_last_seen);;
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- pod
|
||||
- namespace
|
||||
path: /metrics/cadvisor
|
||||
port: https-metrics
|
||||
relabelings:
|
||||
|
||||
@@ -30,8 +30,8 @@ spec:
|
||||
- --no-collector.wifi
|
||||
- --no-collector.hwmon
|
||||
- --collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/pods/.+)($|/)
|
||||
- --collector.netclass.ignored-devices=^(veth.*)$
|
||||
- --collector.netdev.device-exclude=^(veth.*)$
|
||||
- --collector.netclass.ignored-devices=^(veth.*|[a-f0-9]{15})$
|
||||
- --collector.netdev.device-exclude=^(veth.*|[a-f0-9]{15})$
|
||||
image: quay.io/prometheus/node-exporter:v1.1.2
|
||||
name: node-exporter
|
||||
resources:
|
||||
|
||||
@@ -234,9 +234,9 @@ spec:
|
||||
record: instance:node_num_cpu:sum
|
||||
- expr: |
|
||||
1 - avg without (cpu, mode) (
|
||||
rate(node_cpu_seconds_total{job="node-exporter", mode="idle"}[5m])
|
||||
rate(node_cpu_seconds_total{job="node-exporter", mode="idle"}[1m])
|
||||
)
|
||||
record: instance:node_cpu_utilisation:rate5m
|
||||
record: instance:node_cpu_utilisation:rate1m
|
||||
- expr: |
|
||||
(
|
||||
node_load1{job="node-exporter"}
|
||||
@@ -252,31 +252,31 @@ spec:
|
||||
)
|
||||
record: instance:node_memory_utilisation:ratio
|
||||
- expr: |
|
||||
rate(node_vmstat_pgmajfault{job="node-exporter"}[5m])
|
||||
record: instance:node_vmstat_pgmajfault:rate5m
|
||||
rate(node_vmstat_pgmajfault{job="node-exporter"}[1m])
|
||||
record: instance:node_vmstat_pgmajfault:rate1m
|
||||
- expr: |
|
||||
rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[5m])
|
||||
record: instance_device:node_disk_io_time_seconds:rate5m
|
||||
rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
|
||||
record: instance_device:node_disk_io_time_seconds:rate1m
|
||||
- expr: |
|
||||
rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[5m])
|
||||
record: instance_device:node_disk_io_time_weighted_seconds:rate5m
|
||||
rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
|
||||
record: instance_device:node_disk_io_time_weighted_seconds:rate1m
|
||||
- expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[5m])
|
||||
rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[1m])
|
||||
)
|
||||
record: instance:node_network_receive_bytes_excluding_lo:rate5m
|
||||
record: instance:node_network_receive_bytes_excluding_lo:rate1m
|
||||
- expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[5m])
|
||||
rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[1m])
|
||||
)
|
||||
record: instance:node_network_transmit_bytes_excluding_lo:rate5m
|
||||
record: instance:node_network_transmit_bytes_excluding_lo:rate1m
|
||||
- expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[5m])
|
||||
rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[1m])
|
||||
)
|
||||
record: instance:node_network_receive_drop_excluding_lo:rate5m
|
||||
record: instance:node_network_receive_drop_excluding_lo:rate1m
|
||||
- expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[5m])
|
||||
rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[1m])
|
||||
)
|
||||
record: instance:node_network_transmit_drop_excluding_lo:rate5m
|
||||
record: instance:node_network_transmit_drop_excluding_lo:rate1m
|
||||
|
||||
17
manifests/prometheus-adapter-podDisruptionBudget.yaml
Normal file
17
manifests/prometheus-adapter-podDisruptionBudget.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
apiVersion: policy/v1beta1
|
||||
kind: PodDisruptionBudget
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 0.8.4
|
||||
name: prometheus-adapter
|
||||
namespace: monitoring
|
||||
spec:
|
||||
minAvailable: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/component: metrics-adapter
|
||||
app.kubernetes.io/name: prometheus-adapter
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
Reference in New Issue
Block a user