jsonnet: Drop cAdvisor metrics with no (pod, namespace) labels while preserving ability to monitor system services resource usage
The following provides a description and cardinality estimation for each dropped metric, based on tests in a local cluster:

container_blkio_device_usage_total - useful for containers, but not for system services (nodes*disks*services*operations*2)
container_fs_.* - add filesystem read/write data (nodes*disks*services*4)
container_file_descriptors - file descriptor limits and global numbers are exposed via (nodes*services)
container_threads_max - max number of threads in cgroup. Usually for system services it is not limited (nodes*services)
container_threads - used threads in cgroup. Usually not important for system services (nodes*services)
container_sockets - used sockets in cgroup. Usually not important for system services (nodes*services)
container_start_time_seconds - container start. Possibly not needed for system services (nodes*services)
container_last_seen - not needed as system services are always running (nodes*services)
container_spec_.* - everything related to cgroup specification and thus static data (nodes*services*5)
This commit is contained in:
@@ -37,6 +37,23 @@
|
||||
regex: 'container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)',
|
||||
action: 'drop',
|
||||
},
|
||||
// Drop cAdvisor metrics with no (pod, namespace) labels while preserving ability to monitor system services resource usage (cardinality estimation)
|
||||
{
|
||||
sourceLabels: ['__name__', 'pod', 'namespace'],
|
||||
action: 'drop',
|
||||
regex: '(' + std.join('|',
|
||||
[
|
||||
'container_fs_.*', // add filesystem read/write data (nodes*disks*services*4)
|
||||
'container_spec_.*', // everything related to cgroup specification and thus static data (nodes*services*5)
|
||||
'container_blkio_device_usage_total', // useful for containers, but not for system services (nodes*disks*services*operations*2)
|
||||
'container_file_descriptors', // file descriptors limits and global numbers are exposed via (nodes*services)
|
||||
'container_sockets', // used sockets in cgroup. Usually not important for system services (nodes*services)
|
||||
'container_threads_max', // max number of threads in cgroup. Usually for system services it is not limited (nodes*services)
|
||||
'container_threads', // used threads in cgroup. Usually not important for system services (nodes*services)
|
||||
'container_start_time_seconds', // container start. Possibly not needed for system services (nodes*services)
|
||||
'container_last_seen', // not needed as system services are always running (nodes*services)
|
||||
]) + ');;',
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
|
||||
@@ -322,6 +322,23 @@ local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
regex: 'container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)',
|
||||
action: 'drop',
|
||||
},
|
||||
// Drop cAdvisor metrics with no (pod, namespace) labels while preserving ability to monitor system services resource usage (cardinality estimation)
|
||||
{
|
||||
sourceLabels: ['__name__', 'pod', 'namespace'],
|
||||
action: 'drop',
|
||||
regex: '(' + std.join('|',
|
||||
[
|
||||
'container_fs_.*', // add filesystem read/write data (nodes*disks*services*4)
|
||||
'container_spec_.*', // everything related to cgroup specification and thus static data (nodes*services*5)
|
||||
'container_blkio_device_usage_total', // useful for containers, but not for system services (nodes*disks*services*operations*2)
|
||||
'container_file_descriptors', // file descriptors limits and global numbers are exposed via (nodes*services)
|
||||
'container_sockets', // used sockets in cgroup. Usually not important for system services (nodes*services)
|
||||
'container_threads_max', // max number of threads in cgroup. Usually for system services it is not limited (nodes*services)
|
||||
'container_threads', // used threads in cgroup. Usually not important for system services (nodes*services)
|
||||
'container_start_time_seconds', // container start. Possibly not needed for system services (nodes*services)
|
||||
'container_last_seen', // not needed as system services are always running (nodes*services)
|
||||
]) + ');;',
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
|
||||
@@ -59,6 +59,12 @@ spec:
|
||||
regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s)
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- action: drop
|
||||
regex: (container_fs_.*|container_spec_.*|container_blkio_device_usage_total|container_file_descriptors|container_sockets|container_threads_max|container_threads|container_start_time_seconds|container_last_seen);;
|
||||
sourceLabels:
|
||||
- __name__
|
||||
- pod
|
||||
- namespace
|
||||
path: /metrics/cadvisor
|
||||
port: https-metrics
|
||||
relabelings:
|
||||
|
||||
Reference in New Issue
Block a user