Manage metrics filtering
Available since 2.24.0, and since 2.24.2 for MOSK 23.2
By default, StackLight drops unused metrics to increase Prometheus performance, providing better resource utilization and faster query response. The following list contains the white-listed metrics, grouped by scrape job name. Prometheus collects the metrics from this list by default.
White list of Prometheus scrape jobs
{
"_group-blackbox-metrics": [
"probe_dns_lookup_time_seconds",
"probe_duration_seconds",
"probe_http_content_length",
"probe_http_duration_seconds",
"probe_http_ssl",
"probe_http_uncompressed_body_length",
"probe_ssl_earliest_cert_expiry",
"probe_success"
],
"_group-controller-runtime-metrics": [
"workqueue_adds_total",
"workqueue_depth",
"workqueue_queue_duration_seconds_count",
"workqueue_queue_duration_seconds_sum",
"workqueue_retries_total",
"workqueue_work_duration_seconds_count",
"workqueue_work_duration_seconds_sum"
],
"_group-etcd-metrics": [
"etcd_cluster_version",
"etcd_debugging_snap_save_total_duration_seconds_sum",
"etcd_disk_backend_commit_duration_seconds_bucket",
"etcd_disk_backend_commit_duration_seconds_count",
"etcd_disk_backend_commit_duration_seconds_sum",
"etcd_disk_backend_snapshot_duration_seconds_count",
"etcd_disk_backend_snapshot_duration_seconds_sum",
"etcd_disk_wal_fsync_duration_seconds_bucket",
"etcd_disk_wal_fsync_duration_seconds_count",
"etcd_disk_wal_fsync_duration_seconds_sum",
"etcd_mvcc_db_total_size_in_bytes",
"etcd_network_client_grpc_received_bytes_total",
"etcd_network_client_grpc_sent_bytes_total",
"etcd_network_peer_received_bytes_total",
"etcd_network_peer_sent_bytes_total",
"etcd_server_go_version",
"etcd_server_has_leader",
"etcd_server_leader_changes_seen_total",
"etcd_server_proposals_applied_total",
"etcd_server_proposals_committed_total",
"etcd_server_proposals_failed_total",
"etcd_server_proposals_pending",
"etcd_server_quota_backend_bytes",
"etcd_server_version",
"grpc_server_handled_total",
"grpc_server_started_total"
],
"_group-go-collector-metrics": [
"go_gc_duration_seconds",
"go_gc_duration_seconds_count",
"go_gc_duration_seconds_sum",
"go_goroutines",
"go_info",
"go_memstats_alloc_bytes",
"go_memstats_alloc_bytes_total",
"go_memstats_buck_hash_sys_bytes",
"go_memstats_frees_total",
"go_memstats_gc_sys_bytes",
"go_memstats_heap_alloc_bytes",
"go_memstats_heap_idle_bytes",
"go_memstats_heap_inuse_bytes",
"go_memstats_heap_released_bytes",
"go_memstats_heap_sys_bytes",
"go_memstats_lookups_total",
"go_memstats_mallocs_total",
"go_memstats_mcache_inuse_bytes",
"go_memstats_mcache_sys_bytes",
"go_memstats_mspan_inuse_bytes",
"go_memstats_mspan_sys_bytes",
"go_memstats_next_gc_bytes",
"go_memstats_other_sys_bytes",
"go_memstats_stack_inuse_bytes",
"go_memstats_stack_sys_bytes",
"go_memstats_sys_bytes",
"go_threads"
],
"_group-process-collector-metrics": [
"process_cpu_seconds_total",
"process_max_fds",
"process_open_fds",
"process_resident_memory_bytes",
"process_start_time_seconds",
"process_virtual_memory_bytes"
],
"_group-rest-client-metrics": [
"rest_client_request_latency_seconds_count",
"rest_client_request_latency_seconds_sum"
],
"_group-service-handler-metrics": [
"service_handler_count",
"service_handler_sum"
],
"_group-service-http-metrics": [
"service_http_count",
"service_http_sum"
],
"_group-service-reconciler-metrics": [
"service_reconciler_count",
"service_reconciler_sum"
],
"alertmanager-webhook-servicenow": [
"servicenow_auth_ok"
],
"blackbox": [],
"blackbox-external-endpoint": [],
"cadvisor": [
"cadvisor_version_info",
"container_cpu_cfs_periods_total",
"container_cpu_cfs_throttled_periods_total",
"container_cpu_usage_seconds_total",
"container_fs_reads_bytes_total",
"container_fs_reads_total",
"container_fs_writes_bytes_total",
"container_fs_writes_total",
"container_memory_usage_bytes",
"container_memory_working_set_bytes",
"container_network_receive_bytes_total",
"container_network_transmit_bytes_total",
"container_scrape_error",
"machine_cpu_cores"
],
"calico": [
"felix_active_local_endpoints",
"felix_active_local_policies",
"felix_active_local_selectors",
"felix_active_local_tags",
"felix_cluster_num_host_endpoints",
"felix_cluster_num_hosts",
"felix_cluster_num_workload_endpoints",
"felix_host",
"felix_int_dataplane_addr_msg_batch_size_count",
"felix_int_dataplane_addr_msg_batch_size_sum",
"felix_int_dataplane_failures",
"felix_int_dataplane_iface_msg_batch_size_count",
"felix_int_dataplane_iface_msg_batch_size_sum",
"felix_ipset_errors",
"felix_ipsets_calico",
"felix_iptables_chains",
"felix_iptables_restore_errors",
"felix_iptables_save_errors",
"felix_resyncs_started"
],
"etcd-server": [],
"fluentd": [
"apache_http_request_duration_seconds_bucket",
"apache_http_request_duration_seconds_count",
"docker_networkdb_stats_netmsg",
"docker_networkdb_stats_qlen"
],
"helm-controller": [
"helmbundle_reconcile_up",
"helmbundle_release_ready",
"helmbundle_release_status",
"helmbundle_release_success",
"rest_client_requests_total"
],
"ironic": [
"ironic_driver_metadata",
"ironic_drivers_total",
"ironic_nodes",
"ironic_up"
],
"kaas-exporter": [
"kaas_cluster_info",
"kaas_cluster_updating",
"kaas_clusters",
"kaas_info",
"kaas_license_expiry",
"kaas_machine_ready",
"kaas_machines_ready",
"kaas_machines_requested",
"rest_client_requests_total"
],
"kubelet": [
"kubelet_running_containers",
"kubelet_running_pods",
"kubelet_volume_stats_available_bytes",
"kubelet_volume_stats_capacity_bytes",
"kubernetes_build_info",
"rest_client_requests_total"
],
"kubernetes-apiservers": [
"apiserver_client_certificate_expiration_seconds_bucket",
"apiserver_client_certificate_expiration_seconds_count",
"apiserver_request_total",
"kubernetes_build_info",
"rest_client_requests_total"
],
"kubernetes-master-api": [],
"mcc-blackbox": [],
"mcc-cache": [],
"mcc-controllers": [
"rest_client_requests_total"
],
"mcc-providers": [
"rest_client_requests_total"
],
"mke-manager-api": [],
"mke-metrics-controller": [
"ucp_controller_services",
"ucp_engine_node_health"
],
"mke-metrics-engine": [
"ucp_engine_container_cpu_percent",
"ucp_engine_container_cpu_total_time_nanoseconds",
"ucp_engine_container_health",
"ucp_engine_container_memory_usage_bytes",
"ucp_engine_container_network_rx_bytes_total",
"ucp_engine_container_network_tx_bytes_total",
"ucp_engine_container_unhealth",
"ucp_engine_containers",
"ucp_engine_disk_free_bytes",
"ucp_engine_disk_total_bytes",
"ucp_engine_images",
"ucp_engine_memory_total_bytes",
"ucp_engine_num_cpu_cores"
],
"msr-api": [],
"openstack-blackbox-ext": [],
"openstack-cloudprober": [
"cloudprober_success",
"cloudprober_total"
],
"openstack-ingress-controller": [
"nginx_ingress_controller_build_info",
"nginx_ingress_controller_config_hash",
"nginx_ingress_controller_config_last_reload_successful",
"nginx_ingress_controller_nginx_process_connections",
"nginx_ingress_controller_nginx_process_cpu_seconds_total",
"nginx_ingress_controller_nginx_process_resident_memory_bytes",
"nginx_ingress_controller_request_duration_seconds_bucket",
"nginx_ingress_controller_request_size_sum",
"nginx_ingress_controller_requests",
"nginx_ingress_controller_response_size_sum",
"nginx_ingress_controller_ssl_expire_time_seconds",
"nginx_ingress_controller_success"
],
"osdpl-exporter": [
"osdpl_certificate_expiry"
],
"patroni": [
"patroni_patroni_cluster_unlocked",
"patroni_patroni_info",
"patroni_postgresql_info",
"patroni_replication_info",
"patroni_xlog_location",
"patroni_xlog_paused",
"patroni_xlog_received_location",
"patroni_xlog_replayed_location",
"python_info"
],
"postgresql": [
"pg_database_size",
"pg_locks_count",
"pg_stat_activity_count",
"pg_stat_activity_max_tx_duration",
"pg_stat_archiver_failed_count",
"pg_stat_bgwriter_buffers_alloc",
"pg_stat_bgwriter_buffers_alloc_total",
"pg_stat_bgwriter_buffers_backend",
"pg_stat_bgwriter_buffers_backend_fsync",
"pg_stat_bgwriter_buffers_backend_fsync_total",
"pg_stat_bgwriter_buffers_backend_total",
"pg_stat_bgwriter_buffers_checkpoint",
"pg_stat_bgwriter_buffers_checkpoint_total",
"pg_stat_bgwriter_buffers_clean",
"pg_stat_bgwriter_buffers_clean_total",
"pg_stat_bgwriter_checkpoint_sync_time",
"pg_stat_bgwriter_checkpoint_sync_time_total",
"pg_stat_bgwriter_checkpoint_write_time",
"pg_stat_bgwriter_checkpoint_write_time_total",
"pg_stat_database_blks_hit",
"pg_stat_database_blks_read",
"pg_stat_database_checksum_failures",
"pg_stat_database_conflicts",
"pg_stat_database_conflicts_confl_bufferpin",
"pg_stat_database_conflicts_confl_deadlock",
"pg_stat_database_conflicts_confl_lock",
"pg_stat_database_conflicts_confl_snapshot",
"pg_stat_database_conflicts_confl_tablespace",
"pg_stat_database_deadlocks",
"pg_stat_database_temp_bytes",
"pg_stat_database_tup_deleted",
"pg_stat_database_tup_fetched",
"pg_stat_database_tup_inserted",
"pg_stat_database_tup_returned",
"pg_stat_database_tup_updated",
"pg_stat_database_xact_commit",
"pg_stat_database_xact_rollback",
"postgres_exporter_build_info"
],
"prometheus-alertmanager": [
"alertmanager_active_alerts",
"alertmanager_active_silences",
"alertmanager_alerts",
"alertmanager_alerts_invalid_total",
"alertmanager_alerts_received_total",
"alertmanager_build_info",
"alertmanager_cluster_failed_peers",
"alertmanager_cluster_health_score",
"alertmanager_cluster_members",
"alertmanager_cluster_messages_pruned_total",
"alertmanager_cluster_messages_queued",
"alertmanager_cluster_messages_received_size_total",
"alertmanager_cluster_messages_received_total",
"alertmanager_cluster_messages_sent_size_total",
"alertmanager_cluster_messages_sent_total",
"alertmanager_cluster_peer_info",
"alertmanager_cluster_peers_joined_total",
"alertmanager_cluster_peers_left_total",
"alertmanager_cluster_reconnections_failed_total",
"alertmanager_cluster_reconnections_total",
"alertmanager_config_last_reload_success_timestamp_seconds",
"alertmanager_config_last_reload_successful",
"alertmanager_nflog_gc_duration_seconds_count",
"alertmanager_nflog_gc_duration_seconds_sum",
"alertmanager_nflog_gossip_messages_propagated_total",
"alertmanager_nflog_queries_total",
"alertmanager_nflog_query_duration_seconds_bucket",
"alertmanager_nflog_query_errors_total",
"alertmanager_nflog_snapshot_duration_seconds_count",
"alertmanager_nflog_snapshot_duration_seconds_sum",
"alertmanager_nflog_snapshot_size_bytes",
"alertmanager_notification_latency_seconds_bucket",
"alertmanager_notifications_failed_total",
"alertmanager_notifications_total",
"alertmanager_oversize_gossip_message_duration_seconds_bucket",
"alertmanager_oversized_gossip_message_dropped_total",
"alertmanager_oversized_gossip_message_failure_total",
"alertmanager_oversized_gossip_message_sent_total",
"alertmanager_partial_state_merges_failed_total",
"alertmanager_partial_state_merges_total",
"alertmanager_silences",
"alertmanager_silences_gc_duration_seconds_count",
"alertmanager_silences_gc_duration_seconds_sum",
"alertmanager_silences_gossip_messages_propagated_total",
"alertmanager_silences_queries_total",
"alertmanager_silences_query_duration_seconds_bucket",
"alertmanager_silences_query_errors_total",
"alertmanager_silences_snapshot_duration_seconds_count",
"alertmanager_silences_snapshot_duration_seconds_sum",
"alertmanager_silences_snapshot_size_bytes",
"alertmanager_state_replication_failed_total",
"alertmanager_state_replication_total"
],
"prometheus-coredns": [
"coredns_build_info"
],
"prometheus-elasticsearch-exporter": [
"elasticsearch_breakers_estimated_size_bytes",
"elasticsearch_breakers_limit_size_bytes",
"elasticsearch_breakers_tripped",
"elasticsearch_cluster_health_active_primary_shards",
"elasticsearch_cluster_health_active_shards",
"elasticsearch_cluster_health_delayed_unassigned_shards",
"elasticsearch_cluster_health_initializing_shards",
"elasticsearch_cluster_health_number_of_data_nodes",
"elasticsearch_cluster_health_number_of_nodes",
"elasticsearch_cluster_health_number_of_pending_tasks",
"elasticsearch_cluster_health_relocating_shards",
"elasticsearch_cluster_health_status",
"elasticsearch_cluster_health_unassigned_shards",
"elasticsearch_exporter_build_info",
"elasticsearch_indices_docs",
"elasticsearch_indices_docs_deleted",
"elasticsearch_indices_docs_primary",
"elasticsearch_indices_fielddata_evictions",
"elasticsearch_indices_fielddata_memory_size_bytes",
"elasticsearch_indices_filter_cache_evictions",
"elasticsearch_indices_flush_time_seconds",
"elasticsearch_indices_flush_total",
"elasticsearch_indices_get_exists_time_seconds",
"elasticsearch_indices_get_exists_total",
"elasticsearch_indices_get_missing_time_seconds",
"elasticsearch_indices_get_missing_total",
"elasticsearch_indices_get_time_seconds",
"elasticsearch_indices_get_total",
"elasticsearch_indices_indexing_delete_time_seconds_total",
"elasticsearch_indices_indexing_delete_total",
"elasticsearch_indices_indexing_index_time_seconds_total",
"elasticsearch_indices_indexing_index_total",
"elasticsearch_indices_merges_docs_total",
"elasticsearch_indices_merges_total",
"elasticsearch_indices_merges_total_size_bytes_total",
"elasticsearch_indices_merges_total_time_seconds_total",
"elasticsearch_indices_query_cache_evictions",
"elasticsearch_indices_query_cache_memory_size_bytes",
"elasticsearch_indices_refresh_time_seconds_total",
"elasticsearch_indices_refresh_total",
"elasticsearch_indices_search_fetch_time_seconds",
"elasticsearch_indices_search_fetch_total",
"elasticsearch_indices_search_query_time_seconds",
"elasticsearch_indices_search_query_total",
"elasticsearch_indices_segment_count_primary",
"elasticsearch_indices_segment_count_total",
"elasticsearch_indices_segment_doc_values_memory_bytes_primary",
"elasticsearch_indices_segment_doc_values_memory_bytes_total",
"elasticsearch_indices_segment_fields_memory_bytes_primary",
"elasticsearch_indices_segment_fields_memory_bytes_total",
"elasticsearch_indices_segment_fixed_bit_set_memory_bytes_primary",
"elasticsearch_indices_segment_fixed_bit_set_memory_bytes_total",
"elasticsearch_indices_segment_index_writer_memory_bytes_primary",
"elasticsearch_indices_segment_index_writer_memory_bytes_total",
"elasticsearch_indices_segment_memory_bytes_primary",
"elasticsearch_indices_segment_memory_bytes_total",
"elasticsearch_indices_segment_norms_memory_bytes_primary",
"elasticsearch_indices_segment_norms_memory_bytes_total",
"elasticsearch_indices_segment_points_memory_bytes_primary",
"elasticsearch_indices_segment_points_memory_bytes_total",
"elasticsearch_indices_segment_terms_memory_primary",
"elasticsearch_indices_segment_terms_memory_total",
"elasticsearch_indices_segment_version_map_memory_bytes_primary",
"elasticsearch_indices_segment_version_map_memory_bytes_total",
"elasticsearch_indices_segments_count",
"elasticsearch_indices_segments_memory_bytes",
"elasticsearch_indices_store_size_bytes",
"elasticsearch_indices_store_size_bytes_primary",
"elasticsearch_indices_store_size_bytes_total",
"elasticsearch_indices_store_throttle_time_seconds_total",
"elasticsearch_indices_translog_operations",
"elasticsearch_indices_translog_size_in_bytes",
"elasticsearch_jvm_gc_collection_seconds_count",
"elasticsearch_jvm_gc_collection_seconds_sum",
"elasticsearch_jvm_memory_committed_bytes",
"elasticsearch_jvm_memory_max_bytes",
"elasticsearch_jvm_memory_pool_peak_used_bytes",
"elasticsearch_jvm_memory_used_bytes",
"elasticsearch_os_load1",
"elasticsearch_os_load15",
"elasticsearch_os_load5",
"elasticsearch_process_cpu_percent",
"elasticsearch_process_cpu_seconds_total",
"elasticsearch_process_cpu_time_seconds_sum",
"elasticsearch_process_open_files_count",
"elasticsearch_thread_pool_active_count",
"elasticsearch_thread_pool_completed_count",
"elasticsearch_thread_pool_queue_count",
"elasticsearch_thread_pool_rejected_count",
"elasticsearch_transport_rx_size_bytes_total",
"elasticsearch_transport_tx_size_bytes_total"
],
"prometheus-grafana": [
"grafana_api_dashboard_get_milliseconds",
"grafana_api_dashboard_get_milliseconds_count",
"grafana_api_dashboard_get_milliseconds_sum",
"grafana_api_dashboard_save_milliseconds",
"grafana_api_dashboard_save_milliseconds_count",
"grafana_api_dashboard_save_milliseconds_sum",
"grafana_api_dashboard_search_milliseconds",
"grafana_api_dashboard_search_milliseconds_count",
"grafana_api_dashboard_search_milliseconds_sum",
"grafana_api_dataproxy_request_all_milliseconds",
"grafana_api_dataproxy_request_all_milliseconds_count",
"grafana_api_dataproxy_request_all_milliseconds_sum",
"grafana_api_login_oauth_total",
"grafana_api_login_post_total",
"grafana_api_response_status_total",
"grafana_build_info",
"grafana_feature_toggles_info",
"grafana_http_request_duration_seconds_count",
"grafana_page_response_status_total",
"grafana_plugin_build_info",
"grafana_proxy_response_status_total",
"grafana_stat_total_orgs",
"grafana_stat_total_users",
"grafana_stat_totals_dashboard"
],
"prometheus-kube-state-metrics": [
"kube_cronjob_next_schedule_time",
"kube_daemonset_created",
"kube_daemonset_status_current_number_scheduled",
"kube_daemonset_status_desired_number_scheduled",
"kube_daemonset_status_number_available",
"kube_daemonset_status_number_misscheduled",
"kube_daemonset_status_number_ready",
"kube_daemonset_status_number_unavailable",
"kube_daemonset_status_observed_generation",
"kube_daemonset_status_updated_number_scheduled",
"kube_deployment_created",
"kube_deployment_metadata_generation",
"kube_deployment_spec_paused",
"kube_deployment_spec_replicas",
"kube_deployment_spec_strategy_rollingupdate_max_surge",
"kube_deployment_spec_strategy_rollingupdate_max_unavailable",
"kube_deployment_status_condition",
"kube_deployment_status_observed_generation",
"kube_deployment_status_replicas",
"kube_deployment_status_replicas_available",
"kube_deployment_status_replicas_ready",
"kube_deployment_status_replicas_unavailable",
"kube_deployment_status_replicas_updated",
"kube_endpoint_address_available",
"kube_job_status_active",
"kube_job_status_failed",
"kube_job_status_succeeded",
"kube_namespace_created",
"kube_namespace_status_phase",
"kube_node_info",
"kube_node_labels",
"kube_node_role",
"kube_node_spec_taint",
"kube_node_spec_unschedulable",
"kube_node_status_allocatable",
"kube_node_status_capacity",
"kube_node_status_condition",
"kube_persistentvolume_capacity_bytes",
"kube_persistentvolume_status_phase",
"kube_persistentvolumeclaim_resource_requests_storage_bytes",
"kube_pod_container_info",
"kube_pod_container_resource_limits",
"kube_pod_container_resource_requests",
"kube_pod_container_status_restarts_total",
"kube_pod_container_status_running",
"kube_pod_container_status_terminated",
"kube_pod_container_status_waiting",
"kube_pod_info",
"kube_pod_init_container_status_running",
"kube_pod_status_phase",
"kube_service_status_load_balancer_ingress",
"kube_statefulset_created",
"kube_statefulset_metadata_generation",
"kube_statefulset_replicas",
"kube_statefulset_status_current_revision",
"kube_statefulset_status_observed_generation",
"kube_statefulset_status_replicas",
"kube_statefulset_status_replicas_available",
"kube_statefulset_status_replicas_current",
"kube_statefulset_status_replicas_ready",
"kube_statefulset_status_replicas_updated",
"kube_statefulset_status_update_revision"
],
"prometheus-libvirt-exporter": [
"libvirt_domain_block_stats_allocation",
"libvirt_domain_block_stats_capacity",
"libvirt_domain_block_stats_physical",
"libvirt_domain_block_stats_read_bytes_total",
"libvirt_domain_block_stats_read_requests_total",
"libvirt_domain_block_stats_write_bytes_total",
"libvirt_domain_block_stats_write_requests_total",
"libvirt_domain_info_cpu_time_seconds_total",
"libvirt_domain_info_maximum_memory_bytes",
"libvirt_domain_info_memory_usage_bytes",
"libvirt_domain_info_state",
"libvirt_domain_info_virtual_cpus",
"libvirt_domain_interface_stats_receive_bytes_total",
"libvirt_domain_interface_stats_receive_drops_total",
"libvirt_domain_interface_stats_receive_errors_total",
"libvirt_domain_interface_stats_receive_packets_total",
"libvirt_domain_interface_stats_transmit_bytes_total",
"libvirt_domain_interface_stats_transmit_drops_total",
"libvirt_domain_interface_stats_transmit_errors_total",
"libvirt_domain_interface_stats_transmit_packets_total",
"libvirt_domain_memory_actual_balloon_bytes",
"libvirt_domain_memory_available_bytes",
"libvirt_domain_memory_rss_bytes",
"libvirt_domain_memory_unused_bytes",
"libvirt_domain_memory_usable_bytes",
"libvirt_up"
],
"prometheus-memcached-exporter": [
"memcached_commands_total",
"memcached_current_bytes",
"memcached_current_connections",
"memcached_current_items",
"memcached_exporter_build_info",
"memcached_items_evicted_total",
"memcached_items_reclaimed_total",
"memcached_limit_bytes",
"memcached_read_bytes_total",
"memcached_up",
"memcached_version",
"memcached_written_bytes_total"
],
"prometheus-msteams": [],
"prometheus-mysql-exporter": [
"mysql_global_status_aborted_clients",
"mysql_global_status_aborted_connects",
"mysql_global_status_buffer_pool_pages",
"mysql_global_status_bytes_received",
"mysql_global_status_bytes_sent",
"mysql_global_status_commands_total",
"mysql_global_status_created_tmp_disk_tables",
"mysql_global_status_created_tmp_files",
"mysql_global_status_created_tmp_tables",
"mysql_global_status_handlers_total",
"mysql_global_status_innodb_log_waits",
"mysql_global_status_innodb_num_open_files",
"mysql_global_status_innodb_page_size",
"mysql_global_status_max_used_connections",
"mysql_global_status_open_files",
"mysql_global_status_open_table_definitions",
"mysql_global_status_open_tables",
"mysql_global_status_opened_files",
"mysql_global_status_opened_table_definitions",
"mysql_global_status_opened_tables",
"mysql_global_status_qcache_free_memory",
"mysql_global_status_qcache_hits",
"mysql_global_status_qcache_inserts",
"mysql_global_status_qcache_lowmem_prunes",
"mysql_global_status_qcache_not_cached",
"mysql_global_status_qcache_queries_in_cache",
"mysql_global_status_queries",
"mysql_global_status_questions",
"mysql_global_status_select_full_join",
"mysql_global_status_select_full_range_join",
"mysql_global_status_select_range",
"mysql_global_status_select_range_check",
"mysql_global_status_select_scan",
"mysql_global_status_slow_queries",
"mysql_global_status_sort_merge_passes",
"mysql_global_status_sort_range",
"mysql_global_status_sort_rows",
"mysql_global_status_sort_scan",
"mysql_global_status_table_locks_immediate",
"mysql_global_status_table_locks_waited",
"mysql_global_status_threads_cached",
"mysql_global_status_threads_connected",
"mysql_global_status_threads_created",
"mysql_global_status_threads_running",
"mysql_global_status_wsrep_flow_control_paused",
"mysql_global_status_wsrep_local_recv_queue",
"mysql_global_status_wsrep_local_state",
"mysql_global_status_wsrep_ready",
"mysql_global_variables_innodb_buffer_pool_size",
"mysql_global_variables_innodb_log_buffer_size",
"mysql_global_variables_key_buffer_size",
"mysql_global_variables_max_connections",
"mysql_global_variables_open_files_limit",
"mysql_global_variables_query_cache_size",
"mysql_global_variables_table_definition_cache",
"mysql_global_variables_table_open_cache",
"mysql_global_variables_thread_cache_size",
"mysql_global_variables_wsrep_desync",
"mysql_up",
"mysql_version_info",
"mysqld_exporter_build_info"
],
"prometheus-node-exporter": [
"node_arp_entries",
"node_bonding_active",
"node_bonding_slaves",
"node_boot_time_seconds",
"node_context_switches_total",
"node_cpu_seconds_total",
"node_disk_io_now",
"node_disk_io_time_seconds_total",
"node_disk_io_time_weighted_seconds_total",
"node_disk_read_bytes_total",
"node_disk_read_time_seconds_total",
"node_disk_reads_completed_total",
"node_disk_reads_merged_total",
"node_disk_write_time_seconds_total",
"node_disk_writes_completed_total",
"node_disk_writes_merged_total",
"node_disk_written_bytes_total",
"node_entropy_available_bits",
"node_exporter_build_info",
"node_filefd_allocated",
"node_filefd_maximum",
"node_filesystem_avail_bytes",
"node_filesystem_files",
"node_filesystem_files_free",
"node_filesystem_free_bytes",
"node_filesystem_readonly",
"node_filesystem_size_bytes",
"node_forks_total",
"node_hwmon_temp_celsius",
"node_hwmon_temp_crit_alarm_celsius",
"node_hwmon_temp_crit_celsius",
"node_hwmon_temp_crit_hyst_celsius",
"node_hwmon_temp_max_celsius",
"node_intr_total",
"node_load1",
"node_load15",
"node_load5",
"node_memory_Active_anon_bytes",
"node_memory_Active_bytes",
"node_memory_Active_file_bytes",
"node_memory_AnonHugePages_bytes",
"node_memory_AnonPages_bytes",
"node_memory_Bounce_bytes",
"node_memory_Buffers_bytes",
"node_memory_Cached_bytes",
"node_memory_CommitLimit_bytes",
"node_memory_Committed_AS_bytes",
"node_memory_DirectMap1G",
"node_memory_DirectMap2M_bytes",
"node_memory_DirectMap4k_bytes",
"node_memory_Dirty_bytes",
"node_memory_HardwareCorrupted_bytes",
"node_memory_HugePages_Free",
"node_memory_HugePages_Rsvd",
"node_memory_HugePages_Surp",
"node_memory_HugePages_Total",
"node_memory_Hugepagesize_bytes",
"node_memory_Inactive_anon_bytes",
"node_memory_Inactive_bytes",
"node_memory_Inactive_file_bytes",
"node_memory_KernelStack_bytes",
"node_memory_Mapped_bytes",
"node_memory_MemAvailable_bytes",
"node_memory_MemFree_bytes",
"node_memory_MemTotal_bytes",
"node_memory_Mlocked_bytes",
"node_memory_NFS_Unstable_bytes",
"node_memory_PageTables_bytes",
"node_memory_SReclaimable_bytes",
"node_memory_SUnreclaim_bytes",
"node_memory_Shmem_bytes",
"node_memory_Slab_bytes",
"node_memory_SwapCached_bytes",
"node_memory_SwapFree_bytes",
"node_memory_SwapTotal_bytes",
"node_memory_Unevictable_bytes",
"node_memory_VmallocChunk_bytes",
"node_memory_VmallocTotal_bytes",
"node_memory_VmallocUsed_bytes",
"node_memory_WritebackTmp_bytes",
"node_memory_Writeback_bytes",
"node_netstat_TcpExt_TCPSynRetrans",
"node_netstat_Tcp_ActiveOpens",
"node_netstat_Tcp_AttemptFails",
"node_netstat_Tcp_CurrEstab",
"node_netstat_Tcp_EstabResets",
"node_netstat_Tcp_InCsumErrors",
"node_netstat_Tcp_InErrs",
"node_netstat_Tcp_InSegs",
"node_netstat_Tcp_MaxConn",
"node_netstat_Tcp_OutRsts",
"node_netstat_Tcp_OutSegs",
"node_netstat_Tcp_PassiveOpens",
"node_netstat_Tcp_RetransSegs",
"node_netstat_Udp_InCsumErrors",
"node_netstat_Udp_InDatagrams",
"node_netstat_Udp_InErrors",
"node_netstat_Udp_NoPorts",
"node_netstat_Udp_OutDatagrams",
"node_netstat_Udp_RcvbufErrors",
"node_netstat_Udp_SndbufErrors",
"node_network_mtu_bytes",
"node_network_receive_bytes_total",
"node_network_receive_compressed_total",
"node_network_receive_drop_total",
"node_network_receive_errs_total",
"node_network_receive_fifo_total",
"node_network_receive_frame_total",
"node_network_receive_multicast_total",
"node_network_receive_packets_total",
"node_network_transmit_bytes_total",
"node_network_transmit_carrier_total",
"node_network_transmit_colls_total",
"node_network_transmit_compressed_total",
"node_network_transmit_drop_total",
"node_network_transmit_errs_total",
"node_network_transmit_fifo_total",
"node_network_transmit_packets_total",
"node_network_up",
"node_nf_conntrack_entries",
"node_nf_conntrack_entries_limit",
"node_procs_blocked",
"node_procs_running",
"node_scrape_collector_duration_seconds",
"node_scrape_collector_success",
"node_sockstat_FRAG_inuse",
"node_sockstat_FRAG_memory",
"node_sockstat_RAW_inuse",
"node_sockstat_TCP_alloc",
"node_sockstat_TCP_inuse",
"node_sockstat_TCP_mem",
"node_sockstat_TCP_mem_bytes",
"node_sockstat_TCP_orphan",
"node_sockstat_TCP_tw",
"node_sockstat_UDPLITE_inuse",
"node_sockstat_UDP_inuse",
"node_sockstat_UDP_mem",
"node_sockstat_UDP_mem_bytes",
"node_sockstat_sockets_used",
"node_textfile_scrape_error",
"node_time_seconds",
"node_timex_estimated_error_seconds",
"node_timex_frequency_adjustment_ratio",
"node_timex_maxerror_seconds",
"node_timex_offset_seconds",
"node_timex_sync_status",
"node_uname_info"
],
"prometheus-rabbitmq-exporter": [
"rabbitmq_channels",
"rabbitmq_connections",
"rabbitmq_consumers",
"rabbitmq_exchanges",
"rabbitmq_exporter_build_info",
"rabbitmq_fd_available",
"rabbitmq_fd_used",
"rabbitmq_node_disk_free",
"rabbitmq_node_disk_free_alarm",
"rabbitmq_node_mem_alarm",
"rabbitmq_node_mem_used",
"rabbitmq_partitions",
"rabbitmq_queue_messages_global",
"rabbitmq_queue_messages_ready_global",
"rabbitmq_queue_messages_unacknowledged_global",
"rabbitmq_queues",
"rabbitmq_sockets_available",
"rabbitmq_sockets_used",
"rabbitmq_up",
"rabbitmq_uptime",
"rabbitmq_version_info"
],
"prometheus-relay": [],
"prometheus-server": [
"prometheus_build_info",
"prometheus_config_last_reload_success_timestamp_seconds",
"prometheus_config_last_reload_successful",
"prometheus_engine_query_duration_seconds",
"prometheus_engine_query_duration_seconds_sum",
"prometheus_http_request_duration_seconds_count",
"prometheus_notifications_alertmanagers_discovered",
"prometheus_notifications_errors_total",
"prometheus_notifications_queue_capacity",
"prometheus_notifications_queue_length",
"prometheus_notifications_sent_total",
"prometheus_rule_evaluation_failures_total",
"prometheus_target_interval_length_seconds",
"prometheus_target_interval_length_seconds_count",
"prometheus_target_scrapes_sample_duplicate_timestamp_total",
"prometheus_tsdb_blocks_loaded",
"prometheus_tsdb_compaction_chunk_range_seconds_count",
"prometheus_tsdb_compaction_chunk_range_seconds_sum",
"prometheus_tsdb_compaction_chunk_samples_count",
"prometheus_tsdb_compaction_chunk_samples_sum",
"prometheus_tsdb_compaction_chunk_size_bytes_sum",
"prometheus_tsdb_compaction_duration_seconds_bucket",
"prometheus_tsdb_compaction_duration_seconds_count",
"prometheus_tsdb_compaction_duration_seconds_sum",
"prometheus_tsdb_compactions_failed_total",
"prometheus_tsdb_compactions_total",
"prometheus_tsdb_compactions_triggered_total",
"prometheus_tsdb_head_active_appenders",
"prometheus_tsdb_head_chunks",
"prometheus_tsdb_head_chunks_created_total",
"prometheus_tsdb_head_chunks_removed_total",
"prometheus_tsdb_head_gc_duration_seconds_sum",
"prometheus_tsdb_head_samples_appended_total",
"prometheus_tsdb_head_series",
"prometheus_tsdb_head_series_created_total",
"prometheus_tsdb_head_series_removed_total",
"prometheus_tsdb_reloads_failures_total",
"prometheus_tsdb_reloads_total",
"prometheus_tsdb_storage_blocks_bytes",
"prometheus_tsdb_wal_corruptions_total",
"prometheus_tsdb_wal_fsync_duration_seconds_count",
"prometheus_tsdb_wal_fsync_duration_seconds_sum",
"prometheus_tsdb_wal_truncations_failed_total",
"prometheus_tsdb_wal_truncations_total"
],
"rabbitmq-operator-metrics": [
"rest_client_requests_total"
],
"refapp": [],
"sf-notifier": [
"sf_auth_ok",
"sf_error_count_created",
"sf_error_count_total",
"sf_request_count_created",
"sf_request_count_total"
],
"telegraf-docker-swarm": [
"docker_n_containers",
"docker_n_containers_paused",
"docker_n_containers_running",
"docker_n_containers_stopped",
"docker_swarm_node_ready",
"docker_swarm_tasks_desired",
"docker_swarm_tasks_running",
"internal_agent_gather_errors"
],
"telemeter-client": [
"federate_errors",
"federate_filtered_samples",
"federate_samples"
],
"telemeter-server": [
"telemeter_cleanups_total",
"telemeter_partitions",
"telemeter_samples_total"
],
"tf-cassandra-jmx-exporter": [
"cassandra_cache_entries",
"cassandra_cache_estimated_size_bytes",
"cassandra_cache_hits_total",
"cassandra_cache_requests_total",
"cassandra_client_authentication_failures_total",
"cassandra_client_native_connections",
"cassandra_client_request_failures_total",
"cassandra_client_request_latency_seconds_count",
"cassandra_client_request_latency_seconds_sum",
"cassandra_client_request_timeouts_total",
"cassandra_client_request_unavailable_exceptions_total",
"cassandra_client_request_view_write_latency_seconds",
"cassandra_commit_log_pending_tasks",
"cassandra_compaction_bytes_compacted_total",
"cassandra_compaction_completed_total",
"cassandra_dropped_messages_total",
"cassandra_endpoint_connection_timeouts_total",
"cassandra_storage_exceptions_total",
"cassandra_storage_hints_total",
"cassandra_storage_load_bytes",
"cassandra_table_estimated_pending_compactions",
"cassandra_table_repaired_ratio",
"cassandra_table_sstables_per_read_count",
"cassandra_table_tombstones_scanned",
"cassandra_thread_pool_active_tasks",
"cassandra_thread_pool_blocked_tasks"
],
"tf-control": [
"tf_controller_sessions",
"tf_controller_up"
],
"tf-kafka-jmx": [
"jmx_exporter_build_info",
"kafka_controller_controllerstats_count",
"kafka_controller_controllerstats_oneminuterate",
"kafka_controller_kafkacontroller_value",
"kafka_log_log_value",
"kafka_network_processor_value",
"kafka_network_requestmetrics_99thpercentile",
"kafka_network_requestmetrics_mean",
"kafka_network_requestmetrics_oneminuterate",
"kafka_network_socketserver_value",
"kafka_server_brokertopicmetrics_count",
"kafka_server_brokertopicmetrics_oneminuterate",
"kafka_server_delayedoperationpurgatory_value",
"kafka_server_kafkarequesthandlerpool_oneminuterate",
"kafka_server_replicamanager_oneminuterate",
"kafka_server_replicamanager_value"
],
"tf-redis": [
"redis_commands_duration_seconds_total",
"redis_commands_processed_total",
"redis_commands_total",
"redis_connected_clients",
"redis_connected_slaves",
"redis_db_keys",
"redis_db_keys_expiring",
"redis_evicted_keys_total",
"redis_expired_keys_total",
"redis_exporter_build_info",
"redis_instance_info",
"redis_keyspace_hits_total",
"redis_keyspace_misses_total",
"redis_memory_max_bytes",
"redis_memory_used_bytes",
"redis_net_input_bytes_total",
"redis_net_output_bytes_total",
"redis_rejected_connections_total",
"redis_slave_info",
"redis_up",
"redis_uptime_in_seconds"
],
"tf-vrouter": [
"tf_vrouter_ds_discard",
"tf_vrouter_ds_flow_action_drop",
"tf_vrouter_ds_flow_queue_limit_exceeded",
"tf_vrouter_ds_flow_table_full",
"tf_vrouter_ds_frag_err",
"tf_vrouter_ds_invalid_if",
"tf_vrouter_ds_invalid_label",
"tf_vrouter_ds_invalid_nh",
"tf_vrouter_flow_active",
"tf_vrouter_flow_aged",
"tf_vrouter_flow_created",
"tf_vrouter_lls_session_info",
"tf_vrouter_up",
"tf_vrouter_xmpp_connection_state"
],
"tf-zookeeper": [
"approximate_data_size",
"bytes_received_count",
"commit_count",
"connection_drop_count",
"connection_rejected",
"connection_request_count",
"dead_watchers_cleaner_latency_sum",
"dead_watchers_cleared",
"dead_watchers_queued",
"digest_mismatches_count",
"election_time_sum",
"ephemerals_count",
"follower_sync_time_count",
"follower_sync_time_sum",
"fsynctime_sum",
"global_sessions",
"jvm_classes_loaded",
"jvm_gc_collection_seconds_sum",
"jvm_info",
"jvm_memory_pool_bytes_used",
"jvm_threads_current",
"jvm_threads_deadlocked",
"jvm_threads_state",
"leader_uptime",
"learner_commit_received_count",
"learner_proposal_received_count",
"learners",
"local_sessions",
"max_file_descriptor_count",
"node_changed_watch_count_sum",
"node_children_watch_count_sum",
"node_created_watch_count_sum",
"node_deleted_watch_count_sum",
"num_alive_connections",
"om_commit_process_time_ms_sum",
"om_proposal_process_time_ms_sum",
"open_file_descriptor_count",
"outstanding_requests",
"packets_received",
"packets_sent",
"pending_syncs",
"proposal_count",
"quorum_size",
"response_packet_cache_hits",
"response_packet_cache_misses",
"response_packet_get_children_cache_hits",
"response_packet_get_children_cache_misses",
"revalidate_count",
"snapshottime_sum",
"stale_sessions_expired",
"synced_followers",
"synced_non_voting_followers",
"synced_observers",
"unrecoverable_error_count",
"uptime",
"watch_count",
"znode_count"
],
"ucp-kv": []
}
You can add necessary metrics that are dropped to this white list as described below. It is also possible to disable the filtering feature. However, to prevent a direct impact on the Prometheus index size, which affects query speed, Mirantis does not recommend disabling the feature. For clusters with an extended retention period, performance degradation will be the most noticeable.
Add dropped metrics to the white list¶
You can expand the default white list of Prometheus
metrics using the prometheusServer.metricsFiltering.extraMetricsInclude
parameter to enable metrics that are dropped by default. For the
parameter description, see Prometheus metrics filtering. For configuration
steps, see StackLight configuration procedure.
Example configuration:
prometheusServer:
  metricsFiltering:
    enabled: true
    extraMetricsInclude:
      cadvisor:
        - container_memory_failcnt
        - container_network_transmit_errors_total
      calico:
        - felix_route_table_per_iface_sync_seconds_sum
        - felix_bpf_dataplane_endpoints
      _group-go-collector-metrics:
        - go_gc_heap_goal_bytes
        - go_gc_heap_objects_objects
Disable metrics filtering¶
To prevent a direct impact on the Prometheus index size, which affects query speed, Mirantis does not recommend disabling metrics filtering. In clusters with an extended retention period, performance degradation will be the most noticeable. Therefore, the best option is to keep the feature enabled and add the required dropped metrics to the white list as described in Add dropped metrics to the white list.
If disabling metrics filtering is absolutely necessary, set the prometheusServer.metricsFiltering.enabled parameter to false:
prometheusServer:
  metricsFiltering:
    enabled: false
For configuration steps, see StackLight configuration procedure.