Manage metrics filtering

Available since 2.24.0, and since 2.24.2 for MOSK 23.2

By default, StackLight drops unused metrics to increase Prometheus performance, providing better resource utilization and faster query response. The following list contains the white-listed metrics grouped by scrape job name. Prometheus collects the metrics from this list by default.

White list of Prometheus scrape jobs
{
    "_group-blackbox-metrics": [
        "probe_dns_lookup_time_seconds",
        "probe_duration_seconds",
        "probe_http_content_length",
        "probe_http_duration_seconds",
        "probe_http_ssl",
        "probe_http_uncompressed_body_length",
        "probe_ssl_earliest_cert_expiry",
        "probe_success"
    ],
    "_group-controller-runtime-metrics": [
        "workqueue_adds_total",
        "workqueue_depth",
        "workqueue_queue_duration_seconds_count",
        "workqueue_queue_duration_seconds_sum",
        "workqueue_retries_total",
        "workqueue_work_duration_seconds_count",
        "workqueue_work_duration_seconds_sum"
    ],
    "_group-etcd-metrics": [
        "etcd_cluster_version",
        "etcd_debugging_snap_save_total_duration_seconds_sum",
        "etcd_disk_backend_commit_duration_seconds_bucket",
        "etcd_disk_backend_commit_duration_seconds_count",
        "etcd_disk_backend_commit_duration_seconds_sum",
        "etcd_disk_backend_snapshot_duration_seconds_count",
        "etcd_disk_backend_snapshot_duration_seconds_sum",
        "etcd_disk_wal_fsync_duration_seconds_bucket",
        "etcd_disk_wal_fsync_duration_seconds_count",
        "etcd_disk_wal_fsync_duration_seconds_sum",
        "etcd_mvcc_db_total_size_in_bytes",
        "etcd_network_client_grpc_received_bytes_total",
        "etcd_network_client_grpc_sent_bytes_total",
        "etcd_network_peer_received_bytes_total",
        "etcd_network_peer_sent_bytes_total",
        "etcd_server_go_version",
        "etcd_server_has_leader",
        "etcd_server_leader_changes_seen_total",
        "etcd_server_proposals_applied_total",
        "etcd_server_proposals_committed_total",
        "etcd_server_proposals_failed_total",
        "etcd_server_proposals_pending",
        "etcd_server_quota_backend_bytes",
        "etcd_server_version",
        "grpc_server_handled_total",
        "grpc_server_started_total"
    ],
    "_group-go-collector-metrics": [
        "go_gc_duration_seconds",
        "go_gc_duration_seconds_count",
        "go_gc_duration_seconds_sum",
        "go_goroutines",
        "go_info",
        "go_memstats_alloc_bytes",
        "go_memstats_alloc_bytes_total",
        "go_memstats_buck_hash_sys_bytes",
        "go_memstats_frees_total",
        "go_memstats_gc_sys_bytes",
        "go_memstats_heap_alloc_bytes",
        "go_memstats_heap_idle_bytes",
        "go_memstats_heap_inuse_bytes",
        "go_memstats_heap_released_bytes",
        "go_memstats_heap_sys_bytes",
        "go_memstats_lookups_total",
        "go_memstats_mallocs_total",
        "go_memstats_mcache_inuse_bytes",
        "go_memstats_mcache_sys_bytes",
        "go_memstats_mspan_inuse_bytes",
        "go_memstats_mspan_sys_bytes",
        "go_memstats_next_gc_bytes",
        "go_memstats_other_sys_bytes",
        "go_memstats_stack_inuse_bytes",
        "go_memstats_stack_sys_bytes",
        "go_memstats_sys_bytes",
        "go_threads"
    ],
    "_group-process-collector-metrics": [
        "process_cpu_seconds_total",
        "process_max_fds",
        "process_open_fds",
        "process_resident_memory_bytes",
        "process_start_time_seconds",
        "process_virtual_memory_bytes"
    ],
    "_group-rest-client-metrics": [
        "rest_client_request_latency_seconds_count",
        "rest_client_request_latency_seconds_sum"
    ],
    "_group-service-handler-metrics": [
        "service_handler_count",
        "service_handler_sum"
    ],
    "_group-service-http-metrics": [
        "service_http_count",
        "service_http_sum"
    ],
    "_group-service-reconciler-metrics": [
        "service_reconciler_count",
        "service_reconciler_sum"
    ],
    "alertmanager-webhook-servicenow": [
        "servicenow_auth_ok"
    ],
    "blackbox": [],
    "blackbox-external-endpoint": [],
    "cadvisor": [
        "cadvisor_version_info",
        "container_cpu_cfs_periods_total",
        "container_cpu_cfs_throttled_periods_total",
        "container_cpu_usage_seconds_total",
        "container_fs_reads_bytes_total",
        "container_fs_reads_total",
        "container_fs_writes_bytes_total",
        "container_fs_writes_total",
        "container_memory_usage_bytes",
        "container_memory_working_set_bytes",
        "container_network_receive_bytes_total",
        "container_network_transmit_bytes_total",
        "container_scrape_error",
        "machine_cpu_cores"
    ],
    "calico": [
        "felix_active_local_endpoints",
        "felix_active_local_policies",
        "felix_active_local_selectors",
        "felix_active_local_tags",
        "felix_cluster_num_host_endpoints",
        "felix_cluster_num_hosts",
        "felix_cluster_num_workload_endpoints",
        "felix_host",
        "felix_int_dataplane_addr_msg_batch_size_count",
        "felix_int_dataplane_addr_msg_batch_size_sum",
        "felix_int_dataplane_failures",
        "felix_int_dataplane_iface_msg_batch_size_count",
        "felix_int_dataplane_iface_msg_batch_size_sum",
        "felix_ipset_errors",
        "felix_ipsets_calico",
        "felix_iptables_chains",
        "felix_iptables_restore_errors",
        "felix_iptables_save_errors",
        "felix_resyncs_started"
    ],
    "etcd-server": [],
    "fluentd": [
        "apache_http_request_duration_seconds_bucket",
        "apache_http_request_duration_seconds_count",
        "docker_networkdb_stats_netmsg",
        "docker_networkdb_stats_qlen"
    ],
    "helm-controller": [
        "helmbundle_reconcile_up",
        "helmbundle_release_ready",
        "helmbundle_release_status",
        "helmbundle_release_success",
        "rest_client_requests_total"
    ],
    "ironic": [
        "ironic_driver_metadata",
        "ironic_drivers_total",
        "ironic_nodes",
        "ironic_up"
    ],
    "kaas-exporter": [
        "kaas_cluster_info",
        "kaas_cluster_updating",
        "kaas_clusters",
        "kaas_info",
        "kaas_license_expiry",
        "kaas_machine_ready",
        "kaas_machines_ready",
        "kaas_machines_requested",
        "rest_client_requests_total"
    ],
    "kubelet": [
        "kubelet_running_containers",
        "kubelet_running_pods",
        "kubelet_volume_stats_available_bytes",
        "kubelet_volume_stats_capacity_bytes",
        "kubelet_volume_stats_used_bytes",
        "kubernetes_build_info",
        "rest_client_requests_total"
    ],
    "kubernetes-apiservers": [
        "apiserver_client_certificate_expiration_seconds_bucket",
        "apiserver_client_certificate_expiration_seconds_count",
        "apiserver_request_total",
        "kubernetes_build_info",
        "rest_client_requests_total"
    ],
    "kubernetes-master-api": [],
    "mcc-blackbox": [],
    "mcc-cache": [],
    "mcc-controllers": [
        "rest_client_requests_total"
    ],
    "mcc-providers": [
        "rest_client_requests_total"
    ],
    "mke-manager-api": [],
    "mke-metrics-controller": [
        "ucp_controller_services",
        "ucp_engine_node_health"
    ],
    "mke-metrics-engine": [
        "ucp_engine_container_cpu_percent",
        "ucp_engine_container_cpu_total_time_nanoseconds",
        "ucp_engine_container_health",
        "ucp_engine_container_memory_usage_bytes",
        "ucp_engine_container_network_rx_bytes_total",
        "ucp_engine_container_network_tx_bytes_total",
        "ucp_engine_container_unhealth",
        "ucp_engine_containers",
        "ucp_engine_disk_free_bytes",
        "ucp_engine_disk_total_bytes",
        "ucp_engine_images",
        "ucp_engine_memory_total_bytes",
        "ucp_engine_num_cpu_cores"
    ],
    "msr-api": [],
    "openstack-blackbox-ext": [],
    "openstack-cloudprober": [
        "cloudprober_success",
        "cloudprober_total"
    ],
    "openstack-ingress-controller": [
        "nginx_ingress_controller_build_info",
        "nginx_ingress_controller_config_hash",
        "nginx_ingress_controller_config_last_reload_successful",
        "nginx_ingress_controller_nginx_process_connections",
        "nginx_ingress_controller_nginx_process_cpu_seconds_total",
        "nginx_ingress_controller_nginx_process_resident_memory_bytes",
        "nginx_ingress_controller_request_duration_seconds_bucket",
        "nginx_ingress_controller_request_size_sum",
        "nginx_ingress_controller_requests",
        "nginx_ingress_controller_response_size_sum",
        "nginx_ingress_controller_ssl_expire_time_seconds",
        "nginx_ingress_controller_success"
    ],
    "osdpl-exporter": [
        "osdpl_aodh_alarms",
        "osdpl_certificate_expiry",
        "osdpl_cinder_zone_volumes",
        "osdpl_neutron_availability_zone_info",
        "osdpl_neutron_zone_routers",
        "osdpl_nova_aggregate_hosts",
        "osdpl_nova_availability_zone_info",
        "osdpl_nova_availability_zone_instances",
        "osdpl_nova_availability_zone_hosts",
        "osdpl_version_info"
    ],
    "patroni": [
        "patroni_patroni_cluster_unlocked",
        "patroni_patroni_info",
        "patroni_postgresql_info",
        "patroni_replication_info",
        "patroni_xlog_location",
        "patroni_xlog_paused",
        "patroni_xlog_received_location",
        "patroni_xlog_replayed_location",
        "python_info"
    ],
    "postgresql": [
        "pg_database_size",
        "pg_locks_count",
        "pg_stat_activity_count",
        "pg_stat_activity_max_tx_duration",
        "pg_stat_archiver_failed_count",
        "pg_stat_bgwriter_buffers_alloc",
        "pg_stat_bgwriter_buffers_alloc_total",
        "pg_stat_bgwriter_buffers_backend",
        "pg_stat_bgwriter_buffers_backend_fsync",
        "pg_stat_bgwriter_buffers_backend_fsync_total",
        "pg_stat_bgwriter_buffers_backend_total",
        "pg_stat_bgwriter_buffers_checkpoint",
        "pg_stat_bgwriter_buffers_checkpoint_total",
        "pg_stat_bgwriter_buffers_clean",
        "pg_stat_bgwriter_buffers_clean_total",
        "pg_stat_bgwriter_checkpoint_sync_time",
        "pg_stat_bgwriter_checkpoint_sync_time_total",
        "pg_stat_bgwriter_checkpoint_write_time",
        "pg_stat_bgwriter_checkpoint_write_time_total",
        "pg_stat_database_blks_hit",
        "pg_stat_database_blks_read",
        "pg_stat_database_checksum_failures",
        "pg_stat_database_conflicts",
        "pg_stat_database_conflicts_confl_bufferpin",
        "pg_stat_database_conflicts_confl_deadlock",
        "pg_stat_database_conflicts_confl_lock",
        "pg_stat_database_conflicts_confl_snapshot",
        "pg_stat_database_conflicts_confl_tablespace",
        "pg_stat_database_deadlocks",
        "pg_stat_database_temp_bytes",
        "pg_stat_database_tup_deleted",
        "pg_stat_database_tup_fetched",
        "pg_stat_database_tup_inserted",
        "pg_stat_database_tup_returned",
        "pg_stat_database_tup_updated",
        "pg_stat_database_xact_commit",
        "pg_stat_database_xact_rollback",
        "postgres_exporter_build_info"
    ],
    "prometheus-alertmanager": [
        "alertmanager_active_alerts",
        "alertmanager_active_silences",
        "alertmanager_alerts",
        "alertmanager_alerts_invalid_total",
        "alertmanager_alerts_received_total",
        "alertmanager_build_info",
        "alertmanager_cluster_failed_peers",
        "alertmanager_cluster_health_score",
        "alertmanager_cluster_members",
        "alertmanager_cluster_messages_pruned_total",
        "alertmanager_cluster_messages_queued",
        "alertmanager_cluster_messages_received_size_total",
        "alertmanager_cluster_messages_received_total",
        "alertmanager_cluster_messages_sent_size_total",
        "alertmanager_cluster_messages_sent_total",
        "alertmanager_cluster_peer_info",
        "alertmanager_cluster_peers_joined_total",
        "alertmanager_cluster_peers_left_total",
        "alertmanager_cluster_reconnections_failed_total",
        "alertmanager_cluster_reconnections_total",
        "alertmanager_config_last_reload_success_timestamp_seconds",
        "alertmanager_config_last_reload_successful",
        "alertmanager_nflog_gc_duration_seconds_count",
        "alertmanager_nflog_gc_duration_seconds_sum",
        "alertmanager_nflog_gossip_messages_propagated_total",
        "alertmanager_nflog_queries_total",
        "alertmanager_nflog_query_duration_seconds_bucket",
        "alertmanager_nflog_query_errors_total",
        "alertmanager_nflog_snapshot_duration_seconds_count",
        "alertmanager_nflog_snapshot_duration_seconds_sum",
        "alertmanager_nflog_snapshot_size_bytes",
        "alertmanager_notification_latency_seconds_bucket",
        "alertmanager_notifications_failed_total",
        "alertmanager_notifications_total",
        "alertmanager_oversize_gossip_message_duration_seconds_bucket",
        "alertmanager_oversized_gossip_message_dropped_total",
        "alertmanager_oversized_gossip_message_failure_total",
        "alertmanager_oversized_gossip_message_sent_total",
        "alertmanager_partial_state_merges_failed_total",
        "alertmanager_partial_state_merges_total",
        "alertmanager_silences",
        "alertmanager_silences_gc_duration_seconds_count",
        "alertmanager_silences_gc_duration_seconds_sum",
        "alertmanager_silences_gossip_messages_propagated_total",
        "alertmanager_silences_queries_total",
        "alertmanager_silences_query_duration_seconds_bucket",
        "alertmanager_silences_query_errors_total",
        "alertmanager_silences_snapshot_duration_seconds_count",
        "alertmanager_silences_snapshot_duration_seconds_sum",
        "alertmanager_silences_snapshot_size_bytes",
        "alertmanager_state_replication_failed_total",
        "alertmanager_state_replication_total"
    ],
    "prometheus-elasticsearch-exporter": [
        "elasticsearch_breakers_estimated_size_bytes",
        "elasticsearch_breakers_limit_size_bytes",
        "elasticsearch_breakers_tripped",
        "elasticsearch_cluster_health_active_primary_shards",
        "elasticsearch_cluster_health_active_shards",
        "elasticsearch_cluster_health_delayed_unassigned_shards",
        "elasticsearch_cluster_health_initializing_shards",
        "elasticsearch_cluster_health_number_of_data_nodes",
        "elasticsearch_cluster_health_number_of_nodes",
        "elasticsearch_cluster_health_number_of_pending_tasks",
        "elasticsearch_cluster_health_relocating_shards",
        "elasticsearch_cluster_health_status",
        "elasticsearch_cluster_health_unassigned_shards",
        "elasticsearch_exporter_build_info",
        "elasticsearch_indices_docs",
        "elasticsearch_indices_docs_deleted",
        "elasticsearch_indices_docs_primary",
        "elasticsearch_indices_fielddata_evictions",
        "elasticsearch_indices_fielddata_memory_size_bytes",
        "elasticsearch_indices_filter_cache_evictions",
        "elasticsearch_indices_flush_time_seconds",
        "elasticsearch_indices_flush_total",
        "elasticsearch_indices_get_exists_time_seconds",
        "elasticsearch_indices_get_exists_total",
        "elasticsearch_indices_get_missing_time_seconds",
        "elasticsearch_indices_get_missing_total",
        "elasticsearch_indices_get_time_seconds",
        "elasticsearch_indices_get_total",
        "elasticsearch_indices_indexing_delete_time_seconds_total",
        "elasticsearch_indices_indexing_delete_total",
        "elasticsearch_indices_indexing_index_time_seconds_total",
        "elasticsearch_indices_indexing_index_total",
        "elasticsearch_indices_merges_docs_total",
        "elasticsearch_indices_merges_total",
        "elasticsearch_indices_merges_total_size_bytes_total",
        "elasticsearch_indices_merges_total_time_seconds_total",
        "elasticsearch_indices_query_cache_evictions",
        "elasticsearch_indices_query_cache_memory_size_bytes",
        "elasticsearch_indices_refresh_time_seconds_total",
        "elasticsearch_indices_refresh_total",
        "elasticsearch_indices_search_fetch_time_seconds",
        "elasticsearch_indices_search_fetch_total",
        "elasticsearch_indices_search_query_time_seconds",
        "elasticsearch_indices_search_query_total",
        "elasticsearch_indices_segment_count_primary",
        "elasticsearch_indices_segment_count_total",
        "elasticsearch_indices_segment_doc_values_memory_bytes_primary",
        "elasticsearch_indices_segment_doc_values_memory_bytes_total",
        "elasticsearch_indices_segment_fields_memory_bytes_primary",
        "elasticsearch_indices_segment_fields_memory_bytes_total",
        "elasticsearch_indices_segment_fixed_bit_set_memory_bytes_primary",
        "elasticsearch_indices_segment_fixed_bit_set_memory_bytes_total",
        "elasticsearch_indices_segment_index_writer_memory_bytes_primary",
        "elasticsearch_indices_segment_index_writer_memory_bytes_total",
        "elasticsearch_indices_segment_memory_bytes_primary",
        "elasticsearch_indices_segment_memory_bytes_total",
        "elasticsearch_indices_segment_norms_memory_bytes_primary",
        "elasticsearch_indices_segment_norms_memory_bytes_total",
        "elasticsearch_indices_segment_points_memory_bytes_primary",
        "elasticsearch_indices_segment_points_memory_bytes_total",
        "elasticsearch_indices_segment_terms_memory_primary",
        "elasticsearch_indices_segment_terms_memory_total",
        "elasticsearch_indices_segment_version_map_memory_bytes_primary",
        "elasticsearch_indices_segment_version_map_memory_bytes_total",
        "elasticsearch_indices_segments_count",
        "elasticsearch_indices_segments_memory_bytes",
        "elasticsearch_indices_store_size_bytes",
        "elasticsearch_indices_store_size_bytes_primary",
        "elasticsearch_indices_store_size_bytes_total",
        "elasticsearch_indices_store_throttle_time_seconds_total",
        "elasticsearch_indices_translog_operations",
        "elasticsearch_indices_translog_size_in_bytes",
        "elasticsearch_jvm_gc_collection_seconds_count",
        "elasticsearch_jvm_gc_collection_seconds_sum",
        "elasticsearch_jvm_memory_committed_bytes",
        "elasticsearch_jvm_memory_max_bytes",
        "elasticsearch_jvm_memory_pool_peak_used_bytes",
        "elasticsearch_jvm_memory_used_bytes",
        "elasticsearch_os_load1",
        "elasticsearch_os_load15",
        "elasticsearch_os_load5",
        "elasticsearch_process_cpu_percent",
        "elasticsearch_process_cpu_seconds_total",
        "elasticsearch_process_cpu_time_seconds_sum",
        "elasticsearch_process_open_files_count",
        "elasticsearch_thread_pool_active_count",
        "elasticsearch_thread_pool_completed_count",
        "elasticsearch_thread_pool_queue_count",
        "elasticsearch_thread_pool_rejected_count",
        "elasticsearch_transport_rx_size_bytes_total",
        "elasticsearch_transport_tx_size_bytes_total"
    ],
    "prometheus-grafana": [
        "grafana_api_dashboard_get_milliseconds",
        "grafana_api_dashboard_get_milliseconds_count",
        "grafana_api_dashboard_get_milliseconds_sum",
        "grafana_api_dashboard_save_milliseconds",
        "grafana_api_dashboard_save_milliseconds_count",
        "grafana_api_dashboard_save_milliseconds_sum",
        "grafana_api_dashboard_search_milliseconds",
        "grafana_api_dashboard_search_milliseconds_count",
        "grafana_api_dashboard_search_milliseconds_sum",
        "grafana_api_dataproxy_request_all_milliseconds",
        "grafana_api_dataproxy_request_all_milliseconds_count",
        "grafana_api_dataproxy_request_all_milliseconds_sum",
        "grafana_api_login_oauth_total",
        "grafana_api_login_post_total",
        "grafana_api_response_status_total",
        "grafana_build_info",
        "grafana_feature_toggles_info",
        "grafana_http_request_duration_seconds_count",
        "grafana_page_response_status_total",
        "grafana_plugin_build_info",
        "grafana_proxy_response_status_total",
        "grafana_stat_total_orgs",
        "grafana_stat_total_users",
        "grafana_stat_totals_dashboard"
    ],
    "prometheus-kube-state-metrics": [
        "kube_cronjob_next_schedule_time",
        "kube_daemonset_created",
        "kube_daemonset_status_current_number_scheduled",
        "kube_daemonset_status_desired_number_scheduled",
        "kube_daemonset_status_number_available",
        "kube_daemonset_status_number_misscheduled",
        "kube_daemonset_status_number_ready",
        "kube_daemonset_status_number_unavailable",
        "kube_daemonset_status_observed_generation",
        "kube_daemonset_status_updated_number_scheduled",
        "kube_deployment_created",
        "kube_deployment_metadata_generation",
        "kube_deployment_spec_replicas",
        "kube_deployment_status_observed_generation",
        "kube_deployment_status_replicas",
        "kube_deployment_status_replicas_available",
        "kube_deployment_status_replicas_unavailable",
        "kube_deployment_status_replicas_updated",
        "kube_endpoint_address_available",
        "kube_job_status_active",
        "kube_job_status_failed",
        "kube_job_status_succeeded",
        "kube_namespace_created",
        "kube_namespace_status_phase",
        "kube_node_info",
        "kube_node_labels",
        "kube_node_role",
        "kube_node_spec_taint",
        "kube_node_spec_unschedulable",
        "kube_node_status_allocatable",
        "kube_node_status_capacity",
        "kube_node_status_condition",
        "kube_persistentvolume_capacity_bytes",
        "kube_persistentvolume_status_phase",
        "kube_persistentvolumeclaim_resource_requests_storage_bytes",
        "kube_pod_container_info",
        "kube_pod_container_resource_limits",
        "kube_pod_container_resource_requests",
        "kube_pod_container_status_restarts_total",
        "kube_pod_container_status_running",
        "kube_pod_container_status_terminated",
        "kube_pod_container_status_waiting",
        "kube_pod_info",
        "kube_pod_init_container_status_running",
        "kube_pod_status_phase",
        "kube_service_status_load_balancer_ingress",
        "kube_statefulset_created",
        "kube_statefulset_metadata_generation",
        "kube_statefulset_replicas",
        "kube_statefulset_status_current_revision",
        "kube_statefulset_status_observed_generation",
        "kube_statefulset_status_replicas",
        "kube_statefulset_status_replicas_available",
        "kube_statefulset_status_replicas_current",
        "kube_statefulset_status_replicas_ready",
        "kube_statefulset_status_replicas_updated",
        "kube_statefulset_status_update_revision"
    ],
    "prometheus-libvirt-exporter": [
        "libvirt_domain_block_stats_allocation",
        "libvirt_domain_block_stats_capacity",
        "libvirt_domain_block_stats_physical",
        "libvirt_domain_block_stats_read_bytes_total",
        "libvirt_domain_block_stats_read_requests_total",
        "libvirt_domain_block_stats_write_bytes_total",
        "libvirt_domain_block_stats_write_requests_total",
        "libvirt_domain_info_cpu_time_seconds_total",
        "libvirt_domain_info_maximum_memory_bytes",
        "libvirt_domain_info_memory_usage_bytes",
        "libvirt_domain_info_state",
        "libvirt_domain_info_virtual_cpus",
        "libvirt_domain_interface_stats_receive_bytes_total",
        "libvirt_domain_interface_stats_receive_drops_total",
        "libvirt_domain_interface_stats_receive_errors_total",
        "libvirt_domain_interface_stats_receive_packets_total",
        "libvirt_domain_interface_stats_transmit_bytes_total",
        "libvirt_domain_interface_stats_transmit_drops_total",
        "libvirt_domain_interface_stats_transmit_errors_total",
        "libvirt_domain_interface_stats_transmit_packets_total",
        "libvirt_domain_memory_actual_balloon_bytes",
        "libvirt_domain_memory_available_bytes",
        "libvirt_domain_memory_rss_bytes",
        "libvirt_domain_memory_unused_bytes",
        "libvirt_domain_memory_usable_bytes",
        "libvirt_up"
    ],
    "prometheus-memcached-exporter": [
        "memcached_commands_total",
        "memcached_current_bytes",
        "memcached_current_connections",
        "memcached_current_items",
        "memcached_exporter_build_info",
        "memcached_items_evicted_total",
        "memcached_items_reclaimed_total",
        "memcached_limit_bytes",
        "memcached_read_bytes_total",
        "memcached_up",
        "memcached_version",
        "memcached_written_bytes_total"
    ],
    "prometheus-msteams": [],
    "prometheus-mysql-exporter": [
        "mysql_global_status_aborted_clients",
        "mysql_global_status_aborted_connects",
        "mysql_global_status_buffer_pool_pages",
        "mysql_global_status_bytes_received",
        "mysql_global_status_bytes_sent",
        "mysql_global_status_commands_total",
        "mysql_global_status_created_tmp_disk_tables",
        "mysql_global_status_created_tmp_files",
        "mysql_global_status_created_tmp_tables",
        "mysql_global_status_handlers_total",
        "mysql_global_status_innodb_log_waits",
        "mysql_global_status_innodb_num_open_files",
        "mysql_global_status_innodb_page_size",
        "mysql_global_status_max_used_connections",
        "mysql_global_status_open_files",
        "mysql_global_status_open_table_definitions",
        "mysql_global_status_open_tables",
        "mysql_global_status_opened_files",
        "mysql_global_status_opened_table_definitions",
        "mysql_global_status_opened_tables",
        "mysql_global_status_qcache_free_memory",
        "mysql_global_status_qcache_hits",
        "mysql_global_status_qcache_inserts",
        "mysql_global_status_qcache_lowmem_prunes",
        "mysql_global_status_qcache_not_cached",
        "mysql_global_status_qcache_queries_in_cache",
        "mysql_global_status_queries",
        "mysql_global_status_questions",
        "mysql_global_status_select_full_join",
        "mysql_global_status_select_full_range_join",
        "mysql_global_status_select_range",
        "mysql_global_status_select_range_check",
        "mysql_global_status_select_scan",
        "mysql_global_status_slow_queries",
        "mysql_global_status_sort_merge_passes",
        "mysql_global_status_sort_range",
        "mysql_global_status_sort_rows",
        "mysql_global_status_sort_scan",
        "mysql_global_status_table_locks_immediate",
        "mysql_global_status_table_locks_waited",
        "mysql_global_status_threads_cached",
        "mysql_global_status_threads_connected",
        "mysql_global_status_threads_created",
        "mysql_global_status_threads_running",
        "mysql_global_status_wsrep_flow_control_paused",
        "mysql_global_status_wsrep_local_recv_queue",
        "mysql_global_status_wsrep_local_state",
        "mysql_global_status_wsrep_ready",
        "mysql_global_variables_innodb_buffer_pool_size",
        "mysql_global_variables_innodb_log_buffer_size",
        "mysql_global_variables_key_buffer_size",
        "mysql_global_variables_max_connections",
        "mysql_global_variables_open_files_limit",
        "mysql_global_variables_query_cache_size",
        "mysql_global_variables_table_definition_cache",
        "mysql_global_variables_table_open_cache",
        "mysql_global_variables_thread_cache_size",
        "mysql_global_variables_wsrep_desync",
        "mysql_up",
        "mysql_version_info",
        "mysqld_exporter_build_info"
    ],
    "prometheus-node-exporter": [
        "node_arp_entries",
        "node_bonding_active",
        "node_bonding_slaves",
        "node_boot_time_seconds",
        "node_context_switches_total",
        "node_cpu_seconds_total",
        "node_disk_io_now",
        "node_disk_io_time_seconds_total",
        "node_disk_io_time_weighted_seconds_total",
        "node_disk_read_bytes_total",
        "node_disk_read_time_seconds_total",
        "node_disk_reads_completed_total",
        "node_disk_reads_merged_total",
        "node_disk_write_time_seconds_total",
        "node_disk_writes_completed_total",
        "node_disk_writes_merged_total",
        "node_disk_written_bytes_total",
        "node_entropy_available_bits",
        "node_exporter_build_info",
        "node_filefd_allocated",
        "node_filefd_maximum",
        "node_filesystem_avail_bytes",
        "node_filesystem_files",
        "node_filesystem_files_free",
        "node_filesystem_free_bytes",
        "node_filesystem_readonly",
        "node_filesystem_size_bytes",
        "node_forks_total",
        "node_hwmon_temp_celsius",
        "node_hwmon_temp_crit_alarm_celsius",
        "node_hwmon_temp_crit_celsius",
        "node_hwmon_temp_crit_hyst_celsius",
        "node_hwmon_temp_max_celsius",
        "node_intr_total",
        "node_load1",
        "node_load15",
        "node_load5",
        "node_memory_Active_anon_bytes",
        "node_memory_Active_bytes",
        "node_memory_Active_file_bytes",
        "node_memory_AnonHugePages_bytes",
        "node_memory_AnonPages_bytes",
        "node_memory_Bounce_bytes",
        "node_memory_Buffers_bytes",
        "node_memory_Cached_bytes",
        "node_memory_CommitLimit_bytes",
        "node_memory_Committed_AS_bytes",
        "node_memory_DirectMap1G",
        "node_memory_DirectMap2M_bytes",
        "node_memory_DirectMap4k_bytes",
        "node_memory_Dirty_bytes",
        "node_memory_HardwareCorrupted_bytes",
        "node_memory_HugePages_Free",
        "node_memory_HugePages_Rsvd",
        "node_memory_HugePages_Surp",
        "node_memory_HugePages_Total",
        "node_memory_Hugepagesize_bytes",
        "node_memory_Inactive_anon_bytes",
        "node_memory_Inactive_bytes",
        "node_memory_Inactive_file_bytes",
        "node_memory_KernelStack_bytes",
        "node_memory_Mapped_bytes",
        "node_memory_MemAvailable_bytes",
        "node_memory_MemFree_bytes",
        "node_memory_MemTotal_bytes",
        "node_memory_Mlocked_bytes",
        "node_memory_NFS_Unstable_bytes",
        "node_memory_PageTables_bytes",
        "node_memory_SReclaimable_bytes",
        "node_memory_SUnreclaim_bytes",
        "node_memory_Shmem_bytes",
        "node_memory_Slab_bytes",
        "node_memory_SwapCached_bytes",
        "node_memory_SwapFree_bytes",
        "node_memory_SwapTotal_bytes",
        "node_memory_Unevictable_bytes",
        "node_memory_VmallocChunk_bytes",
        "node_memory_VmallocTotal_bytes",
        "node_memory_VmallocUsed_bytes",
        "node_memory_WritebackTmp_bytes",
        "node_memory_Writeback_bytes",
        "node_netstat_TcpExt_TCPSynRetrans",
        "node_netstat_Tcp_ActiveOpens",
        "node_netstat_Tcp_AttemptFails",
        "node_netstat_Tcp_CurrEstab",
        "node_netstat_Tcp_EstabResets",
        "node_netstat_Tcp_InCsumErrors",
        "node_netstat_Tcp_InErrs",
        "node_netstat_Tcp_InSegs",
        "node_netstat_Tcp_MaxConn",
        "node_netstat_Tcp_OutRsts",
        "node_netstat_Tcp_OutSegs",
        "node_netstat_Tcp_PassiveOpens",
        "node_netstat_Tcp_RetransSegs",
        "node_netstat_Udp_InCsumErrors",
        "node_netstat_Udp_InDatagrams",
        "node_netstat_Udp_InErrors",
        "node_netstat_Udp_NoPorts",
        "node_netstat_Udp_OutDatagrams",
        "node_netstat_Udp_RcvbufErrors",
        "node_netstat_Udp_SndbufErrors",
        "node_network_mtu_bytes",
        "node_network_receive_bytes_total",
        "node_network_receive_compressed_total",
        "node_network_receive_drop_total",
        "node_network_receive_errs_total",
        "node_network_receive_fifo_total",
        "node_network_receive_frame_total",
        "node_network_receive_multicast_total",
        "node_network_receive_packets_total",
        "node_network_transmit_bytes_total",
        "node_network_transmit_carrier_total",
        "node_network_transmit_colls_total",
        "node_network_transmit_compressed_total",
        "node_network_transmit_drop_total",
        "node_network_transmit_errs_total",
        "node_network_transmit_fifo_total",
        "node_network_transmit_packets_total",
        "node_network_up",
        "node_nf_conntrack_entries",
        "node_nf_conntrack_entries_limit",
        "node_procs_blocked",
        "node_procs_running",
        "node_scrape_collector_duration_seconds",
        "node_scrape_collector_success",
        "node_sockstat_FRAG_inuse",
        "node_sockstat_FRAG_memory",
        "node_sockstat_RAW_inuse",
        "node_sockstat_TCP_alloc",
        "node_sockstat_TCP_inuse",
        "node_sockstat_TCP_mem",
        "node_sockstat_TCP_mem_bytes",
        "node_sockstat_TCP_orphan",
        "node_sockstat_TCP_tw",
        "node_sockstat_UDPLITE_inuse",
        "node_sockstat_UDP_inuse",
        "node_sockstat_UDP_mem",
        "node_sockstat_UDP_mem_bytes",
        "node_sockstat_sockets_used",
        "node_textfile_scrape_error",
        "node_time_seconds",
        "node_timex_estimated_error_seconds",
        "node_timex_frequency_adjustment_ratio",
        "node_timex_maxerror_seconds",
        "node_timex_offset_seconds",
        "node_timex_sync_status",
        "node_uname_info"
    ],
    "prometheus-rabbitmq-exporter": [
        "rabbitmq_channels",
        "rabbitmq_connections",
        "rabbitmq_consumers",
        "rabbitmq_exchanges",
        "rabbitmq_exporter_build_info",
        "rabbitmq_fd_available",
        "rabbitmq_fd_used",
        "rabbitmq_node_disk_free",
        "rabbitmq_node_disk_free_alarm",
        "rabbitmq_node_mem_alarm",
        "rabbitmq_node_mem_used",
        "rabbitmq_partitions",
        "rabbitmq_queue_messages_global",
        "rabbitmq_queue_messages_ready_global",
        "rabbitmq_queue_messages_unacknowledged_global",
        "rabbitmq_queues",
        "rabbitmq_sockets_available",
        "rabbitmq_sockets_used",
        "rabbitmq_up",
        "rabbitmq_uptime",
        "rabbitmq_version_info"
    ],
    "prometheus-relay": [],
    "prometheus-server": [
        "prometheus_build_info",
        "prometheus_config_last_reload_success_timestamp_seconds",
        "prometheus_config_last_reload_successful",
        "prometheus_engine_query_duration_seconds",
        "prometheus_engine_query_duration_seconds_sum",
        "prometheus_http_request_duration_seconds_count",
        "prometheus_notifications_alertmanagers_discovered",
        "prometheus_notifications_errors_total",
        "prometheus_notifications_queue_capacity",
        "prometheus_notifications_queue_length",
        "prometheus_notifications_sent_total",
        "prometheus_rule_evaluation_failures_total",
        "prometheus_target_interval_length_seconds",
        "prometheus_target_interval_length_seconds_count",
        "prometheus_target_scrapes_sample_duplicate_timestamp_total",
        "prometheus_tsdb_blocks_loaded",
        "prometheus_tsdb_compaction_chunk_range_seconds_count",
        "prometheus_tsdb_compaction_chunk_range_seconds_sum",
        "prometheus_tsdb_compaction_chunk_samples_count",
        "prometheus_tsdb_compaction_chunk_samples_sum",
        "prometheus_tsdb_compaction_chunk_size_bytes_sum",
        "prometheus_tsdb_compaction_duration_seconds_bucket",
        "prometheus_tsdb_compaction_duration_seconds_count",
        "prometheus_tsdb_compaction_duration_seconds_sum",
        "prometheus_tsdb_compactions_failed_total",
        "prometheus_tsdb_compactions_total",
        "prometheus_tsdb_compactions_triggered_total",
        "prometheus_tsdb_head_active_appenders",
        "prometheus_tsdb_head_chunks",
        "prometheus_tsdb_head_chunks_created_total",
        "prometheus_tsdb_head_chunks_removed_total",
        "prometheus_tsdb_head_gc_duration_seconds_sum",
        "prometheus_tsdb_head_samples_appended_total",
        "prometheus_tsdb_head_series",
        "prometheus_tsdb_head_series_created_total",
        "prometheus_tsdb_head_series_removed_total",
        "prometheus_tsdb_reloads_failures_total",
        "prometheus_tsdb_reloads_total",
        "prometheus_tsdb_storage_blocks_bytes",
        "prometheus_tsdb_wal_corruptions_total",
        "prometheus_tsdb_wal_fsync_duration_seconds_count",
        "prometheus_tsdb_wal_fsync_duration_seconds_sum",
        "prometheus_tsdb_wal_truncations_failed_total",
        "prometheus_tsdb_wal_truncations_total"
    ],
    "rabbitmq-operator-metrics": [
        "rest_client_requests_total"
    ],
    "refapp": [],
    "sf-notifier": [
        "sf_auth_ok",
        "sf_error_count_created",
        "sf_error_count_total",
        "sf_request_count_created",
        "sf_request_count_total"
    ],
    "telegraf-docker-swarm": [
        "docker_n_containers",
        "docker_n_containers_paused",
        "docker_n_containers_running",
        "docker_n_containers_stopped",
        "docker_swarm_node_ready",
        "docker_swarm_tasks_desired",
        "docker_swarm_tasks_running",
        "internal_agent_gather_errors"
    ],
    "telemeter-client": [
        "federate_errors",
        "federate_filtered_samples",
        "federate_samples"
    ],
    "telemeter-server": [
        "telemeter_cleanups_total",
        "telemeter_partitions",
        "telemeter_samples_total"
    ],
    "tf-cassandra-jmx-exporter": [
        "cassandra_cache_entries",
        "cassandra_cache_estimated_size_bytes",
        "cassandra_cache_hits_total",
        "cassandra_cache_requests_total",
        "cassandra_client_authentication_failures_total",
        "cassandra_client_native_connections",
        "cassandra_client_request_failures_total",
        "cassandra_client_request_latency_seconds_count",
        "cassandra_client_request_latency_seconds_sum",
        "cassandra_client_request_timeouts_total",
        "cassandra_client_request_unavailable_exceptions_total",
        "cassandra_client_request_view_write_latency_seconds",
        "cassandra_commit_log_pending_tasks",
        "cassandra_compaction_bytes_compacted_total",
        "cassandra_compaction_completed_total",
        "cassandra_dropped_messages_total",
        "cassandra_endpoint_connection_timeouts_total",
        "cassandra_storage_exceptions_total",
        "cassandra_storage_hints_total",
        "cassandra_storage_load_bytes",
        "cassandra_table_estimated_pending_compactions",
        "cassandra_table_repaired_ratio",
        "cassandra_table_sstables_per_read_count",
        "cassandra_table_tombstones_scanned",
        "cassandra_thread_pool_active_tasks",
        "cassandra_thread_pool_blocked_tasks"
    ],
    "tf-control": [
        "tf_controller_sessions",
        "tf_controller_up"
    ],
    "tf-kafka-jmx": [
        "jmx_exporter_build_info",
        "kafka_controller_controllerstats_count",
        "kafka_controller_controllerstats_oneminuterate",
        "kafka_controller_kafkacontroller_value",
        "kafka_log_log_value",
        "kafka_network_processor_value",
        "kafka_network_requestmetrics_99thpercentile",
        "kafka_network_requestmetrics_mean",
        "kafka_network_requestmetrics_oneminuterate",
        "kafka_network_socketserver_value",
        "kafka_server_brokertopicmetrics_count",
        "kafka_server_brokertopicmetrics_oneminuterate",
        "kafka_server_delayedoperationpurgatory_value",
        "kafka_server_kafkarequesthandlerpool_oneminuterate",
        "kafka_server_replicamanager_oneminuterate",
        "kafka_server_replicamanager_value"
    ],
    "tf-operator": [
        "tf_operator_info"
    ],
    "tf-redis": [
        "redis_commands_duration_seconds_total",
        "redis_commands_processed_total",
        "redis_commands_total",
        "redis_connected_clients",
        "redis_connected_slaves",
        "redis_db_keys",
        "redis_db_keys_expiring",
        "redis_evicted_keys_total",
        "redis_expired_keys_total",
        "redis_exporter_build_info",
        "redis_instance_info",
        "redis_keyspace_hits_total",
        "redis_keyspace_misses_total",
        "redis_memory_max_bytes",
        "redis_memory_used_bytes",
        "redis_net_input_bytes_total",
        "redis_net_output_bytes_total",
        "redis_rejected_connections_total",
        "redis_slave_info",
        "redis_up",
        "redis_uptime_in_seconds"
    ],
    "tf-vrouter": [
        "tf_vrouter_ds_discard",
        "tf_vrouter_ds_flow_action_drop",
        "tf_vrouter_ds_flow_queue_limit_exceeded",
        "tf_vrouter_ds_flow_table_full",
        "tf_vrouter_ds_frag_err",
        "tf_vrouter_ds_invalid_if",
        "tf_vrouter_ds_invalid_label",
        "tf_vrouter_ds_invalid_nh",
        "tf_vrouter_flow_active",
        "tf_vrouter_flow_aged",
        "tf_vrouter_flow_created",
        "tf_vrouter_lls_session_info",
        "tf_vrouter_up",
        "tf_vrouter_xmpp_connection_state"
    ],
    "tf-zookeeper": [
        "approximate_data_size",
        "bytes_received_count",
        "commit_count",
        "connection_drop_count",
        "connection_rejected",
        "connection_request_count",
        "dead_watchers_cleaner_latency_sum",
        "dead_watchers_cleared",
        "dead_watchers_queued",
        "digest_mismatches_count",
        "election_time_sum",
        "ephemerals_count",
        "follower_sync_time_count",
        "follower_sync_time_sum",
        "fsynctime_sum",
        "global_sessions",
        "jvm_classes_loaded",
        "jvm_gc_collection_seconds_sum",
        "jvm_info",
        "jvm_memory_pool_bytes_used",
        "jvm_threads_current",
        "jvm_threads_deadlocked",
        "jvm_threads_state",
        "leader_uptime",
        "learner_commit_received_count",
        "learner_proposal_received_count",
        "learners",
        "local_sessions",
        "max_file_descriptor_count",
        "node_changed_watch_count_sum",
        "node_children_watch_count_sum",
        "node_created_watch_count_sum",
        "node_deleted_watch_count_sum",
        "num_alive_connections",
        "om_commit_process_time_ms_sum",
        "om_proposal_process_time_ms_sum",
        "open_file_descriptor_count",
        "outstanding_requests",
        "packets_received",
        "packets_sent",
        "pending_syncs",
        "proposal_count",
        "quorum_size",
        "response_packet_cache_hits",
        "response_packet_cache_misses",
        "response_packet_get_children_cache_hits",
        "response_packet_get_children_cache_misses",
        "revalidate_count",
        "snapshottime_sum",
        "stale_sessions_expired",
        "synced_followers",
        "synced_non_voting_followers",
        "synced_observers",
        "unrecoverable_error_count",
        "uptime",
        "watch_count",
        "znode_count"
    ],
    "ucp-kv": []
}

Note

The following MOSK-related metrics from the above list of white-listed scrape jobs are available since 23.3:

  • The tf-operator group: tf_operator_info for Tungsten Fabric deployments.

  • The osdpl-exporter group (removed in 24.1):

    • osdpl_aodh_alarms

    • osdpl_cinder_zone_volumes

    • osdpl_neutron_availability_zone_info

    • osdpl_neutron_zone_routers

    • osdpl_nova_aggregate_hosts

    • osdpl_nova_availability_zone_info

    • osdpl_nova_availability_zone_instances

    • osdpl_nova_availability_zone_hosts

    • osdpl_version_info

Note

The kubelet_volume_stats_used_bytes metric from the above list is available since Container Cloud 2.26.0 (Cluster releases 17.1.0 and 16.1.0).

Note

The following Prometheus metrics are removed from the above list of white-listed scrape jobs in Container Cloud 2.25.0 (Cluster releases 17.0.0, 16.0.0, 14.1.0):

  • The prometheus-kube-state-metrics group:

    • kube_deployment_spec_paused

    • kube_deployment_spec_strategy_rollingupdate_max_unavailable

    • kube_deployment_status_condition

    • kube_deployment_status_replicas_ready

  • The prometheus-coredns job from the go-collector-metrics and process-collector-metrics groups

You can add necessary metrics that are dropped by default to this white list as described below. It is also possible to disable the filtering feature. However, Mirantis does not recommend disabling it, because doing so directly increases the Prometheus index size, which slows down queries. For clusters with an extended retention period, the performance degradation will be most noticeable.

Add dropped metrics to the white list

You can expand the default white list of Prometheus metrics using the prometheusServer.metricsFiltering.extraMetricsInclude parameter to enable metrics that are dropped by default. For the parameter description, see Prometheus metrics filtering. For configuration steps, see StackLight configuration procedure.

Example configuration:

prometheusServer:
  metricsFiltering:
    enabled: true
    extraMetricsInclude:
      cadvisor:
        - container_memory_failcnt
        - container_network_transmit_errors_total
      calico:
        - felix_route_table_per_iface_sync_seconds_sum
        - felix_bpf_dataplane_endpoints
      _group-go-collector-metrics:
        - go_gc_heap_goal_bytes
        - go_gc_heap_objects_objects

Disable metrics filtering

Mirantis does not recommend disabling metrics filtering, because doing so directly increases the Prometheus index size, which slows down queries. In clusters with an extended retention period, the performance degradation will be most noticeable. Therefore, the best option is to keep the feature enabled and add the required dropped metrics to the white list as described in Add dropped metrics to the white list.

If disabling metrics filtering is absolutely necessary, set the prometheusServer.metricsFiltering.enabled parameter to false:

prometheusServer:
  metricsFiltering:
    enabled: false

For configuration steps, see StackLight configuration procedure.