From 53503f852b93c64d34463c2b341e2c1a4c1d04f7 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 30 Mar 2025 12:35:16 +0300 Subject: [PATCH] mountstats/linux: fix multiple transports statistics labeling When mounting nfs with nconnect, mountstats will show multiple transports. Currently the mountstats exporter fails to create metrics from this information due to repeated entries. Fix this by adding transport metrics a "transport" label enumerating their order in the mountstats output. Signed-off-by: Sagi Grimberg --- collector/fixtures/e2e-64k-page-output.txt | 40 +++++++++--------- collector/fixtures/e2e-output.txt | 40 +++++++++--------- collector/mountstats_linux.go | 48 ++++++++++++---------- 3 files changed, 66 insertions(+), 62 deletions(-) diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index 522350cd7d..db384df620 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -2267,44 +2267,44 @@ node_mountstats_nfs_total_write_bytes_total{export="192.168.1.1:/srv/test",mount node_mountstats_nfs_total_write_bytes_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 # HELP node_mountstats_nfs_transport_backlog_queue_total Total number of items added to the RPC backlog queue. # TYPE node_mountstats_nfs_transport_backlog_queue_total counter -node_mountstats_nfs_transport_backlog_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 0 -node_mountstats_nfs_transport_backlog_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +node_mountstats_nfs_transport_backlog_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 0 +node_mountstats_nfs_transport_backlog_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 0 # HELP node_mountstats_nfs_transport_bad_transaction_ids_total Number of times the NFS server sent a response with a transaction ID unknown to this client. # TYPE node_mountstats_nfs_transport_bad_transaction_ids_total counter -node_mountstats_nfs_transport_bad_transaction_ids_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 0 -node_mountstats_nfs_transport_bad_transaction_ids_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +node_mountstats_nfs_transport_bad_transaction_ids_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 0 +node_mountstats_nfs_transport_bad_transaction_ids_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 0 # HELP node_mountstats_nfs_transport_bind_total Number of times the client has had to establish a connection from scratch to the NFS server. # TYPE node_mountstats_nfs_transport_bind_total counter -node_mountstats_nfs_transport_bind_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 0 -node_mountstats_nfs_transport_bind_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +node_mountstats_nfs_transport_bind_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 0 +node_mountstats_nfs_transport_bind_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 0 # HELP node_mountstats_nfs_transport_connect_total Number of times the client has made a TCP connection to the NFS server. # TYPE node_mountstats_nfs_transport_connect_total counter -node_mountstats_nfs_transport_connect_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 1 -node_mountstats_nfs_transport_connect_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +node_mountstats_nfs_transport_connect_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 1 +node_mountstats_nfs_transport_connect_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 0 # HELP node_mountstats_nfs_transport_idle_time_seconds Duration since the NFS mount last saw any RPC traffic, in seconds. # TYPE node_mountstats_nfs_transport_idle_time_seconds gauge -node_mountstats_nfs_transport_idle_time_seconds{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 11 -node_mountstats_nfs_transport_idle_time_seconds{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +node_mountstats_nfs_transport_idle_time_seconds{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 11 +node_mountstats_nfs_transport_idle_time_seconds{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 0 # HELP node_mountstats_nfs_transport_maximum_rpc_slots Maximum number of simultaneously active RPC requests ever used. # TYPE node_mountstats_nfs_transport_maximum_rpc_slots gauge -node_mountstats_nfs_transport_maximum_rpc_slots{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 24 -node_mountstats_nfs_transport_maximum_rpc_slots{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 24 +node_mountstats_nfs_transport_maximum_rpc_slots{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 24 +node_mountstats_nfs_transport_maximum_rpc_slots{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 24 # HELP node_mountstats_nfs_transport_pending_queue_total Total number of items added to the RPC transmission pending queue. # TYPE node_mountstats_nfs_transport_pending_queue_total counter -node_mountstats_nfs_transport_pending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 5726 -node_mountstats_nfs_transport_pending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 5726 +node_mountstats_nfs_transport_pending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 5726 +node_mountstats_nfs_transport_pending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 5726 # HELP node_mountstats_nfs_transport_receives_total Number of RPC responses for this mount received from the NFS server. # TYPE node_mountstats_nfs_transport_receives_total counter -node_mountstats_nfs_transport_receives_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 6428 -node_mountstats_nfs_transport_receives_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 6428 +node_mountstats_nfs_transport_receives_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 6428 +node_mountstats_nfs_transport_receives_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 6428 # HELP node_mountstats_nfs_transport_sending_queue_total Total number of items added to the RPC transmission sending queue. # TYPE node_mountstats_nfs_transport_sending_queue_total counter -node_mountstats_nfs_transport_sending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 26 -node_mountstats_nfs_transport_sending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 26 +node_mountstats_nfs_transport_sending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 26 +node_mountstats_nfs_transport_sending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 26 # HELP node_mountstats_nfs_transport_sends_total Number of RPC requests for this mount sent to the NFS server. # TYPE node_mountstats_nfs_transport_sends_total counter -node_mountstats_nfs_transport_sends_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 6428 -node_mountstats_nfs_transport_sends_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 6428 +node_mountstats_nfs_transport_sends_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 6428 +node_mountstats_nfs_transport_sends_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 6428 # HELP node_mountstats_nfs_write_bytes_total Number of bytes written using the write() syscall. # TYPE node_mountstats_nfs_write_bytes_total counter node_mountstats_nfs_write_bytes_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 0 diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 6f2b0376cf..9d59cab31e 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -2299,44 +2299,44 @@ node_mountstats_nfs_total_write_bytes_total{export="192.168.1.1:/srv/test",mount node_mountstats_nfs_total_write_bytes_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 # HELP node_mountstats_nfs_transport_backlog_queue_total Total number of items added to the RPC backlog queue. # TYPE node_mountstats_nfs_transport_backlog_queue_total counter -node_mountstats_nfs_transport_backlog_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 0 -node_mountstats_nfs_transport_backlog_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +node_mountstats_nfs_transport_backlog_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 0 +node_mountstats_nfs_transport_backlog_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 0 # HELP node_mountstats_nfs_transport_bad_transaction_ids_total Number of times the NFS server sent a response with a transaction ID unknown to this client. # TYPE node_mountstats_nfs_transport_bad_transaction_ids_total counter -node_mountstats_nfs_transport_bad_transaction_ids_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 0 -node_mountstats_nfs_transport_bad_transaction_ids_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +node_mountstats_nfs_transport_bad_transaction_ids_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 0 +node_mountstats_nfs_transport_bad_transaction_ids_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 0 # HELP node_mountstats_nfs_transport_bind_total Number of times the client has had to establish a connection from scratch to the NFS server. # TYPE node_mountstats_nfs_transport_bind_total counter -node_mountstats_nfs_transport_bind_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 0 -node_mountstats_nfs_transport_bind_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +node_mountstats_nfs_transport_bind_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 0 +node_mountstats_nfs_transport_bind_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 0 # HELP node_mountstats_nfs_transport_connect_total Number of times the client has made a TCP connection to the NFS server. # TYPE node_mountstats_nfs_transport_connect_total counter -node_mountstats_nfs_transport_connect_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 1 -node_mountstats_nfs_transport_connect_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +node_mountstats_nfs_transport_connect_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 1 +node_mountstats_nfs_transport_connect_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 0 # HELP node_mountstats_nfs_transport_idle_time_seconds Duration since the NFS mount last saw any RPC traffic, in seconds. # TYPE node_mountstats_nfs_transport_idle_time_seconds gauge -node_mountstats_nfs_transport_idle_time_seconds{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 11 -node_mountstats_nfs_transport_idle_time_seconds{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 0 +node_mountstats_nfs_transport_idle_time_seconds{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 11 +node_mountstats_nfs_transport_idle_time_seconds{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 0 # HELP node_mountstats_nfs_transport_maximum_rpc_slots Maximum number of simultaneously active RPC requests ever used. # TYPE node_mountstats_nfs_transport_maximum_rpc_slots gauge -node_mountstats_nfs_transport_maximum_rpc_slots{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 24 -node_mountstats_nfs_transport_maximum_rpc_slots{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 24 +node_mountstats_nfs_transport_maximum_rpc_slots{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 24 +node_mountstats_nfs_transport_maximum_rpc_slots{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 24 # HELP node_mountstats_nfs_transport_pending_queue_total Total number of items added to the RPC transmission pending queue. # TYPE node_mountstats_nfs_transport_pending_queue_total counter -node_mountstats_nfs_transport_pending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 5726 -node_mountstats_nfs_transport_pending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 5726 +node_mountstats_nfs_transport_pending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 5726 +node_mountstats_nfs_transport_pending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 5726 # HELP node_mountstats_nfs_transport_receives_total Number of RPC responses for this mount received from the NFS server. # TYPE node_mountstats_nfs_transport_receives_total counter -node_mountstats_nfs_transport_receives_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 6428 -node_mountstats_nfs_transport_receives_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 6428 +node_mountstats_nfs_transport_receives_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 6428 +node_mountstats_nfs_transport_receives_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 6428 # HELP node_mountstats_nfs_transport_sending_queue_total Total number of items added to the RPC transmission sending queue. # TYPE node_mountstats_nfs_transport_sending_queue_total counter -node_mountstats_nfs_transport_sending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 26 -node_mountstats_nfs_transport_sending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 26 +node_mountstats_nfs_transport_sending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 26 +node_mountstats_nfs_transport_sending_queue_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 26 # HELP node_mountstats_nfs_transport_sends_total Number of RPC requests for this mount sent to the NFS server. # TYPE node_mountstats_nfs_transport_sends_total counter -node_mountstats_nfs_transport_sends_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 6428 -node_mountstats_nfs_transport_sends_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp"} 6428 +node_mountstats_nfs_transport_sends_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp",transport="0"} 6428 +node_mountstats_nfs_transport_sends_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="udp",transport="0"} 6428 # HELP node_mountstats_nfs_write_bytes_total Number of bytes written using the write() syscall. # TYPE node_mountstats_nfs_write_bytes_total counter node_mountstats_nfs_write_bytes_total{export="192.168.1.1:/srv/test",mountaddr="192.168.1.1",protocol="tcp"} 0 diff --git a/collector/mountstats_linux.go b/collector/mountstats_linux.go index 319b89149c..1c3b9a99e6 100644 --- a/collector/mountstats_linux.go +++ b/collector/mountstats_linux.go @@ -18,6 +18,7 @@ package collector import ( "fmt" "log/slog" + "strconv" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/procfs" @@ -126,8 +127,9 @@ func NewMountStatsCollector(logger *slog.Logger) (Collector, error) { ) var ( - labels = []string{"export", "protocol", "mountaddr"} - opLabels = []string{"export", "protocol", "mountaddr", "operation"} + labels = []string{"export", "protocol", "mountaddr"} + opLabels = []string{"export", "protocol", "mountaddr", "operation"} + translabels = []string{"export", "protocol", "mountaddr", "transport"} ) return &mountStatsCollector{ @@ -197,70 +199,70 @@ func NewMountStatsCollector(logger *slog.Logger) (Collector, error) { NFSTransportBindTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_bind_total"), "Number of times the client has had to establish a connection from scratch to the NFS server.", - labels, + translabels, nil, ), NFSTransportConnectTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_connect_total"), "Number of times the client has made a TCP connection to the NFS server.", - labels, + translabels, nil, ), NFSTransportIdleTimeSeconds: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_idle_time_seconds"), "Duration since the NFS mount last saw any RPC traffic, in seconds.", - labels, + translabels, nil, ), NFSTransportSendsTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_sends_total"), "Number of RPC requests for this mount sent to the NFS server.", - labels, + translabels, nil, ), NFSTransportReceivesTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_receives_total"), "Number of RPC responses for this mount received from the NFS server.", - labels, + translabels, nil, ), NFSTransportBadTransactionIDsTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_bad_transaction_ids_total"), "Number of times the NFS server sent a response with a transaction ID unknown to this client.", - labels, + translabels, nil, ), NFSTransportBacklogQueueTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_backlog_queue_total"), "Total number of items added to the RPC backlog queue.", - labels, + translabels, nil, ), NFSTransportMaximumRPCSlots: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_maximum_rpc_slots"), "Maximum number of simultaneously active RPC requests ever used.", - labels, + translabels, nil, ), NFSTransportSendingQueueTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_sending_queue_total"), "Total number of items added to the RPC transmission sending queue.", - labels, + translabels, nil, ), NFSTransportPendingQueueTotal: prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "transport_pending_queue_total"), "Total number of items added to the RPC transmission pending queue.", - labels, + translabels, nil, ), @@ -617,74 +619,76 @@ func (c *mountStatsCollector) updateNFSStats(ch chan<- prometheus.Metric, s *pro ) for i := range s.Transport { + translabelValues := []string{export, protocol, mountAddress, strconv.Itoa(i)} + ch <- prometheus.MustNewConstMetric( c.NFSTransportBindTotal, prometheus.CounterValue, float64(s.Transport[i].Bind), - labelValues..., + translabelValues..., ) ch <- prometheus.MustNewConstMetric( c.NFSTransportConnectTotal, prometheus.CounterValue, float64(s.Transport[i].Connect), - labelValues..., + translabelValues..., ) ch <- prometheus.MustNewConstMetric( c.NFSTransportIdleTimeSeconds, prometheus.GaugeValue, float64(s.Transport[i].IdleTimeSeconds%float64Mantissa), - labelValues..., + translabelValues..., ) ch <- prometheus.MustNewConstMetric( c.NFSTransportSendsTotal, prometheus.CounterValue, float64(s.Transport[i].Sends), - labelValues..., + translabelValues..., ) ch <- prometheus.MustNewConstMetric( c.NFSTransportReceivesTotal, prometheus.CounterValue, float64(s.Transport[i].Receives), - labelValues..., + translabelValues..., ) ch <- prometheus.MustNewConstMetric( c.NFSTransportBadTransactionIDsTotal, prometheus.CounterValue, float64(s.Transport[i].BadTransactionIDs), - labelValues..., + translabelValues..., ) ch <- prometheus.MustNewConstMetric( c.NFSTransportBacklogQueueTotal, prometheus.CounterValue, float64(s.Transport[i].CumulativeBacklog), - labelValues..., + translabelValues..., ) ch <- prometheus.MustNewConstMetric( c.NFSTransportMaximumRPCSlots, prometheus.GaugeValue, float64(s.Transport[i].MaximumRPCSlotsUsed), - labelValues..., + translabelValues..., ) ch <- prometheus.MustNewConstMetric( c.NFSTransportSendingQueueTotal, prometheus.CounterValue, float64(s.Transport[i].CumulativeSendingQueue), - labelValues..., + translabelValues..., ) ch <- prometheus.MustNewConstMetric( c.NFSTransportPendingQueueTotal, prometheus.CounterValue, float64(s.Transport[i].CumulativePendingQueue), - labelValues..., + translabelValues..., ) }