From 838cacffa6b518455b5a14e93280da1f9bb02384 Mon Sep 17 00:00:00 2001 From: hardikl Date: Wed, 10 Dec 2025 18:40:02 +0530 Subject: [PATCH 1/5] fix: adding dynamic threshold of link speed in 2 network tables --- cmd/collectors/commonutils.go | 6 ++ cmd/collectors/restperf/plugins/nic/nic.go | 7 +- cmd/collectors/zapiperf/plugins/nic/nic.go | 7 +- cmd/tools/generate/counter.yaml | 12 +++ docs/ontap-metrics.md | 101 ++++++++++++++++++++- grafana/dashboards/cmode/network.json | 76 +++++++++++++++- mcp/metadata/ontap_metrics.json | 9 ++ 7 files changed, 210 insertions(+), 8 deletions(-) diff --git a/cmd/collectors/commonutils.go b/cmd/collectors/commonutils.go index 8214f24d8..d0366e95a 100644 --- a/cmd/collectors/commonutils.go +++ b/cmd/collectors/commonutils.go @@ -39,6 +39,7 @@ type PortData struct { Port string Read float64 Write float64 + Speed float64 } // Reference https://kb.netapp.com/onprem/ontap/hardware/FAQ%3A_How_do_shelf_product_IDs_and_modules_in_ONTAP_map_to_a_model_of_a_shelf_or_storage_system_with_embedded_storage @@ -482,6 +483,7 @@ func PopulateIfgroupMetrics(portIfgroupMap map[string]string, portDataMap map[st port := portInfo.Port readBytes := portInfo.Read writeBytes := portInfo.Write + linkSpeed := portInfo.Speed ifgrpupInstanceKey := nodeName + ifgroupName ifgroupInstance := nData.GetInstance(ifgrpupInstanceKey) @@ -516,6 +518,10 @@ func PopulateIfgroupMetrics(portIfgroupMap map[string]string, portDataMap map[st tx := nData.GetMetric("tx_bytes") txv, _ := tx.GetValueFloat64(ifgroupInstance) tx.SetValueFloat64(ifgroupInstance, writeBytes+txv) + + speed := nData.GetMetric("speed") + speedv, _ := speed.GetValueFloat64(ifgroupInstance) + speed.SetValueFloat64(ifgroupInstance, linkSpeed+speedv) } return nil } diff --git a/cmd/collectors/restperf/plugins/nic/nic.go b/cmd/collectors/restperf/plugins/nic/nic.go index 7206a7190..ed695ee1d 100644 --- a/cmd/collectors/restperf/plugins/nic/nic.go +++ b/cmd/collectors/restperf/plugins/nic/nic.go @@ -43,6 +43,7 @@ type Nic struct { var ifgrpMetrics = []string{ "rx_bytes", "tx_bytes", + "speed", } func New(p *plugin.AbstractPlugin) plugin.Plugin { @@ -197,7 +198,11 @@ func (n *Nic) Run(dataMap map[string]*matrix.Matrix) ([]*matrix.Matrix, *collect tx.SetValueFloat64(instance, txPercent) } - portDataMap[nodeName+port] = collectors.PortData{Node: nodeName, Port: port, Read: rxBytes, Write: txBytes} + linkSpeed := float64(speed) + if strings.HasSuffix(s, "M") { + linkSpeed = float64(speed) * 8 + } + portDataMap[nodeName+port] = collectors.PortData{Node: nodeName, Port: port, Read: rxBytes, Write: txBytes, Speed: linkSpeed} if rxOk || txOk { utilPercent.SetValueFloat64(instance, math.Max(rxPercent, txPercent)) diff --git a/cmd/collectors/zapiperf/plugins/nic/nic.go b/cmd/collectors/zapiperf/plugins/nic/nic.go index 41b238c47..c85dc1a39 100644 --- a/cmd/collectors/zapiperf/plugins/nic/nic.go +++ b/cmd/collectors/zapiperf/plugins/nic/nic.go @@ -42,6 +42,7 @@ type Nic struct { var ifgrpMetrics = []string{ "rx_bytes", "tx_bytes", + "speed", } func New(p *plugin.AbstractPlugin) plugin.Plugin { @@ -175,7 +176,11 @@ func (n *Nic) Run(dataMap map[string]*matrix.Matrix) ([]*matrix.Matrix, *collect tx.SetValueFloat64(instance, txPercent) } - portDataMap[nodeName+port] = collectors.PortData{Node: nodeName, Port: port, Read: rxBytes, Write: txBytes} + linkSpeed := float64(speed) + if strings.HasSuffix(s, "M") { + linkSpeed = float64(speed) * 8 + } + portDataMap[nodeName+port] = collectors.PortData{Node: nodeName, Port: port, Read: rxBytes, Write: txBytes, Speed: linkSpeed} if rxOk || txOk { utilPercent.SetValueFloat64(instance, math.Max(rxPercent, txPercent)) diff --git a/cmd/tools/generate/counter.yaml b/cmd/tools/generate/counter.yaml index 3e5a24bea..5815cbf17 100644 --- a/cmd/tools/generate/counter.yaml +++ b/cmd/tools/generate/counter.yaml @@ -1895,6 +1895,18 @@ counters: ONTAPCounter: Harvest generated Template: conf/zapiperf/cdot/9.8.0/nic_common.yaml + - Name: nic_ifgrp_speed + Description: Link Aggregation Group (LAG) link speed. + APIs: + - API: RestPerf + Endpoint: NA + ONTAPCounter: Harvest generated + Template: conf/restperf/9.12.0/nic_common.yaml + - API: ZapiPerf + Endpoint: NA + ONTAPCounter: Harvest generated + Template: conf/zapiperf/cdot/9.8.0/nic_common.yaml + - Name: nic_rx_percent Description: Bytes received percentage. APIs: diff --git a/docs/ontap-metrics.md b/docs/ontap-metrics.md index 935ac690e..9e6303e77 100644 --- a/docs/ontap-metrics.md +++ b/docs/ontap-metrics.md @@ -7,7 +7,7 @@ These can be generated on demand by running `bin/harvest grafana metrics`. See - More information about ONTAP REST performance counters can be found [here](https://docs.netapp.com/us-en/ontap-pcmap-9121/index.html). ``` -Creation Date : 2025-Nov-21 +Creation Date : 2025-Dec-10 ONTAP Version: 9.16.1 ``` @@ -8114,6 +8114,86 @@ The `net_route_labels` metric is visualized in the following Grafana dashboards: +### netstat_bytes_recvd + +Number of bytes received by a TCP connection + +| API | Endpoint | Metric | Template | +|--------|----------|--------|---------| +| ZapiPerf | `perf-object-get-instances netstat` | `bytes_recvd`
Unit: none
Type: raw
Base: | conf/zapiperf/cdot/9.8.0/netstat.yaml | + + + +### netstat_bytes_sent + +Number of bytes sent by a TCP connection + +| API | Endpoint | Metric | Template | +|--------|----------|--------|---------| +| ZapiPerf | `perf-object-get-instances netstat` | `bytes_sent`
Unit: none
Type: raw
Base: | conf/zapiperf/cdot/9.8.0/netstat.yaml | + + + +### netstat_cong_win + +Congestion window of a TCP connection + +| API | Endpoint | Metric | Template | +|--------|----------|--------|---------| +| ZapiPerf | `perf-object-get-instances netstat` | `cong_win`
Unit: none
Type: raw
Base: | conf/zapiperf/cdot/9.8.0/netstat.yaml | + + + +### netstat_cong_win_th + +Congestion window threshold of a TCP connection + +| API | Endpoint | Metric | Template | +|--------|----------|--------|---------| +| ZapiPerf | `perf-object-get-instances netstat` | `cong_win_th`
Unit: none
Type: raw
Base: | conf/zapiperf/cdot/9.8.0/netstat.yaml | + + + +### netstat_ooorcv_pkts + +Number of out-of-order packets received by this TCP connection + +| API | Endpoint | Metric | Template | +|--------|----------|--------|---------| +| ZapiPerf | `perf-object-get-instances netstat` | `ooorcv_pkts`
Unit: none
Type: raw
Base: | conf/zapiperf/cdot/9.8.0/netstat.yaml | + + + +### netstat_recv_window + +Receive window size of a TCP connection + +| API | Endpoint | Metric | Template | +|--------|----------|--------|---------| +| ZapiPerf | `perf-object-get-instances netstat` | `recv_window`
Unit: none
Type: raw
Base: | conf/zapiperf/cdot/9.8.0/netstat.yaml | + + + +### netstat_rexmit_pkts + +Number of packets retransmitted by this TCP connection + +| API | Endpoint | Metric | Template | +|--------|----------|--------|---------| +| ZapiPerf | `perf-object-get-instances netstat` | `rexmit_pkts`
Unit: none
Type: raw
Base: | conf/zapiperf/cdot/9.8.0/netstat.yaml | + + + +### netstat_send_window + +Send window size of a TCP connection + +| API | Endpoint | Metric | Template | +|--------|----------|--------|---------| +| ZapiPerf | `perf-object-get-instances netstat` | `send_window`
Unit: none
Type: raw
Base: | conf/zapiperf/cdot/9.8.0/netstat.yaml | + + + ### nfs_clients_idle_duration Specifies an ISO-8601 format of date and time to retrieve the idle time duration in hours, minutes, and seconds format. @@ -8864,6 +8944,25 @@ The `nic_ifgrp_rx_bytes` metric is visualized in the following Grafana dashboard +### nic_ifgrp_speed + +Link Aggregation Group (LAG) link speed. + +| API | Endpoint | Metric | Template | +|--------|----------|--------|---------| +| RestPerf | `NA` | `Harvest generated`
Unit:
Type:
Base: | conf/restperf/9.12.0/nic_common.yaml | +| ZapiPerf | `NA` | `Harvest generated`
Unit:
Type:
Base: | conf/zapiperf/cdot/9.8.0/nic_common.yaml | + +The `nic_ifgrp_speed` metric is visualized in the following Grafana dashboards: + +/// html | div.grafana-table +| Dashboard | Row | Type | Panel | +|--------|----------|--------|--------| +| ONTAP: Network | Link Aggregation Group (LAG) | table | [Link Aggregation Groups](/d/cdot-network/ontap3a-network?orgId=1&viewPanel=122) | +/// + + + ### nic_ifgrp_tx_bytes Link Aggregation Group (LAG) Bytes sent. diff --git a/grafana/dashboards/cmode/network.json b/grafana/dashboards/cmode/network.json index c858c7984..12052a3ba 100644 --- a/grafana/dashboards/cmode/network.json +++ b/grafana/dashboards/cmode/network.json @@ -1318,6 +1318,23 @@ "id": "merge", "options": {} }, + { + "id": "configFromData", + "options": { + "applyTo": { + "id": "byName", + "options": "Value #D" + }, + "configRefId": "A", + "mappings": [ + { + "fieldName": "speed", + "handlerKey": "threshold1", + "reducerId": "p50" + } + ] + } + }, { "id": "organize", "options": { @@ -2295,6 +2312,26 @@ "unitScale": true }, "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Link Speed" + }, + "properties": [ + { + "id": "unit", + "value": "bps" + }, + { + "id": "custom.width", + "value": 170 + }, + { + "id": "decimals", + "value": 0 + } + ] + }, { "matcher": { "id": "byName", @@ -2446,6 +2483,15 @@ "interval": "", "legendFormat": "", "refId": "E" + }, + { + "editorMode": "code", + "expr": "nic_ifgrp_speed{cluster=~\"$Cluster\",datacenter=~\"$Datacenter\",node=~\"$Node\",ports!=\"\"}", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "", + "refId": "A" } ], "title": "Link Aggregation Groups", @@ -2461,7 +2507,8 @@ "ifgroup", "ports", "datacenter", - "cluster" + "cluster", + "Value #A" ] } } @@ -2470,6 +2517,23 @@ "id": "merge", "options": {} }, + { + "id": "configFromData", + "options": { + "applyTo": { + "id": "byName", + "options": "Value #D" + }, + "configRefId": "A", + "mappings": [ + { + "fieldName": "speed", + "handlerKey": "threshold1", + "reducerId": "p50" + } + ] + } + }, { "id": "organize", "options": { @@ -2484,15 +2548,17 @@ }, "includeByName": {}, "indexByName": { - "Value #D": 5, - "Value #E": 6, + "Value #A": 4, + "Value #D": 6, + "Value #E": 7, "cluster": 1, "datacenter": 0, "ifgroup": 3, "node": 2, - "ports": 4 + "ports": 5 }, "renameByName": { + "Value #A": "Link Speed", "ifgroup": "LAG", "ports": "Ports" } @@ -5256,5 +5322,5 @@ "timezone": "", "title": "ONTAP: Network", "uid": "cdot-network", - "version": 13 + "version": 14 } diff --git a/mcp/metadata/ontap_metrics.json b/mcp/metadata/ontap_metrics.json index c494bdeb6..284261aa8 100644 --- a/mcp/metadata/ontap_metrics.json +++ b/mcp/metadata/ontap_metrics.json @@ -499,6 +499,14 @@ "net_port_mtu": "Maximum transmission unit, largest packet size on this network", "net_port_status": "This metric indicates a value of 1 if the port state is up and a value of 0 for any other state.", "net_route_labels": "This metric provides information about NetRoute", + "netstat_bytes_recvd": "Number of bytes received by a TCP connection", + "netstat_bytes_sent": "Number of bytes sent by a TCP connection", + "netstat_cong_win": "Congestion window of a TCP connection", + "netstat_cong_win_th": "Congestion window threshold of a TCP connection", + "netstat_ooorcv_pkts": "Number of out-of-order packets received by this TCP connection", + "netstat_recv_window": "Receive window size of a TCP connection", + "netstat_rexmit_pkts": "Number of packets retransmitted by this TCP connection", + "netstat_send_window": "Send window size of a TCP connection", "nfs_clients_idle_duration": "Specifies an ISO-8601 format of date and time to retrieve the idle time duration in hours, minutes, and seconds format.", "nfs_diag_storePool_ByteLockAlloc": "Current number of byte range lock objects allocated.", "nfs_diag_storePool_ByteLockMax": "Maximum number of byte range lock objects.", @@ -535,6 +543,7 @@ "nfs_diag_storePool_StringAlloc": "Current number of string objects allocated.", "nfs_diag_storePool_StringMax": "Maximum number of string objects.", "nic_ifgrp_rx_bytes": "Link Aggregation Group (LAG) Bytes received.", + "nic_ifgrp_speed": "Link Aggregation Group (LAG) link speed.", "nic_ifgrp_tx_bytes": "Link Aggregation Group (LAG) Bytes sent.", "nic_labels": "This metric provides information about NicCommon", "nic_link_up_to_downs": "Number of link state change from UP to DOWN.", From 01d7be42adc7c7ac1fda50d81128e7212e4e9f53 Mon Sep 17 00:00:00 2001 From: hardikl Date: Wed, 10 Dec 2025 18:45:43 +0530 Subject: [PATCH 2/5] fix: merge fix --- docs/ontap-metrics.md | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/docs/ontap-metrics.md b/docs/ontap-metrics.md index afa4f232b..0de557de5 100644 --- a/docs/ontap-metrics.md +++ b/docs/ontap-metrics.md @@ -7,7 +7,7 @@ These can be generated on demand by running `bin/harvest grafana metrics`. See - More information about ONTAP REST performance counters can be found [here](https://docs.netapp.com/us-en/ontap-pcmap-9121/index.html). ``` -Creation Date : 2025-Dec-08 +Creation Date : 2025-Dec-10 ONTAP Version: 9.16.1 ``` @@ -9071,6 +9071,25 @@ The `nic_ifgrp_rx_bytes` metric is visualized in the following Grafana dashboard +### nic_ifgrp_speed + +Link Aggregation Group (LAG) link speed. + +| API | Endpoint | Metric | Template | +|--------|----------|--------|---------| +| RestPerf | `NA` | `Harvest generated`
Unit:
Type:
Base: | conf/restperf/9.12.0/nic_common.yaml | +| ZapiPerf | `NA` | `Harvest generated`
Unit:
Type:
Base: | conf/zapiperf/cdot/9.8.0/nic_common.yaml | + +The `nic_ifgrp_speed` metric is visualized in the following Grafana dashboards: + +/// html | div.grafana-table +| Dashboard | Row | Type | Panel | +|--------|----------|--------|--------| +| ONTAP: Network | Link Aggregation Group (LAG) | table | [Link Aggregation Groups](/d/cdot-network/ontap3a-network?orgId=1&viewPanel=122) | +/// + + + ### nic_ifgrp_tx_bytes Link Aggregation Group (LAG) Bytes sent. From f1a76d5c24e29e1de9462df7528e20dbae05cbae Mon Sep 17 00:00:00 2001 From: hardikl Date: Thu, 11 Dec 2025 19:27:00 +0530 Subject: [PATCH 3/5] feat: adding rx, tx perc for nic and ifgrp in network dashboard --- cmd/collectors/commonutils.go | 24 +- cmd/collectors/restperf/plugins/nic/nic.go | 9 +- cmd/collectors/zapiperf/plugins/nic/nic.go | 9 +- cmd/tools/generate/counter.yaml | 16 +- docs/ontap-metrics.md | 43 ++- grafana/dashboards/cmode/network.json | 388 +++++++++++++++------ mcp/metadata/ontap_metrics.json | 3 +- 7 files changed, 356 insertions(+), 136 deletions(-) diff --git a/cmd/collectors/commonutils.go b/cmd/collectors/commonutils.go index d0366e95a..527a8f0b4 100644 --- a/cmd/collectors/commonutils.go +++ b/cmd/collectors/commonutils.go @@ -35,11 +35,12 @@ type embedShelf struct { } type PortData struct { - Node string - Port string - Read float64 - Write float64 - Speed float64 + Node string + Port string + Read float64 + Write float64 + ReadPerc float64 + WritePerc float64 } // Reference https://kb.netapp.com/onprem/ontap/hardware/FAQ%3A_How_do_shelf_product_IDs_and_modules_in_ONTAP_map_to_a_model_of_a_shelf_or_storage_system_with_embedded_storage @@ -483,7 +484,8 @@ func PopulateIfgroupMetrics(portIfgroupMap map[string]string, portDataMap map[st port := portInfo.Port readBytes := portInfo.Read writeBytes := portInfo.Write - linkSpeed := portInfo.Speed + readPerc := portInfo.ReadPerc + writePerc := portInfo.WritePerc ifgrpupInstanceKey := nodeName + ifgroupName ifgroupInstance := nData.GetInstance(ifgrpupInstanceKey) @@ -519,9 +521,13 @@ func PopulateIfgroupMetrics(portIfgroupMap map[string]string, portDataMap map[st txv, _ := tx.GetValueFloat64(ifgroupInstance) tx.SetValueFloat64(ifgroupInstance, writeBytes+txv) - speed := nData.GetMetric("speed") - speedv, _ := speed.GetValueFloat64(ifgroupInstance) - speed.SetValueFloat64(ifgroupInstance, linkSpeed+speedv) + rxp := nData.GetMetric("rx_perc") + rxpv, _ := rxp.GetValueFloat64(ifgroupInstance) + rxp.SetValueFloat64(ifgroupInstance, readPerc+rxpv) + + txp := nData.GetMetric("tx_perc") + txpv, _ := txp.GetValueFloat64(ifgroupInstance) + txp.SetValueFloat64(ifgroupInstance, writePerc+txpv) } return nil } diff --git a/cmd/collectors/restperf/plugins/nic/nic.go b/cmd/collectors/restperf/plugins/nic/nic.go index ed695ee1d..0765e423b 100644 --- a/cmd/collectors/restperf/plugins/nic/nic.go +++ b/cmd/collectors/restperf/plugins/nic/nic.go @@ -43,7 +43,8 @@ type Nic struct { var ifgrpMetrics = []string{ "rx_bytes", "tx_bytes", - "speed", + "rx_perc", + "tx_perc", } func New(p *plugin.AbstractPlugin) plugin.Plugin { @@ -198,11 +199,7 @@ func (n *Nic) Run(dataMap map[string]*matrix.Matrix) ([]*matrix.Matrix, *collect tx.SetValueFloat64(instance, txPercent) } - linkSpeed := float64(speed) - if strings.HasSuffix(s, "M") { - linkSpeed = float64(speed) * 8 - } - portDataMap[nodeName+port] = collectors.PortData{Node: nodeName, Port: port, Read: rxBytes, Write: txBytes, Speed: linkSpeed} + portDataMap[nodeName+port] = collectors.PortData{Node: nodeName, Port: port, Read: rxBytes, Write: txBytes, ReadPerc: rxPercent, WritePerc: txPercent} if rxOk || txOk { utilPercent.SetValueFloat64(instance, math.Max(rxPercent, txPercent)) diff --git a/cmd/collectors/zapiperf/plugins/nic/nic.go b/cmd/collectors/zapiperf/plugins/nic/nic.go index c85dc1a39..bebddbd44 100644 --- a/cmd/collectors/zapiperf/plugins/nic/nic.go +++ b/cmd/collectors/zapiperf/plugins/nic/nic.go @@ -42,7 +42,8 @@ type Nic struct { var ifgrpMetrics = []string{ "rx_bytes", "tx_bytes", - "speed", + "rx_perc", + "tx_perc", } func New(p *plugin.AbstractPlugin) plugin.Plugin { @@ -176,11 +177,7 @@ func (n *Nic) Run(dataMap map[string]*matrix.Matrix) ([]*matrix.Matrix, *collect tx.SetValueFloat64(instance, txPercent) } - linkSpeed := float64(speed) - if strings.HasSuffix(s, "M") { - linkSpeed = float64(speed) * 8 - } - portDataMap[nodeName+port] = collectors.PortData{Node: nodeName, Port: port, Read: rxBytes, Write: txBytes, Speed: linkSpeed} + portDataMap[nodeName+port] = collectors.PortData{Node: nodeName, Port: port, Read: rxBytes, Write: txBytes, ReadPerc: rxPercent, WritePerc: txPercent} if rxOk || txOk { utilPercent.SetValueFloat64(instance, math.Max(rxPercent, txPercent)) diff --git a/cmd/tools/generate/counter.yaml b/cmd/tools/generate/counter.yaml index 06dba93e1..f1cccae48 100644 --- a/cmd/tools/generate/counter.yaml +++ b/cmd/tools/generate/counter.yaml @@ -1895,8 +1895,20 @@ counters: ONTAPCounter: Harvest generated Template: conf/zapiperf/cdot/9.8.0/nic_common.yaml - - Name: nic_ifgrp_speed - Description: Link Aggregation Group (LAG) link speed. + - Name: nic_ifgrp_rx_perc + Description: Link Aggregation Group (LAG) Bytes received percentage. + APIs: + - API: RestPerf + Endpoint: NA + ONTAPCounter: Harvest generated + Template: conf/restperf/9.12.0/nic_common.yaml + - API: ZapiPerf + Endpoint: NA + ONTAPCounter: Harvest generated + Template: conf/zapiperf/cdot/9.8.0/nic_common.yaml + + - Name: nic_ifgrp_tx_perc + Description: Link Aggregation Group (LAG) Bytes sent percentage. APIs: - API: RestPerf Endpoint: NA diff --git a/docs/ontap-metrics.md b/docs/ontap-metrics.md index 0de557de5..a38ba4033 100644 --- a/docs/ontap-metrics.md +++ b/docs/ontap-metrics.md @@ -7,7 +7,7 @@ These can be generated on demand by running `bin/harvest grafana metrics`. See - More information about ONTAP REST performance counters can be found [here](https://docs.netapp.com/us-en/ontap-pcmap-9121/index.html). ``` -Creation Date : 2025-Dec-10 +Creation Date : 2025-Dec-11 ONTAP Version: 9.16.1 ``` @@ -9071,16 +9071,16 @@ The `nic_ifgrp_rx_bytes` metric is visualized in the following Grafana dashboard -### nic_ifgrp_speed +### nic_ifgrp_rx_perc -Link Aggregation Group (LAG) link speed. +Link Aggregation Group (LAG) Bytes received percentage. | API | Endpoint | Metric | Template | |--------|----------|--------|---------| | RestPerf | `NA` | `Harvest generated`
Unit:
Type:
Base: | conf/restperf/9.12.0/nic_common.yaml | | ZapiPerf | `NA` | `Harvest generated`
Unit:
Type:
Base: | conf/zapiperf/cdot/9.8.0/nic_common.yaml | -The `nic_ifgrp_speed` metric is visualized in the following Grafana dashboards: +The `nic_ifgrp_rx_perc` metric is visualized in the following Grafana dashboards: /// html | div.grafana-table | Dashboard | Row | Type | Panel | @@ -9110,6 +9110,25 @@ The `nic_ifgrp_tx_bytes` metric is visualized in the following Grafana dashboard +### nic_ifgrp_tx_perc + +Link Aggregation Group (LAG) Bytes sent percentage. + +| API | Endpoint | Metric | Template | +|--------|----------|--------|---------| +| RestPerf | `NA` | `Harvest generated`
Unit:
Type:
Base: | conf/restperf/9.12.0/nic_common.yaml | +| ZapiPerf | `NA` | `Harvest generated`
Unit:
Type:
Base: | conf/zapiperf/cdot/9.8.0/nic_common.yaml | + +The `nic_ifgrp_tx_perc` metric is visualized in the following Grafana dashboards: + +/// html | div.grafana-table +| Dashboard | Row | Type | Panel | +|--------|----------|--------|--------| +| ONTAP: Network | Link Aggregation Group (LAG) | table | [Link Aggregation Groups](/d/cdot-network/ontap3a-network?orgId=1&viewPanel=122) | +/// + + + ### nic_labels This metric provides information about NicCommon @@ -9275,6 +9294,14 @@ Bytes received percentage. | RestPerf | `NA` | `Harvest generated`
Unit:
Type:
Base: | conf/restperf/9.12.0/nic_common.yaml | | ZapiPerf | `NA` | `Harvest generated`
Unit:
Type:
Base: | conf/zapiperf/cdot/9.8.0/nic_common.yaml | +The `nic_rx_percent` metric is visualized in the following Grafana dashboards: + +/// html | div.grafana-table +| Dashboard | Row | Type | Panel | +|--------|----------|--------|--------| +| ONTAP: Network | Ethernet | table | [NIC ports](/d/cdot-network/ontap3a-network?orgId=1&viewPanel=58) | +/// + ### nic_rx_total_errors @@ -9361,6 +9388,14 @@ Bytes sent percentage. | RestPerf | `NA` | `Harvest generated`
Unit:
Type:
Base: | conf/restperf/9.12.0/nic_common.yaml | | ZapiPerf | `NA` | `Harvest generated`
Unit:
Type:
Base: | conf/zapiperf/cdot/9.8.0/nic_common.yaml | +The `nic_tx_percent` metric is visualized in the following Grafana dashboards: + +/// html | div.grafana-table +| Dashboard | Row | Type | Panel | +|--------|----------|--------|--------| +| ONTAP: Network | Ethernet | table | [NIC ports](/d/cdot-network/ontap3a-network?orgId=1&viewPanel=58) | +/// + ### nic_tx_total_errors diff --git a/grafana/dashboards/cmode/network.json b/grafana/dashboards/cmode/network.json index 12052a3ba..2854f8f5a 100644 --- a/grafana/dashboards/cmode/network.json +++ b/grafana/dashboards/cmode/network.json @@ -922,20 +922,8 @@ "mode": "absolute", "steps": [ { - "color": "rgb(80, 220, 20)", + "color": "green", "value": null - }, - { - "color": "light-yellow", - "value": 1000000 - }, - { - "color": "semi-dark-orange", - "value": 10000000 - }, - { - "color": "semi-dark-red", - "value": 100000000 } ] }, @@ -991,7 +979,125 @@ { "id": "thresholds", "value": { - "mode": "absolute", + "mode": "percentage", + "steps": [ + { + "color": "rgb(80, 220, 20)", + "value": null + }, + { + "color": "light-yellow", + "value": 50 + }, + { + "color": "semi-dark-orange", + "value": 75 + }, + { + "color": "semi-dark-red", + "value": 90 + } + ] + } + }, + { + "id": "max", + "value": 1 + }, + { + "id": "min", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #F" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "gauge" + } + }, + { + "id": "displayName", + "value": "Send %" + }, + { + "id": "noValue", + "value": "n/a" + }, + { + "id": "thresholds", + "value": { + "mode": "percentage", + "steps": [ + { + "color": "rgb(80, 220, 20)", + "value": null + }, + { + "color": "light-yellow", + "value": 50 + }, + { + "color": "semi-dark-orange", + "value": 75 + }, + { + "color": "semi-dark-red", + "value": 90 + } + ] + } + }, + { + "id": "max", + "value": 1 + }, + { + "id": "min", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #G" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "gauge" + } + }, + { + "id": "displayName", + "value": "Receive %" + }, + { + "id": "noValue", + "value": "n/a" + }, + { + "id": "thresholds", + "value": { + "mode": "percentage", "steps": [ { "color": "rgb(80, 220, 20)", @@ -1139,13 +1245,6 @@ { "id": "displayName", "value": "Send" - }, - { - "id": "custom.cellOptions", - "value": { - "mode": "gradient", - "type": "gauge" - } } ] }, @@ -1158,13 +1257,6 @@ { "id": "displayName", "value": "Receive" - }, - { - "id": "custom.cellOptions", - "value": { - "mode": "gradient", - "type": "gauge" - } } ] }, @@ -1291,6 +1383,26 @@ "interval": "", "legendFormat": "", "refId": "E" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "nic_tx_percent{cluster=~\"$Cluster\",datacenter=~\"$Datacenter\",nic=~\"$Eth\",node=~\"$Node\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "F" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "nic_rx_percent{cluster=~\"$Cluster\",datacenter=~\"$Datacenter\",nic=~\"$Eth\",node=~\"$Node\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "G" } ], "title": "NIC ports", @@ -1308,6 +1420,8 @@ "Value #C", "Value #D", "Value #E", + "Value #F", + "Value #G", "cluster", "datacenter" ] @@ -1318,23 +1432,6 @@ "id": "merge", "options": {} }, - { - "id": "configFromData", - "options": { - "applyTo": { - "id": "byName", - "options": "Value #D" - }, - "configRefId": "A", - "mappings": [ - { - "fieldName": "speed", - "handlerKey": "threshold1", - "reducerId": "p50" - } - ] - } - }, { "id": "organize", "options": { @@ -1354,7 +1451,9 @@ "Value #B": 6, "Value #C": 7, "Value #D": 8, - "Value #E": 9, + "Value #E": 10, + "Value #F": 9, + "Value #G": 11, "cluster": 1, "datacenter": 0, "nic": 3, @@ -2291,20 +2390,8 @@ "mode": "absolute", "steps": [ { - "color": "rgb(80, 220, 20)", + "color": "green", "value": null - }, - { - "color": "light-yellow", - "value": 1000000 - }, - { - "color": "semi-dark-orange", - "value": 10000000 - }, - { - "color": "semi-dark-red", - "value": 100000000 } ] }, @@ -2312,26 +2399,6 @@ "unitScale": true }, "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Link Speed" - }, - "properties": [ - { - "id": "unit", - "value": "bps" - }, - { - "id": "custom.width", - "value": 170 - }, - { - "id": "decimals", - "value": 0 - } - ] - }, { "matcher": { "id": "byName", @@ -2411,10 +2478,6 @@ { "id": "displayName", "value": "Send" - }, - { - "id": "custom.displayMode", - "value": "gradient-gauge" } ] }, @@ -2427,10 +2490,124 @@ { "id": "displayName", "value": "Receive" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #A" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" }, { - "id": "custom.displayMode", - "value": "gradient-gauge" + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "gauge" + } + }, + { + "id": "displayName", + "value": "Send %" + }, + { + "id": "noValue", + "value": "n/a" + }, + { + "id": "thresholds", + "value": { + "mode": "percentage", + "steps": [ + { + "color": "rgb(80, 220, 20)", + "value": null + }, + { + "color": "light-yellow", + "value": 50 + }, + { + "color": "semi-dark-orange", + "value": 75 + }, + { + "color": "semi-dark-red", + "value": 90 + } + ] + } + }, + { + "id": "max", + "value": 1 + }, + { + "id": "min", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #B" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "gauge" + } + }, + { + "id": "displayName", + "value": "Receive %" + }, + { + "id": "noValue", + "value": "n/a" + }, + { + "id": "thresholds", + "value": { + "mode": "percentage", + "steps": [ + { + "color": "rgb(80, 220, 20)", + "value": null + }, + { + "color": "light-yellow", + "value": 50 + }, + { + "color": "semi-dark-orange", + "value": 75 + }, + { + "color": "semi-dark-red", + "value": 90 + } + ] + } + }, + { + "id": "max", + "value": 1 + }, + { + "id": "min", + "value": 0 } ] } @@ -2486,12 +2663,23 @@ }, { "editorMode": "code", - "expr": "nic_ifgrp_speed{cluster=~\"$Cluster\",datacenter=~\"$Datacenter\",node=~\"$Node\",ports!=\"\"}", + "expr": "nic_ifgrp_tx_perc{cluster=~\"$Cluster\",datacenter=~\"$Datacenter\",node=~\"$Node\",ports!=\"\"}", "format": "table", "hide": false, "instant": true, + "interval": "", "legendFormat": "", "refId": "A" + }, + { + "editorMode": "code", + "expr": "nic_ifgrp_rx_perc{cluster=~\"$Cluster\",datacenter=~\"$Datacenter\",node=~\"$Node\",ports!=\"\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "B" } ], "title": "Link Aggregation Groups", @@ -2504,11 +2692,12 @@ "node", "Value #D", "Value #E", + "Value #A", + "Value #B", "ifgroup", "ports", "datacenter", - "cluster", - "Value #A" + "cluster" ] } } @@ -2517,23 +2706,6 @@ "id": "merge", "options": {} }, - { - "id": "configFromData", - "options": { - "applyTo": { - "id": "byName", - "options": "Value #D" - }, - "configRefId": "A", - "mappings": [ - { - "fieldName": "speed", - "handlerKey": "threshold1", - "reducerId": "p50" - } - ] - } - }, { "id": "organize", "options": { @@ -2548,9 +2720,10 @@ }, "includeByName": {}, "indexByName": { - "Value #A": 4, + "Value #A": 7, + "Value #B": 9, "Value #D": 6, - "Value #E": 7, + "Value #E": 8, "cluster": 1, "datacenter": 0, "ifgroup": 3, @@ -2558,7 +2731,6 @@ "ports": 5 }, "renameByName": { - "Value #A": "Link Speed", "ifgroup": "LAG", "ports": "Ports" } diff --git a/mcp/metadata/ontap_metrics.json b/mcp/metadata/ontap_metrics.json index d716f38a7..4a3387cbc 100644 --- a/mcp/metadata/ontap_metrics.json +++ b/mcp/metadata/ontap_metrics.json @@ -549,8 +549,9 @@ "nfs_diag_storePool_StringAlloc": "Current number of string objects allocated.", "nfs_diag_storePool_StringMax": "Maximum number of string objects.", "nic_ifgrp_rx_bytes": "Link Aggregation Group (LAG) Bytes received.", - "nic_ifgrp_speed": "Link Aggregation Group (LAG) link speed.", + "nic_ifgrp_rx_perc": "Link Aggregation Group (LAG) Bytes received percentage.", "nic_ifgrp_tx_bytes": "Link Aggregation Group (LAG) Bytes sent.", + "nic_ifgrp_tx_perc": "Link Aggregation Group (LAG) Bytes sent percentage.", "nic_labels": "This metric provides information about NicCommon", "nic_link_up_to_downs": "Number of link state change from UP to DOWN.", "nic_new_status": "This metric indicates a value of 1 if the NIC state is up (indicating the NIC is operational) and a value of 0 for any other state.", From 6e2c09149dc72d4caa968519a1f98e5dc9e9f327 Mon Sep 17 00:00:00 2001 From: hardikl Date: Tue, 6 Jan 2026 15:35:52 +0530 Subject: [PATCH 4/5] feat: handled weighted avg percentage --- cmd/collectors/commonutils.go | 35 ++++++++++++------- cmd/collectors/restperf/plugins/nic/nic.go | 2 +- .../restperf/plugins/nic/nic_test.go | 28 +++++++++------ cmd/collectors/zapiperf/plugins/nic/nic.go | 2 +- 4 files changed, 42 insertions(+), 25 deletions(-) diff --git a/cmd/collectors/commonutils.go b/cmd/collectors/commonutils.go index 527a8f0b4..dacc2b950 100644 --- a/cmd/collectors/commonutils.go +++ b/cmd/collectors/commonutils.go @@ -35,12 +35,18 @@ type embedShelf struct { } type PortData struct { - Node string - Port string - Read float64 - Write float64 - ReadPerc float64 - WritePerc float64 + Node string + Port string + Read float64 + Write float64 + Speed float64 +} + +type IfgroData struct { + Key string + Read float64 + Write float64 + Speed float64 } // Reference https://kb.netapp.com/onprem/ontap/hardware/FAQ%3A_How_do_shelf_product_IDs_and_modules_in_ONTAP_map_to_a_model_of_a_shelf_or_storage_system_with_embedded_storage @@ -475,6 +481,7 @@ func AggregatePerScanner(logger *slog.Logger, data *matrix.Matrix, latencyKey st func PopulateIfgroupMetrics(portIfgroupMap map[string]string, portDataMap map[string]PortData, nData *matrix.Matrix, logger *slog.Logger) error { var err error + ifgrpMap := make(map[string]IfgroData) for portKey, ifgroupName := range portIfgroupMap { portInfo, ok := portDataMap[portKey] if !ok { @@ -484,8 +491,7 @@ func PopulateIfgroupMetrics(portIfgroupMap map[string]string, portDataMap map[st port := portInfo.Port readBytes := portInfo.Read writeBytes := portInfo.Write - readPerc := portInfo.ReadPerc - writePerc := portInfo.WritePerc + speed := portInfo.Speed ifgrpupInstanceKey := nodeName + ifgroupName ifgroupInstance := nData.GetInstance(ifgrpupInstanceKey) @@ -501,6 +507,7 @@ func PopulateIfgroupMetrics(portIfgroupMap map[string]string, portDataMap map[st } } + ifgrpMap[ifgrpupInstanceKey] = IfgroData{Key: ifgrpupInstanceKey, Read: readBytes + ifgrpMap[ifgrpupInstanceKey].Read, Write: writeBytes + ifgrpMap[ifgrpupInstanceKey].Write, Speed: speed + ifgrpMap[ifgrpupInstanceKey].Speed} // set labels ifgroupInstance.SetLabel("node", nodeName) ifgroupInstance.SetLabel("ifgroup", ifgroupName) @@ -520,14 +527,18 @@ func PopulateIfgroupMetrics(portIfgroupMap map[string]string, portDataMap map[st tx := nData.GetMetric("tx_bytes") txv, _ := tx.GetValueFloat64(ifgroupInstance) tx.SetValueFloat64(ifgroupInstance, writeBytes+txv) + } + + for key, ifgroupInstance := range nData.GetInstances() { + if !ifgroupInstance.IsExportable() { + continue + } rxp := nData.GetMetric("rx_perc") - rxpv, _ := rxp.GetValueFloat64(ifgroupInstance) - rxp.SetValueFloat64(ifgroupInstance, readPerc+rxpv) + rxp.SetValueFloat64(ifgroupInstance, ifgrpMap[key].Read/ifgrpMap[key].Speed) txp := nData.GetMetric("tx_perc") - txpv, _ := txp.GetValueFloat64(ifgroupInstance) - txp.SetValueFloat64(ifgroupInstance, writePerc+txpv) + txp.SetValueFloat64(ifgroupInstance, ifgrpMap[key].Write/ifgrpMap[key].Speed) } return nil } diff --git a/cmd/collectors/restperf/plugins/nic/nic.go b/cmd/collectors/restperf/plugins/nic/nic.go index 0765e423b..024b4b345 100644 --- a/cmd/collectors/restperf/plugins/nic/nic.go +++ b/cmd/collectors/restperf/plugins/nic/nic.go @@ -199,7 +199,7 @@ func (n *Nic) Run(dataMap map[string]*matrix.Matrix) ([]*matrix.Matrix, *collect tx.SetValueFloat64(instance, txPercent) } - portDataMap[nodeName+port] = collectors.PortData{Node: nodeName, Port: port, Read: rxBytes, Write: txBytes, ReadPerc: rxPercent, WritePerc: txPercent} + portDataMap[nodeName+port] = collectors.PortData{Node: nodeName, Port: port, Read: rxBytes, Write: txBytes, Speed: float64(speed)} if rxOk || txOk { utilPercent.SetValueFloat64(instance, math.Max(rxPercent, txPercent)) diff --git a/cmd/collectors/restperf/plugins/nic/nic_test.go b/cmd/collectors/restperf/plugins/nic/nic_test.go index 66b9198f4..b9220f66b 100644 --- a/cmd/collectors/restperf/plugins/nic/nic_test.go +++ b/cmd/collectors/restperf/plugins/nic/nic_test.go @@ -10,6 +10,7 @@ import ( "github.com/netapp/harvest/v2/pkg/matrix" "github.com/netapp/harvest/v2/pkg/tree/node" "log/slog" + "math" "testing" ) @@ -75,13 +76,13 @@ func runNicTest(t *testing.T, createRestNic func(params *node.Node) plugin.Plugi instanceB4, _ := data.NewInstance("rtp-a700s-01:f5y") instanceB4.SetLabel("id", "rtp-a700s-01:f5y") - instanceB4.SetLabel("speed", "10000M") + instanceB4.SetLabel("speed", "20000M") instanceB4.SetLabel("node", "rtp-a700s-01") instanceB4.SetLabel("type", "nic_ixl") instanceB5, _ := data.NewInstance("rtp-a700s-01:f5z") instanceB5.SetLabel("id", "rtp-a700s-01:f5z") - instanceB5.SetLabel("speed", "10000M") + instanceB5.SetLabel("speed", "30000M") instanceB5.SetLabel("node", "rtp-a700s-01") instanceB5.SetLabel("type", "nic_ixl") @@ -108,17 +109,17 @@ func runNicTest(t *testing.T, createRestNic func(params *node.Node) plugin.Plugi receiveBytes.SetValueFloat64(instanceB1, 2861802356977) transmitBytes.SetValueFloat64(instanceB1, 5789662182305) - receiveBytes.SetValueFloat64(instanceB2, 2861802356977) - transmitBytes.SetValueFloat64(instanceB2, 5789662182305) + receiveBytes.SetValueFloat64(instanceB2, 5000000000) + transmitBytes.SetValueFloat64(instanceB2, 90000000000) - receiveBytes.SetValueFloat64(instanceB3, 2861802356977) - transmitBytes.SetValueFloat64(instanceB3, 5789662182305) + receiveBytes.SetValueFloat64(instanceB3, 5000000000) + transmitBytes.SetValueFloat64(instanceB3, 90000000000) - receiveBytes.SetValueFloat64(instanceB4, 2861802356977) - transmitBytes.SetValueFloat64(instanceB4, 5789662182305) + receiveBytes.SetValueFloat64(instanceB4, 5000000000) + transmitBytes.SetValueFloat64(instanceB4, 90000000000) - receiveBytes.SetValueFloat64(instanceB5, 2861802356977) - transmitBytes.SetValueFloat64(instanceB5, 5789662182305) + receiveBytes.SetValueFloat64(instanceB5, 5000000000) + transmitBytes.SetValueFloat64(instanceB5, 90000000000) dataMap := map[string]*matrix.Matrix{ "nic": data, @@ -156,7 +157,12 @@ func runNicTest(t *testing.T, createRestNic func(params *node.Node) plugin.Plugi value, ok := ifgroupData.GetMetric("rx_bytes").GetValueFloat64(ifgroupInstance1) assert.True(t, ok) - assert.Equal(t, value, 11447209427908.0) + assert.Equal(t, value, 20000000000.0) + + readPercVal, _ := ifgroupData.GetMetric("rx_perc").GetValueFloat64(ifgroupInstance1) + assert.Equal(t, math.Round(readPercVal*100)/100, 2.29) + writePercVal, _ := ifgroupData.GetMetric("tx_perc").GetValueFloat64(ifgroupInstance1) + assert.Equal(t, math.Round(writePercVal*100)/100, 41.14) } func TestRunForAllImplementations(t *testing.T) { diff --git a/cmd/collectors/zapiperf/plugins/nic/nic.go b/cmd/collectors/zapiperf/plugins/nic/nic.go index bebddbd44..6c7e147eb 100644 --- a/cmd/collectors/zapiperf/plugins/nic/nic.go +++ b/cmd/collectors/zapiperf/plugins/nic/nic.go @@ -177,7 +177,7 @@ func (n *Nic) Run(dataMap map[string]*matrix.Matrix) ([]*matrix.Matrix, *collect tx.SetValueFloat64(instance, txPercent) } - portDataMap[nodeName+port] = collectors.PortData{Node: nodeName, Port: port, Read: rxBytes, Write: txBytes, ReadPerc: rxPercent, WritePerc: txPercent} + portDataMap[nodeName+port] = collectors.PortData{Node: nodeName, Port: port, Read: rxBytes, Write: txBytes, Speed: float64(speed)} if rxOk || txOk { utilPercent.SetValueFloat64(instance, math.Max(rxPercent, txPercent)) From 019d42eaf21788be05e13b0fcfe4f4cc54ddecb1 Mon Sep 17 00:00:00 2001 From: hardikl Date: Wed, 7 Jan 2026 13:50:10 +0530 Subject: [PATCH 5/5] feat: handled review comment --- cmd/collectors/commonutils.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/collectors/commonutils.go b/cmd/collectors/commonutils.go index dacc2b950..09bed593e 100644 --- a/cmd/collectors/commonutils.go +++ b/cmd/collectors/commonutils.go @@ -42,7 +42,7 @@ type PortData struct { Speed float64 } -type IfgroData struct { +type IfgrpData struct { Key string Read float64 Write float64 @@ -481,7 +481,7 @@ func AggregatePerScanner(logger *slog.Logger, data *matrix.Matrix, latencyKey st func PopulateIfgroupMetrics(portIfgroupMap map[string]string, portDataMap map[string]PortData, nData *matrix.Matrix, logger *slog.Logger) error { var err error - ifgrpMap := make(map[string]IfgroData) + ifgrpMap := make(map[string]IfgrpData) for portKey, ifgroupName := range portIfgroupMap { portInfo, ok := portDataMap[portKey] if !ok { @@ -507,7 +507,7 @@ func PopulateIfgroupMetrics(portIfgroupMap map[string]string, portDataMap map[st } } - ifgrpMap[ifgrpupInstanceKey] = IfgroData{Key: ifgrpupInstanceKey, Read: readBytes + ifgrpMap[ifgrpupInstanceKey].Read, Write: writeBytes + ifgrpMap[ifgrpupInstanceKey].Write, Speed: speed + ifgrpMap[ifgrpupInstanceKey].Speed} + ifgrpMap[ifgrpupInstanceKey] = IfgrpData{Key: ifgrpupInstanceKey, Read: readBytes + ifgrpMap[ifgrpupInstanceKey].Read, Write: writeBytes + ifgrpMap[ifgrpupInstanceKey].Write, Speed: speed + ifgrpMap[ifgrpupInstanceKey].Speed} // set labels ifgroupInstance.SetLabel("node", nodeName) ifgroupInstance.SetLabel("ifgroup", ifgroupName)