diff --git a/cmd/collectors/commonutils.go b/cmd/collectors/commonutils.go index 8214f24d8..09bed593e 100644 --- a/cmd/collectors/commonutils.go +++ b/cmd/collectors/commonutils.go @@ -39,6 +39,14 @@ type PortData struct { Port string Read float64 Write float64 + Speed float64 +} + +type IfgrpData struct { + Key string + Read float64 + Write float64 + Speed float64 } // Reference https://kb.netapp.com/onprem/ontap/hardware/FAQ%3A_How_do_shelf_product_IDs_and_modules_in_ONTAP_map_to_a_model_of_a_shelf_or_storage_system_with_embedded_storage @@ -473,6 +481,7 @@ func AggregatePerScanner(logger *slog.Logger, data *matrix.Matrix, latencyKey st func PopulateIfgroupMetrics(portIfgroupMap map[string]string, portDataMap map[string]PortData, nData *matrix.Matrix, logger *slog.Logger) error { var err error + ifgrpMap := make(map[string]IfgrpData) for portKey, ifgroupName := range portIfgroupMap { portInfo, ok := portDataMap[portKey] if !ok { @@ -482,6 +491,7 @@ func PopulateIfgroupMetrics(portIfgroupMap map[string]string, portDataMap map[st port := portInfo.Port readBytes := portInfo.Read writeBytes := portInfo.Write + speed := portInfo.Speed ifgrpupInstanceKey := nodeName + ifgroupName ifgroupInstance := nData.GetInstance(ifgrpupInstanceKey) @@ -497,6 +507,7 @@ func PopulateIfgroupMetrics(portIfgroupMap map[string]string, portDataMap map[st } } + ifgrpMap[ifgrpupInstanceKey] = IfgrpData{Key: ifgrpupInstanceKey, Read: readBytes + ifgrpMap[ifgrpupInstanceKey].Read, Write: writeBytes + ifgrpMap[ifgrpupInstanceKey].Write, Speed: speed + ifgrpMap[ifgrpupInstanceKey].Speed} // set labels ifgroupInstance.SetLabel("node", nodeName) ifgroupInstance.SetLabel("ifgroup", ifgroupName) @@ -517,6 +528,18 @@ func PopulateIfgroupMetrics(portIfgroupMap map[string]string, portDataMap map[st txv, _ := tx.GetValueFloat64(ifgroupInstance) tx.SetValueFloat64(ifgroupInstance, writeBytes+txv) } + + for key, ifgroupInstance := range nData.GetInstances() { + if !ifgroupInstance.IsExportable() { + continue + } + + rxp := nData.GetMetric("rx_perc") + rxp.SetValueFloat64(ifgroupInstance, ifgrpMap[key].Read/ifgrpMap[key].Speed) + + txp := nData.GetMetric("tx_perc") + txp.SetValueFloat64(ifgroupInstance, ifgrpMap[key].Write/ifgrpMap[key].Speed) + } return nil } diff --git a/cmd/collectors/restperf/plugins/nic/nic.go b/cmd/collectors/restperf/plugins/nic/nic.go index 7206a7190..024b4b345 100644 --- a/cmd/collectors/restperf/plugins/nic/nic.go +++ b/cmd/collectors/restperf/plugins/nic/nic.go @@ -43,6 +43,8 @@ type Nic struct { var ifgrpMetrics = []string{ "rx_bytes", "tx_bytes", + "rx_perc", + "tx_perc", } func New(p *plugin.AbstractPlugin) plugin.Plugin { @@ -197,7 +199,7 @@ func (n *Nic) Run(dataMap map[string]*matrix.Matrix) ([]*matrix.Matrix, *collect tx.SetValueFloat64(instance, txPercent) } - portDataMap[nodeName+port] = collectors.PortData{Node: nodeName, Port: port, Read: rxBytes, Write: txBytes} + portDataMap[nodeName+port] = collectors.PortData{Node: nodeName, Port: port, Read: rxBytes, Write: txBytes, Speed: float64(speed)} if rxOk || txOk { utilPercent.SetValueFloat64(instance, math.Max(rxPercent, txPercent)) diff --git a/cmd/collectors/restperf/plugins/nic/nic_test.go b/cmd/collectors/restperf/plugins/nic/nic_test.go index 66b9198f4..b9220f66b 100644 --- a/cmd/collectors/restperf/plugins/nic/nic_test.go +++ b/cmd/collectors/restperf/plugins/nic/nic_test.go @@ -10,6 +10,7 @@ import ( "github.com/netapp/harvest/v2/pkg/matrix" "github.com/netapp/harvest/v2/pkg/tree/node" "log/slog" + "math" "testing" ) @@ -75,13 +76,13 @@ func runNicTest(t *testing.T, createRestNic func(params *node.Node) plugin.Plugi instanceB4, _ := data.NewInstance("rtp-a700s-01:f5y") instanceB4.SetLabel("id", "rtp-a700s-01:f5y") - instanceB4.SetLabel("speed", "10000M") + instanceB4.SetLabel("speed", "20000M") instanceB4.SetLabel("node", "rtp-a700s-01") instanceB4.SetLabel("type", "nic_ixl") instanceB5, _ := data.NewInstance("rtp-a700s-01:f5z") instanceB5.SetLabel("id", "rtp-a700s-01:f5z") - instanceB5.SetLabel("speed", "10000M") + instanceB5.SetLabel("speed", "30000M") instanceB5.SetLabel("node", "rtp-a700s-01") instanceB5.SetLabel("type", "nic_ixl") @@ -108,17 +109,17 @@ func runNicTest(t *testing.T, createRestNic func(params *node.Node) plugin.Plugi receiveBytes.SetValueFloat64(instanceB1, 2861802356977) transmitBytes.SetValueFloat64(instanceB1, 5789662182305) - receiveBytes.SetValueFloat64(instanceB2, 2861802356977) - transmitBytes.SetValueFloat64(instanceB2, 5789662182305) + receiveBytes.SetValueFloat64(instanceB2, 5000000000) + transmitBytes.SetValueFloat64(instanceB2, 90000000000) - receiveBytes.SetValueFloat64(instanceB3, 2861802356977) - transmitBytes.SetValueFloat64(instanceB3, 5789662182305) + receiveBytes.SetValueFloat64(instanceB3, 5000000000) + transmitBytes.SetValueFloat64(instanceB3, 90000000000) - receiveBytes.SetValueFloat64(instanceB4, 2861802356977) - transmitBytes.SetValueFloat64(instanceB4, 5789662182305) + receiveBytes.SetValueFloat64(instanceB4, 5000000000) + transmitBytes.SetValueFloat64(instanceB4, 90000000000) - receiveBytes.SetValueFloat64(instanceB5, 2861802356977) - transmitBytes.SetValueFloat64(instanceB5, 5789662182305) + receiveBytes.SetValueFloat64(instanceB5, 5000000000) + transmitBytes.SetValueFloat64(instanceB5, 90000000000) dataMap := map[string]*matrix.Matrix{ "nic": data, @@ -156,7 +157,12 @@ func runNicTest(t *testing.T, createRestNic func(params *node.Node) plugin.Plugi value, ok := ifgroupData.GetMetric("rx_bytes").GetValueFloat64(ifgroupInstance1) assert.True(t, ok) - assert.Equal(t, value, 11447209427908.0) + assert.Equal(t, value, 20000000000.0) + + readPercVal, _ := ifgroupData.GetMetric("rx_perc").GetValueFloat64(ifgroupInstance1) + assert.Equal(t, math.Round(readPercVal*100)/100, 2.29) + writePercVal, _ := ifgroupData.GetMetric("tx_perc").GetValueFloat64(ifgroupInstance1) + assert.Equal(t, math.Round(writePercVal*100)/100, 41.14) } func TestRunForAllImplementations(t *testing.T) { diff --git a/cmd/collectors/zapiperf/plugins/nic/nic.go b/cmd/collectors/zapiperf/plugins/nic/nic.go index 41b238c47..6c7e147eb 100644 --- a/cmd/collectors/zapiperf/plugins/nic/nic.go +++ b/cmd/collectors/zapiperf/plugins/nic/nic.go @@ -42,6 +42,8 @@ type Nic struct { var ifgrpMetrics = []string{ "rx_bytes", "tx_bytes", + "rx_perc", + "tx_perc", } func New(p *plugin.AbstractPlugin) plugin.Plugin { @@ -175,7 +177,7 @@ func (n *Nic) Run(dataMap map[string]*matrix.Matrix) ([]*matrix.Matrix, *collect tx.SetValueFloat64(instance, txPercent) } - portDataMap[nodeName+port] = collectors.PortData{Node: nodeName, Port: port, Read: rxBytes, Write: txBytes} + portDataMap[nodeName+port] = collectors.PortData{Node: nodeName, Port: port, Read: rxBytes, Write: txBytes, Speed: float64(speed)} if rxOk || txOk { utilPercent.SetValueFloat64(instance, math.Max(rxPercent, txPercent)) diff --git a/cmd/tools/generate/counter.yaml b/cmd/tools/generate/counter.yaml index 41d9605fd..f1cccae48 100644 --- a/cmd/tools/generate/counter.yaml +++ b/cmd/tools/generate/counter.yaml @@ -1895,6 +1895,30 @@ counters: ONTAPCounter: Harvest generated Template: conf/zapiperf/cdot/9.8.0/nic_common.yaml + - Name: nic_ifgrp_rx_perc + Description: Link Aggregation Group (LAG) Bytes received percentage. + APIs: + - API: RestPerf + Endpoint: NA + ONTAPCounter: Harvest generated + Template: conf/restperf/9.12.0/nic_common.yaml + - API: ZapiPerf + Endpoint: NA + ONTAPCounter: Harvest generated + Template: conf/zapiperf/cdot/9.8.0/nic_common.yaml + + - Name: nic_ifgrp_tx_perc + Description: Link Aggregation Group (LAG) Bytes sent percentage. + APIs: + - API: RestPerf + Endpoint: NA + ONTAPCounter: Harvest generated + Template: conf/restperf/9.12.0/nic_common.yaml + - API: ZapiPerf + Endpoint: NA + ONTAPCounter: Harvest generated + Template: conf/zapiperf/cdot/9.8.0/nic_common.yaml + - Name: nic_rx_percent Description: Bytes received percentage. APIs: diff --git a/docs/ontap-metrics.md b/docs/ontap-metrics.md index afa4f232b..a38ba4033 100644 --- a/docs/ontap-metrics.md +++ b/docs/ontap-metrics.md @@ -7,7 +7,7 @@ These can be generated on demand by running `bin/harvest grafana metrics`. See - More information about ONTAP REST performance counters can be found [here](https://docs.netapp.com/us-en/ontap-pcmap-9121/index.html). ``` -Creation Date : 2025-Dec-08 +Creation Date : 2025-Dec-11 ONTAP Version: 9.16.1 ``` @@ -9071,6 +9071,25 @@ The `nic_ifgrp_rx_bytes` metric is visualized in the following Grafana dashboard +### nic_ifgrp_rx_perc + +Link Aggregation Group (LAG) Bytes received percentage. + +| API | Endpoint | Metric | Template | +|--------|----------|--------|---------| +| RestPerf | `NA` | `Harvest generated`
Unit:
Type:
Base: | conf/restperf/9.12.0/nic_common.yaml | +| ZapiPerf | `NA` | `Harvest generated`
Unit:
Type:
Base: | conf/zapiperf/cdot/9.8.0/nic_common.yaml | + +The `nic_ifgrp_rx_perc` metric is visualized in the following Grafana dashboards: + +/// html | div.grafana-table +| Dashboard | Row | Type | Panel | +|--------|----------|--------|--------| +| ONTAP: Network | Link Aggregation Group (LAG) | table | [Link Aggregation Groups](/d/cdot-network/ontap3a-network?orgId=1&viewPanel=122) | +/// + + + ### nic_ifgrp_tx_bytes Link Aggregation Group (LAG) Bytes sent. @@ -9091,6 +9110,25 @@ The `nic_ifgrp_tx_bytes` metric is visualized in the following Grafana dashboard +### nic_ifgrp_tx_perc + +Link Aggregation Group (LAG) Bytes sent percentage. + +| API | Endpoint | Metric | Template | +|--------|----------|--------|---------| +| RestPerf | `NA` | `Harvest generated`
Unit:
Type:
Base: | conf/restperf/9.12.0/nic_common.yaml | +| ZapiPerf | `NA` | `Harvest generated`
Unit:
Type:
Base: | conf/zapiperf/cdot/9.8.0/nic_common.yaml | + +The `nic_ifgrp_tx_perc` metric is visualized in the following Grafana dashboards: + +/// html | div.grafana-table +| Dashboard | Row | Type | Panel | +|--------|----------|--------|--------| +| ONTAP: Network | Link Aggregation Group (LAG) | table | [Link Aggregation Groups](/d/cdot-network/ontap3a-network?orgId=1&viewPanel=122) | +/// + + + ### nic_labels This metric provides information about NicCommon @@ -9256,6 +9294,14 @@ Bytes received percentage. | RestPerf | `NA` | `Harvest generated`
Unit:
Type:
Base: | conf/restperf/9.12.0/nic_common.yaml | | ZapiPerf | `NA` | `Harvest generated`
Unit:
Type:
Base: | conf/zapiperf/cdot/9.8.0/nic_common.yaml | +The `nic_rx_percent` metric is visualized in the following Grafana dashboards: + +/// html | div.grafana-table +| Dashboard | Row | Type | Panel | +|--------|----------|--------|--------| +| ONTAP: Network | Ethernet | table | [NIC ports](/d/cdot-network/ontap3a-network?orgId=1&viewPanel=58) | +/// + ### nic_rx_total_errors @@ -9342,6 +9388,14 @@ Bytes sent percentage. | RestPerf | `NA` | `Harvest generated`
Unit:
Type:
Base: | conf/restperf/9.12.0/nic_common.yaml | | ZapiPerf | `NA` | `Harvest generated`
Unit:
Type:
Base: | conf/zapiperf/cdot/9.8.0/nic_common.yaml | +The `nic_tx_percent` metric is visualized in the following Grafana dashboards: + +/// html | div.grafana-table +| Dashboard | Row | Type | Panel | +|--------|----------|--------|--------| +| ONTAP: Network | Ethernet | table | [NIC ports](/d/cdot-network/ontap3a-network?orgId=1&viewPanel=58) | +/// + ### nic_tx_total_errors diff --git a/grafana/dashboards/cmode/network.json b/grafana/dashboards/cmode/network.json index c858c7984..2854f8f5a 100644 --- a/grafana/dashboards/cmode/network.json +++ b/grafana/dashboards/cmode/network.json @@ -922,20 +922,8 @@ "mode": "absolute", "steps": [ { - "color": "rgb(80, 220, 20)", + "color": "green", "value": null - }, - { - "color": "light-yellow", - "value": 1000000 - }, - { - "color": "semi-dark-orange", - "value": 10000000 - }, - { - "color": "semi-dark-red", - "value": 100000000 } ] }, @@ -991,7 +979,125 @@ { "id": "thresholds", "value": { - "mode": "absolute", + "mode": "percentage", + "steps": [ + { + "color": "rgb(80, 220, 20)", + "value": null + }, + { + "color": "light-yellow", + "value": 50 + }, + { + "color": "semi-dark-orange", + "value": 75 + }, + { + "color": "semi-dark-red", + "value": 90 + } + ] + } + }, + { + "id": "max", + "value": 1 + }, + { + "id": "min", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #F" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "gauge" + } + }, + { + "id": "displayName", + "value": "Send %" + }, + { + "id": "noValue", + "value": "n/a" + }, + { + "id": "thresholds", + "value": { + "mode": "percentage", + "steps": [ + { + "color": "rgb(80, 220, 20)", + "value": null + }, + { + "color": "light-yellow", + "value": 50 + }, + { + "color": "semi-dark-orange", + "value": 75 + }, + { + "color": "semi-dark-red", + "value": 90 + } + ] + } + }, + { + "id": "max", + "value": 1 + }, + { + "id": "min", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #G" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "gauge" + } + }, + { + "id": "displayName", + "value": "Receive %" + }, + { + "id": "noValue", + "value": "n/a" + }, + { + "id": "thresholds", + "value": { + "mode": "percentage", "steps": [ { "color": "rgb(80, 220, 20)", @@ -1139,13 +1245,6 @@ { "id": "displayName", "value": "Send" - }, - { - "id": "custom.cellOptions", - "value": { - "mode": "gradient", - "type": "gauge" - } } ] }, @@ -1158,13 +1257,6 @@ { "id": "displayName", "value": "Receive" - }, - { - "id": "custom.cellOptions", - "value": { - "mode": "gradient", - "type": "gauge" - } } ] }, @@ -1291,6 +1383,26 @@ "interval": "", "legendFormat": "", "refId": "E" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "nic_tx_percent{cluster=~\"$Cluster\",datacenter=~\"$Datacenter\",nic=~\"$Eth\",node=~\"$Node\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "F" + }, + { + "datasource": "${DS_PROMETHEUS}", + "expr": "nic_rx_percent{cluster=~\"$Cluster\",datacenter=~\"$Datacenter\",nic=~\"$Eth\",node=~\"$Node\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "G" } ], "title": "NIC ports", @@ -1308,6 +1420,8 @@ "Value #C", "Value #D", "Value #E", + "Value #F", + "Value #G", "cluster", "datacenter" ] @@ -1337,7 +1451,9 @@ "Value #B": 6, "Value #C": 7, "Value #D": 8, - "Value #E": 9, + "Value #E": 10, + "Value #F": 9, + "Value #G": 11, "cluster": 1, "datacenter": 0, "nic": 3, @@ -2274,20 +2390,8 @@ "mode": "absolute", "steps": [ { - "color": "rgb(80, 220, 20)", + "color": "green", "value": null - }, - { - "color": "light-yellow", - "value": 1000000 - }, - { - "color": "semi-dark-orange", - "value": 10000000 - }, - { - "color": "semi-dark-red", - "value": 100000000 } ] }, @@ -2374,10 +2478,6 @@ { "id": "displayName", "value": "Send" - }, - { - "id": "custom.displayMode", - "value": "gradient-gauge" } ] }, @@ -2390,10 +2490,124 @@ { "id": "displayName", "value": "Receive" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #A" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" }, { - "id": "custom.displayMode", - "value": "gradient-gauge" + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "gauge" + } + }, + { + "id": "displayName", + "value": "Send %" + }, + { + "id": "noValue", + "value": "n/a" + }, + { + "id": "thresholds", + "value": { + "mode": "percentage", + "steps": [ + { + "color": "rgb(80, 220, 20)", + "value": null + }, + { + "color": "light-yellow", + "value": 50 + }, + { + "color": "semi-dark-orange", + "value": 75 + }, + { + "color": "semi-dark-red", + "value": 90 + } + ] + } + }, + { + "id": "max", + "value": 1 + }, + { + "id": "min", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #B" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + }, + { + "id": "custom.cellOptions", + "value": { + "mode": "gradient", + "type": "gauge" + } + }, + { + "id": "displayName", + "value": "Receive %" + }, + { + "id": "noValue", + "value": "n/a" + }, + { + "id": "thresholds", + "value": { + "mode": "percentage", + "steps": [ + { + "color": "rgb(80, 220, 20)", + "value": null + }, + { + "color": "light-yellow", + "value": 50 + }, + { + "color": "semi-dark-orange", + "value": 75 + }, + { + "color": "semi-dark-red", + "value": 90 + } + ] + } + }, + { + "id": "max", + "value": 1 + }, + { + "id": "min", + "value": 0 } ] } @@ -2446,6 +2660,26 @@ "interval": "", "legendFormat": "", "refId": "E" + }, + { + "editorMode": "code", + "expr": "nic_ifgrp_tx_perc{cluster=~\"$Cluster\",datacenter=~\"$Datacenter\",node=~\"$Node\",ports!=\"\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "editorMode": "code", + "expr": "nic_ifgrp_rx_perc{cluster=~\"$Cluster\",datacenter=~\"$Datacenter\",node=~\"$Node\",ports!=\"\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "B" } ], "title": "Link Aggregation Groups", @@ -2458,6 +2692,8 @@ "node", "Value #D", "Value #E", + "Value #A", + "Value #B", "ifgroup", "ports", "datacenter", @@ -2484,13 +2720,15 @@ }, "includeByName": {}, "indexByName": { - "Value #D": 5, - "Value #E": 6, + "Value #A": 7, + "Value #B": 9, + "Value #D": 6, + "Value #E": 8, "cluster": 1, "datacenter": 0, "ifgroup": 3, "node": 2, - "ports": 4 + "ports": 5 }, "renameByName": { "ifgroup": "LAG", @@ -5256,5 +5494,5 @@ "timezone": "", "title": "ONTAP: Network", "uid": "cdot-network", - "version": 13 + "version": 14 } diff --git a/mcp/metadata/ontap_metrics.json b/mcp/metadata/ontap_metrics.json index 06a838ead..4a3387cbc 100644 --- a/mcp/metadata/ontap_metrics.json +++ b/mcp/metadata/ontap_metrics.json @@ -549,7 +549,9 @@ "nfs_diag_storePool_StringAlloc": "Current number of string objects allocated.", "nfs_diag_storePool_StringMax": "Maximum number of string objects.", "nic_ifgrp_rx_bytes": "Link Aggregation Group (LAG) Bytes received.", + "nic_ifgrp_rx_perc": "Link Aggregation Group (LAG) Bytes received percentage.", "nic_ifgrp_tx_bytes": "Link Aggregation Group (LAG) Bytes sent.", + "nic_ifgrp_tx_perc": "Link Aggregation Group (LAG) Bytes sent percentage.", "nic_labels": "This metric provides information about NicCommon", "nic_link_up_to_downs": "Number of link state change from UP to DOWN.", "nic_new_status": "This metric indicates a value of 1 if the NIC state is up (indicating the NIC is operational) and a value of 0 for any other state.",