From b583159fe0b56c5d859d424cb90eacaea7f41115 Mon Sep 17 00:00:00 2001
From: rodrodsilo
Date: Mon, 9 Feb 2026 13:23:38 +0200
Subject: [PATCH] GPU metrics collected from pod directly

---
Reviewer notes (placed between the separator above and the diffstat, so
they are commentary only and not part of the change itself):

* Scope: only the `gpu-operator-metrics-exporter` scrape job is touched;
  the neighbouring `minio-cluster-metrics` job appears as context only.
* Discovery: `kubernetes_sd_configs` switches from `role: node` to
  `role: pod`, restricted to the `kube-amd-gpu` namespace.
* Filtering: targets are now kept when `__meta_kubernetes_pod_name`
  matches `gpu-operator-metrics-exporter.*`. Before, the keep rule matched
  `true` on
  `__meta_kubernetes_node_label_feature_node_kubernetes_io_amd_gpu`.
* Address: `__address__` is rewritten to `<pod IP>:5000` (from
  `__meta_kubernetes_pod_ip`) instead of `<node InternalIP>:32500`.
* Labels: `hostname` is now filled from `__meta_kubernetes_pod_node_name`
  instead of `__meta_kubernetes_node_name`.
* NOTE(review): port 5000 is presumably the exporter's container port and
  32500 its former NodePort - confirm against the exporter deployment.
* NOTE(review): the copy of this patch under review had all line breaks
  and leading whitespace collapsed. The line structure below is recovered
  from the hunk header counts (21 old / 24 new lines; 16 insertions, 13
  deletions), but the leading indentation of every hunk line is a
  reconstruction. Compare it with the target file before applying.

 .../templates/collectors-metrics-rest.yaml    | 29 ++++++++++---------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/sources/otel-lgtm-stack/v1.0.7/templates/collectors-metrics-rest.yaml b/sources/otel-lgtm-stack/v1.0.7/templates/collectors-metrics-rest.yaml
index 78976467..0857538b 100644
--- a/sources/otel-lgtm-stack/v1.0.7/templates/collectors-metrics-rest.yaml
+++ b/sources/otel-lgtm-stack/v1.0.7/templates/collectors-metrics-rest.yaml
@@ -61,21 +61,24 @@ spec:
           scrape_timeout: 10s
         - job_name: gpu-operator-metrics-exporter
           kubernetes_sd_configs:
-          - role: node
+          - role: pod
+            namespaces:
+              names:
+                - kube-amd-gpu
           metrics_path: /metrics
           relabel_configs:
-          - action: keep
-            regex: true
-            source_labels:
-            - __meta_kubernetes_node_label_feature_node_kubernetes_io_amd_gpu
-          - regex: (.+)
-            replacement: $1:32500
-            source_labels:
-            - __meta_kubernetes_node_address_InternalIP
-            target_label: __address__
-          - source_labels:
-            - __meta_kubernetes_node_name
-            target_label: hostname
+            - action: keep
+              regex: gpu-operator-metrics-exporter.*
+              source_labels:
+                - __meta_kubernetes_pod_name
+            - source_labels:
+                - __meta_kubernetes_pod_ip
+              regex: (.+)
+              replacement: $1:5000
+              target_label: __address__
+            - source_labels:
+                - __meta_kubernetes_pod_node_name
+              target_label: hostname
         - job_name: minio-cluster-metrics
           metrics_path: /minio/v2/metrics/cluster
           scheme: http