From 21c821ceb50dfa5d675c6513092bdf9f2320ac9e Mon Sep 17 00:00:00 2001
From: Kamil Kisiela <kamil.kisiela@gmail.com>
Date: Fri, 13 Feb 2026 11:20:44 +0100
Subject: [PATCH 01/14] metrics: wip

---
 .../components/otel-metrics/label-card.tsx    |  60 ++
 .../components/otel-metrics/metric-card.tsx   |  95 +++
 .../otel-metrics/metrics-section.tsx          |  55 ++
 .../content/router/observability/metrics.mdx  | 637 ++++++++++++++++++
 4 files changed, 847 insertions(+)
 create mode 100644 packages/web/docs/src/components/otel-metrics/label-card.tsx
 create mode 100644 packages/web/docs/src/components/otel-metrics/metric-card.tsx
 create mode 100644 packages/web/docs/src/components/otel-metrics/metrics-section.tsx
 create mode 100644 packages/web/docs/src/content/router/observability/metrics.mdx
diff --git a/packages/web/docs/src/components/otel-metrics/label-card.tsx b/packages/web/docs/src/components/otel-metrics/label-card.tsx
new file mode 100644
index 00000000000..357a0ed020d
--- /dev/null
+++ b/packages/web/docs/src/components/otel-metrics/label-card.tsx
@@ -0,0 +1,60 @@
+import { Info, Lightbulb, Tag } from 'lucide-react';
+
+interface LabelCardProps {
+  name: string;
+  meaning: string;
+  typicalValues: string[];
+  notes?: string;
+}
+
+export function LabelCard({ name, meaning, typicalValues, notes }: LabelCardProps) {
+  return (
+    <div className="overflow-hidden rounded-lg border border-gray-200 bg-white transition-shadow duration-200 hover:shadow-md dark:border-neutral-800 dark:bg-neutral-900 dark:hover:shadow-black/30">
+      <div className="p-5">
+        <div className="mb-3 flex items-start gap-3">
+          <div className="shrink-0 rounded-md border border-gray-200 bg-gray-100 p-1.5 dark:border-neutral-700 dark:bg-neutral-800">
+            <Tag className="h-4 w-4 text-gray-600 dark:text-slate-100" />
+          </div>
+          <div className="min-w-0 flex-1">
+            <code className="break-all text-sm font-semibold text-gray-900 dark:text-slate-100">
+              {name}
+            </code>
+            <p className="mt-1 text-sm leading-relaxed text-gray-600 dark:text-slate-100">
+              {meaning}
+            </p>
+          </div>
+        </div>
+
+        <div className="mt-4 space-y-3">
+          <div>
+            <div className="mb-2 flex items-center gap-1.5">
+              <Info className="h-3.5 w-3.5 text-gray-500 dark:text-slate-400" />
+              <span className="text-xs font-semibold uppercase text-gray-700 dark:text-slate-100">
+                Typical Values
+              </span>
+            </div>
+            <div className="flex flex-wrap gap-1.5">
+              {typicalValues.map(value => (
+                <code
+                  key={value}
+                  className="rounded-md border border-slate-200 bg-slate-50 px-2.5 py-1 text-xs font-medium text-slate-700 dark:border-neutral-700 dark:bg-neutral-800 dark:text-slate-200"
+                >
+                  {value}
+                </code>
+              ))}
+            </div>
+          </div>
+
+          {notes && (
+            <div className="border-t border-gray-100 pt-3 dark:border-neutral-800">
+              <div className="flex items-start gap-2">
+                <Lightbulb className="mt-0.5 h-3.5 w-3.5 shrink-0 text-amber-600 dark:text-amber-400" />
+                <p className="text-sm leading-relaxed text-gray-600 dark:text-slate-100">{notes}</p>
+              </div>
+            </div>
+          )}
+        </div>
+      </div>
+    </div>
+  );
+}
diff --git a/packages/web/docs/src/components/otel-metrics/metric-card.tsx b/packages/web/docs/src/components/otel-metrics/metric-card.tsx
new file mode 100644
index 00000000000..b31d81488f6
--- /dev/null
+++ b/packages/web/docs/src/components/otel-metrics/metric-card.tsx
@@ -0,0 +1,95 @@
+import { Activity, BarChart3, Gauge, TrendingUp } from 'lucide-react';
+
+interface MetricCardProps {
+  name: string;
+  type: 'Counter' | 'Histogram' | 'UpDownCounter' | 'Gauge';
+  unit?: string;
+  description?: string;
+  labels?: string[];
+}
+
+const typeConfig = {
+  Counter: {
+    icon: TrendingUp,
+    color:
+      'bg-emerald-50 text-emerald-700 border-emerald-200 dark:bg-emerald-900/30 dark:text-emerald-300 dark:border-emerald-700/50',
+    badge: 'bg-emerald-100 text-emerald-800',
+  },
+  Histogram: {
+    icon: BarChart3,
+    color:
+      'bg-blue-50 text-blue-700 border-blue-200 dark:bg-blue-900/30 dark:text-blue-300 dark:border-blue-700/50',
+    badge: 'bg-blue-100 text-blue-800',
+  },
+  UpDownCounter: {
+    icon: Activity,
+    color:
+      'bg-amber-50 text-amber-700 border-amber-200 dark:bg-amber-900/30 dark:text-amber-300 dark:border-amber-700/50',
+    badge: 'bg-amber-100 text-amber-800',
+  },
+  Gauge: {
+    icon: Gauge,
+    color:
+      'bg-slate-50 text-slate-700 border-slate-200 dark:bg-slate-800/60 dark:text-slate-100 dark:border-slate-700',
+    badge: 'bg-slate-100 text-slate-800',
+  },
+};
+
+export function MetricCard({ name, type, unit, description, labels }: MetricCardProps) {
+  const config = typeConfig[type];
+  const Icon = config.icon;
+
+  return (
+    <div className="overflow-hidden rounded-lg border border-gray-200 bg-white transition-shadow duration-200 hover:shadow-md dark:border-neutral-800 dark:bg-neutral-900 dark:hover:shadow-black/30">
+      <div className="p-5">
+        <div className="mb-3 flex items-start justify-between gap-4">
+          <div className="min-w-0 flex-1">
+            <code className="break-all text-sm font-semibold text-gray-900 dark:text-slate-100">
+              {name}
+            </code>
+          </div>
+          <div className="flex shrink-0 items-center gap-2">
+            {unit && (
+              <div className="flex items-center gap-1.5 rounded-md border border-gray-200 bg-gray-100 px-2.5 py-1 text-xs text-gray-700 dark:border-neutral-700 dark:bg-neutral-800 dark:text-slate-200">
+                <span className="font-medium text-gray-500 dark:text-slate-300">Unit:</span>
+                <code>{unit}</code>
+              </div>
+            )}
+            <div
+              className={`flex items-center gap-1.5 rounded-md border px-2.5 py-1 ${config.color}`}
+            >
+              <Icon className="h-3.5 w-3.5" />
+              <span className="text-xs font-medium">{type}</span>
+            </div>
+          </div>
+        </div>
+
+        {description && (
+          <p className="mb-4 text-sm leading-relaxed text-gray-600 dark:text-slate-100">
+            {description}
+          </p>
+        )}
+
+        {labels && labels.length > 0 && (
+          <div className="mt-4 border-t border-gray-100 pt-4 dark:border-neutral-800">
+            <div className="mb-2 flex items-center gap-2">
+              <span className="text-xs font-semibold uppercase text-gray-700 dark:text-slate-100">
+                Labels
+              </span>
+            </div>
+            <div className="flex flex-wrap gap-1.5">
+              {labels.map(label => (
+                <code
+                  key={label}
+                  className="rounded border border-gray-200 bg-gray-50 px-2 py-1 text-xs text-gray-700 transition-colors hover:border-gray-300 dark:border-neutral-700 dark:bg-neutral-800 dark:text-slate-200 dark:hover:border-neutral-600"
+                >
+                  {label}
+                </code>
+              ))}
+            </div>
+          </div>
+        )}
+      </div>
+    </div>
+  );
+}
diff --git a/packages/web/docs/src/components/otel-metrics/metrics-section.tsx b/packages/web/docs/src/components/otel-metrics/metrics-section.tsx
new file mode 100644
index 00000000000..f0866350cc9
--- /dev/null
+++ b/packages/web/docs/src/components/otel-metrics/metrics-section.tsx
@@ -0,0 +1,55 @@
+import { LabelCard } from './label-card';
+import { MetricCard } from './metric-card';
+
+interface Metric {
+  name: string;
+  type: 'Counter' | 'Histogram' | 'UpDownCounter' | 'Gauge';
+  unit?: string;
+  description?: string;
+  labels?: string[];
+}
+
+interface Label {
+  name: string;
+  meaning: string;
+  typicalValues: string[];
+  notes?: string;
+}
+
+interface MetricsSectionProps {
+  title?: string;
+  description?: string;
+  metrics?: Metric[];
+  labels?: Label[];
+}
+export function MetricsSection({ metrics, labels }: MetricsSectionProps) {
+  return (
+    <div className="space-y-6">
+      {metrics && metrics.length > 0 && (
+        <div className="space-y-4">
+          <h4 className="mt-8 text-xl font-semibold tracking-tight text-slate-900 dark:text-slate-100">
+            Metrics
+          </h4>
+          <div className="grid gap-4">
+            {metrics.map(metric => (
+              <MetricCard key={metric.name} {...metric} />
+            ))}
+          </div>
+        </div>
+      )}
+
+      {labels && labels.length > 0 && (
+        <div className="space-y-4">
+          <h4 className="text-xl font-semibold tracking-tight text-slate-900 dark:text-slate-100">
+            Labels Reference
+          </h4>
+          <div className="grid gap-4">
+            {labels.map(label => (
+              <LabelCard key={label.name} {...label} />
+            ))}
+          </div>
+        </div>
+      )}
+    </div>
+  );
+}
diff --git a/packages/web/docs/src/content/router/observability/metrics.mdx b/packages/web/docs/src/content/router/observability/metrics.mdx
new file mode 100644
index 00000000000..0533d1c3e8b
--- /dev/null
+++ b/packages/web/docs/src/content/router/observability/metrics.mdx
@@ -0,0 +1,637 @@
+---
+title: 'OpenTelemetry Metrics'
+---
+
+import { Callout } from '#components/callout'
+import { MetricsSection } from '#components/otel-metrics/metrics-section'
+import { Tabs } from '@theguild/components'
+
+# OpenTelemetry Metrics
+
+Hive Router exposes OpenTelemetry metrics for gateway traffic, subgraph traffic, cache behavior,
+supergraph lifecycle, and GraphQL errors.
+
+This guide explains where to export metrics, how to configure OTLP and Prometheus, how to customize
+instruments, and what each metric/label means in practice.
+
+## Choose your metrics destination
+
+Hive Router supports two common metrics paths:
+
+- OTLP-compatible backends
+- Prometheus scraping
+
+In practice, teams with existing OpenTelemetry pipelines usually choose OTLP. Teams with existing
+Prometheus/Grafana stacks usually choose Prometheus.
+
+### Send metrics to OTLP-compatible backends
+
+Hive Router can export metrics directly to any OTLP-compatible destination, including OpenTelemetry
+Collector and vendor backends that support OTLP ingestion, either through HTTP or gRPC.
+
+After enabling the exporter, send traffic through the router and verify that new metric series
+appear in your backend (for example HTTP server/client duration, cache metrics, and supergraph
+metrics). If metrics are missing, start by validating endpoint reachability and auth credentials,
+then check exporter protocol alignment (HTTP vs gRPC).
+
+<Tabs items={["OTLP over HTTP", "OTLP over gRPC"]}>
+
+<Tabs.Tab>
+
+```yaml filename="router.config.yaml"
+telemetry:
+  metrics:
+    exporters:
+      - kind: otlp
+        enabled: true
+        protocol: http
+        endpoint: https://otel-collector.example.com/v1/metrics
+        interval: 30s
+        max_export_timeout: 5s
+        http:
+          headers:
+            authorization:
+              expression: |
+                "Bearer " + env("OTLP_TOKEN")
+```
+
+</Tabs.Tab>
+
+<Tabs.Tab>
+
+```yaml filename="router.config.yaml"
+telemetry:
+  metrics:
+    exporters:
+      - kind: otlp
+        enabled: true
+        protocol: grpc
+        endpoint: https://otel-collector.example.com:4317
+        interval: 30s
+        max_export_timeout: 5s
+        grpc:
+          metadata:
+            x-api-key:
+              expression: env("OTEL_API_KEY")
+          tls:
+            domain_name: otel-collector.example.com
+            ca: /etc/certs/ca.pem
+            cert: /etc/certs/client.pem
+            key: /etc/certs/client.key
+```
+
+</Tabs.Tab>
+
+</Tabs>
+
+### Expose metrics for Prometheus scraping
+
+If your observability stack is Prometheus-first, Hive Router can expose a scrape endpoint that
+Prometheus polls on a schedule.
+
+The `port` and `path` settings define where Router serves metrics. Prometheus must be able to reach
+that address from its runtime environment (local network, Kubernetes service, or VM network path).
+
+<Callout type="note">
+  If `port` is not set, or is the same as the main HTTP server port, the Router exposes metrics
+  through the same HTTP server that serves the GraphQL API. If the port is different, the Router
+  starts a separate HTTP server dedicated solely to the Prometheus metrics endpoint.
+</Callout>
+
+In production, make sure this endpoint is reachable only by trusted scrapers and that any ingress or
+firewall policy allows Prometheus access. Once configured, confirm the target appears as healthy
+(`UP`) in Prometheus and then verify expected series are present (for example
+`http.server.request.duration`, `http.client.request.duration`, and `hive.router.*` cache/supergraph
+metrics).
+
+```yaml filename="router.config.yaml"
+telemetry:
+  metrics:
+    exporters:
+      - kind: prometheus
+        enabled: true
+        port: 9090
+        path: /metrics
+```
+
+## Production baseline
+
+For production workloads, start with one primary exporter, a clear service identity, and default
+instrument settings.
+
+```yaml filename="router.config.yaml"
+telemetry:
+  resource:
+    attributes:
+      service.name: hive-router
+      service.namespace: your-platform
+      deployment.environment:
+        expression: env("ENVIRONMENT")
+  metrics:
+    exporters:
+      - kind: otlp
+        enabled: true
+        protocol: grpc
+        endpoint: https://otel-collector.example.com:4317
+        interval: 30s
+        max_export_timeout: 5s
+```
+
+This is a safe baseline and works well before introducing instrumentation-level customization. By
+default, all metrics and labels are exposed.
+
+## Instrument customization
+
+You can override behavior per metric under `telemetry.metrics.instrumentation.instruments`.
+
+- `false` disables a metric.
+- `true` keeps default behavior.
+- an object enables the metric and optionally overrides its attributes (labels).
+
+```yaml filename="router.config.yaml"
+telemetry:
+  metrics:
+    instrumentation:
+      instruments:
+        # Disable HTTP server request duration metric
+        http.server.request.duration: false
+        http.client.request.duration:
+          attributes:
+            # Disable the label
+            subgraph.name: false
+            # Enable the label (labels are enabled by default)
+            http.response.status_code: true
+```
+
+Attribute override behavior:
+
+- `false` - drop label from that metric
+- `true` - keep label (all labels are enabled by default)
+
+## Metrics reference
+
+### GraphQL
+
+GraphQL metrics focus on validation, parsing and execution-time errors surfaced by the router.
+
+<MetricsSection
+  metrics={[
+    {
+      name: 'hive.router.graphql.errors_total',
+      type: 'Counter',
+      unit: '{error}',
+      description:
+        'Total count of GraphQL errors encountered during query processing and execution, categorized by error code.',
+      labels: ['code']
+    }
+  ]}
+  labels={[
+    {
+      name: 'code',
+      meaning: 'GraphQL error code',
+      typicalValues: ['GRAPHQL_PARSE_FAILED', 'GRAPHQL_VALIDATION_FAILED', 'PLAN_EXECUTION_FAILED', 'UNKNOWN', '...'],
+      notes:
+        `Uses "extensions.code" values and router's error codes. "UNKNOWN" is used when no code is available.`
+    }
+  ]}
+/>
+
+### Supergraph
+
+Supergraph metrics cover polling and processing lifecycle of schema updates.
+
+<MetricsSection
+  metrics={[
+    {
+      name: 'hive.router.supergraph.poll.total',
+      type: 'Counter',
+      description: 'Total number of supergraph polling attempts, categorized by poll result.',
+      labels: ['result']
+    },
+    {
+      name: 'hive.router.supergraph.poll.duration',
+      type: 'Histogram',
+      unit: 'Seconds',
+      description: 'Duration of supergraph polling attempts, categorized by poll result.',
+      labels: ['result']
+    },
+    {
+      name: 'hive.router.supergraph.process.duration',
+      type: 'Histogram',
+      unit: 'Seconds',
+      description: 'Time spent processing supergraph updates, categorized by status.',
+      labels: ['status']
+    }
+  ]}
+  labels={[
+    {
+      name: 'result',
+      meaning: 'Result of the poll',
+      typicalValues: ['updated', 'not_modified', 'error'],
+      notes: 'Used by "hive.router.supergraph.poll.*" metrics only'
+    },
+    {
+      name: 'status',
+      meaning: 'Supergraph processing status',
+      typicalValues: ['ok', 'error'],
+      notes: 'Used by "hive.router.supergraph.process.*" metrics only'
+    }
+  ]}
+/>
+
+### HTTP server
+
+HTTP server metrics describe inbound client traffic handled by the router.
+
+<MetricsSection
+  metrics={[
+    {
+      name: 'http.server.request.duration',
+      type: 'Histogram',
+      unit: 'Seconds',
+      description: 'Duration of inbound HTTP requests handled by the router.',
+      labels: [
+        'http.request.method',
+        'http.response.status_code',
+        'http.route',
+        'network.protocol.name',
+        'network.protocol.version',
+        'url.scheme',
+        'error.type'
+      ]
+    },
+    {
+      name: 'http.server.request.body.size',
+      type: 'Histogram',
+      unit: 'Bytes',
+      description: 'Size of inbound HTTP request bodies handled by the router.',
+      labels: [
+        'http.request.method',
+        'http.response.status_code',
+        'http.route',
+        'network.protocol.name',
+        'network.protocol.version',
+        'url.scheme',
+        'error.type'
+      ]
+    },
+    {
+      name: 'http.server.response.body.size',
+      type: 'Histogram',
+      unit: 'Bytes',
+      description: 'Size of outbound HTTP response bodies returned by the router.',
+      labels: [
+        'http.request.method',
+        'http.response.status_code',
+        'http.route',
+        'network.protocol.name',
+        'network.protocol.version',
+        'url.scheme',
+        'error.type'
+      ]
+    },
+    {
+      name: 'http.server.active_requests',
+      type: 'UpDownCounter',
+      unit: '{request}',
+      description: 'Current number of in-flight inbound HTTP requests.',
+      labels: ['http.request.method', 'network.protocol.name', 'url.scheme']
+    }
+  ]}
+  labels={[
+    {
+      name: 'http.request.method',
+      meaning: 'HTTP method',
+      typicalValues: [
+        'GET',
+        'POST',
+        'PUT',
+        'PATCH',
+        'DELETE',
+        'HEAD',
+        'OPTIONS',
+        'CONNECT',
+        'TRACE',
+        'QUERY',
+        '_OTHER'
+      ],
+      notes: '_OTHER is fallback for unknown methods'
+    },
+    {
+      name: 'http.response.status_code',
+      meaning: 'Response status code',
+      typicalValues: ['200', '400', '500', '...']
+    },
+    {
+      name: 'http.route',
+      meaning: 'Normalized router path',
+      typicalValues: ['/graphql']
+    },
+    {
+      name: 'network.protocol.name',
+      meaning: 'Protocol name',
+      typicalValues: ['http']
+    },
+    {
+      name: 'network.protocol.version',
+      meaning: 'Protocol version',
+      typicalValues: ['0.9', '1.0', '1.1', '2', '3']
+    },
+    {
+      name: 'url.scheme',
+      meaning: 'URL scheme',
+      typicalValues: ['http', 'https']
+    },
+    {
+      name: 'error.type',
+      meaning: 'Error classification for failed requests',
+      typicalValues: ['status code >= 400'],
+      notes: 'Only set for failed requests'
+    }
+  ]}
+/>
+
+### HTTP client
+
+HTTP client metrics describe outbound requests made by the router to subgraphs.
+
+<MetricsSection
+  metrics={[
+    {
+      name: 'http.client.request.duration',
+      type: 'Histogram',
+      unit: 'Seconds',
+      description: 'Duration of outbound HTTP requests sent from router to subgraphs.',
+      labels: [
+        'http.request.method',
+        'server.address',
+        'server.port',
+        'network.protocol.name',
+        'network.protocol.version',
+        'url.scheme',
+        'subgraph.name',
+        'http.response.status_code',
+        'error.type'
+      ]
+    },
+    {
+      name: 'http.client.request.body.size',
+      type: 'Histogram',
+      unit: 'Bytes',
+      description: 'Size of outbound HTTP request bodies sent to subgraphs.',
+      labels: [
+        'http.request.method',
+        'server.address',
+        'server.port',
+        'network.protocol.name',
+        'network.protocol.version',
+        'url.scheme',
+        'subgraph.name',
+        'http.response.status_code',
+        'error.type'
+      ]
+    },
+    {
+      name: 'http.client.response.body.size',
+      type: 'Histogram',
+      unit: 'Bytes',
+      description: 'Size of HTTP response bodies returned by subgraphs.',
+      labels: [
+        'http.request.method',
+        'server.address',
+        'server.port',
+        'network.protocol.name',
+        'network.protocol.version',
+        'url.scheme',
+        'subgraph.name',
+        'http.response.status_code',
+        'error.type'
+      ]
+    },
+    {
+      name: 'http.client.active_requests',
+      type: 'UpDownCounter',
+      unit: '{request}',
+      description: 'Current number of in-flight outbound HTTP requests to subgraphs.',
+      labels: [
+        'http.request.method',
+        'server.address',
+        'server.port',
+        'url.scheme',
+        'subgraph.name'
+      ]
+    }
+  ]}
+  labels={[
+    {
+      name: 'http.request.method',
+      meaning: 'HTTP method',
+      typicalValues: [
+        'GET',
+        'POST',
+        'PUT',
+        'PATCH',
+        'DELETE',
+        'HEAD',
+        'OPTIONS',
+        'CONNECT',
+        'TRACE',
+        'QUERY',
+        '_OTHER'
+      ],
+      notes: '_OTHER is fallback for unknown methods'
+    },
+    {
+      name: 'http.response.status_code',
+      meaning: 'Response status code',
+      typicalValues: ['200', '400', '500', '...']
+    },
+    {
+      name: 'network.protocol.name',
+      meaning: 'Protocol name',
+      typicalValues: ['http']
+    },
+    {
+      name: 'network.protocol.version',
+      meaning: 'Protocol version',
+      typicalValues: ['0.9', '1.0', '1.1', '2', '3']
+    },
+    {
+      name: 'url.scheme',
+      meaning: 'URL scheme',
+      typicalValues: ['http', 'https']
+    },
+    {
+      name: 'server.address',
+      meaning: 'Subgraph host',
+      typicalValues: ['URI host', 'unknown'],
+      notes: 'URI host, or unknown fallback'
+    },
+    {
+      name: 'server.port',
+      meaning: 'Subgraph port',
+      typicalValues: ['80', '443'],
+      notes: 'Explicit URI port, or fallback 80/443'
+    },
+    {
+      name: 'subgraph.name',
+      meaning: 'Subgraph identifier',
+      typicalValues: ['accounts'],
+      notes: 'Configured names (for example "accounts")'
+    },
+    {
+      name: 'error.type',
+      meaning: 'Error classification',
+      typicalValues: ['400', 'SUBGRAPH_REQUEST_FAILURE', '...'],
+      notes: 'Numeric status code >= 400 or execution error code string'
+    }
+  ]}
+/>
+
+### Cache
+
+Cache metrics track lookup behavior and cache size across router caches.
+
+#### Parsing cache
+
+Parsing cache metrics measure query parse cache hit/miss behavior and cache size.
+
+<MetricsSection
+  metrics={[
+    {
+      name: 'hive.router.parse_cache.requests_total',
+      type: 'Counter',
+      description: 'Total number of parsing cache lookups, categorized by result.',
+      labels: ['result']
+    },
+    {
+      name: 'hive.router.parse_cache.duration',
+      type: 'Histogram',
+      unit: 'Seconds',
+      description: 'Duration of parsing cache lookups, categorized by result.',
+      labels: ['result']
+    },
+    {
+      name: 'hive.router.parse_cache.size',
+      type: 'Gauge',
+      description: 'Current number of entries stored in the parsing cache.'
+    }
+  ]}
+/>
+
+#### Validation cache
+
+Validation cache metrics measure query validation cache hit/miss behavior and cache size.
+
+<MetricsSection
+  metrics={[
+    {
+      name: 'hive.router.validate_cache.requests_total',
+      type: 'Counter',
+      description: 'Total number of validation cache lookups, categorized by result.',
+      labels: ['result']
+    },
+    {
+      name: 'hive.router.validate_cache.duration',
+      type: 'Histogram',
+      unit: 'Seconds',
+      description: 'Duration of validation cache lookups, categorized by result.',
+      labels: ['result']
+    },
+    {
+      name: 'hive.router.validate_cache.size',
+      type: 'Gauge',
+      description: 'Current number of entries stored in the validation cache.'
+    }
+  ]}
+/>
+
+#### Normalization cache
+
+Normalization cache metrics measure query normalization cache hit/miss behavior and cache size.
+
+<MetricsSection
+  metrics={[
+    {
+      name: 'hive.router.normalize_cache.requests_total',
+      type: 'Counter',
+      description: 'Total number of normalization cache lookups, categorized by result.',
+      labels: ['result']
+    },
+    {
+      name: 'hive.router.normalize_cache.duration',
+      type: 'Histogram',
+      unit: 'Seconds',
+      description: 'Duration of normalization cache lookups, categorized by result.',
+      labels: ['result']
+    },
+    {
+      name: 'hive.router.normalize_cache.size',
+      type: 'Gauge',
+      description: 'Current number of entries stored in the normalization cache.'
+    }
+  ]}
+/>
+
+#### Planning cache
+
+Planning cache metrics measure query planning cache hit/miss behavior and cache size.
+
+<MetricsSection
+  metrics={[
+    {
+      name: 'hive.router.plan_cache.requests_total',
+      type: 'Counter',
+      description: 'Total number of planning cache lookups, categorized by result.',
+      labels: ['result']
+    },
+    {
+      name: 'hive.router.plan_cache.duration',
+      type: 'Histogram',
+      unit: 'Seconds',
+      description: 'Duration of planning cache lookups, categorized by result.',
+      labels: ['result']
+    },
+    {
+      name: 'hive.router.plan_cache.size',
+      type: 'Gauge',
+      description: 'Current number of entries stored in the planning cache.'
+    }
+  ]}
+/>
+
+#### Labels
+
+These labels are shared by cache lookup counters and duration histograms.
+
+<MetricsSection
+  labels={[
+    {
+      name: 'result',
+      meaning: 'Cache lookup outcome',
+      typicalValues: ['hit', 'miss'],
+      notes: 'Used by cache "requests_total" and "duration" metrics'
+    }
+  ]}
+/>
+
+## Troubleshooting
+
+When metrics are missing or incomplete, check in layers:
+
+- exporter setup
+- instrument overrides
+- label overrides
+- transport reachability
+
+If no metrics appear at all, verify exporter enablement, endpoint reachability, and credentials.
+
+If a configured instrument key is unknown, startup fails with an error listing valid metric names.
+
+If a configured attribute key is unknown, Router logs a warning and ignores it.
+
+## Configuration reference
+
+For full options and defaults, see:
+
+- [telemetry configuration reference](/docs/router/configuration/telemetry)

From 02b1b0be4d115a2ff68285b930f51843facc5ba3 Mon Sep 17 00:00:00 2001
From: Kamil Kisiela <kamil.kisiela@gmail.com>
Date: Fri, 13 Feb 2026 11:52:04 +0100
Subject: [PATCH 02/14] metrics: flatten label cards, add metric anchor links

---
 .../components/otel-metrics/label-card.tsx    | 76 +++++++++----------
 .../components/otel-metrics/metric-card.tsx   | 76 ++++++++++++++++++-
 .../otel-metrics/metrics-section.tsx          | 41 ++++++++--
 .../content/router/observability/metrics.mdx  | 22 +++---
 4 files changed, 154 insertions(+), 61 deletions(-)

diff --git a/packages/web/docs/src/components/otel-metrics/label-card.tsx b/packages/web/docs/src/components/otel-metrics/label-card.tsx
index 357a0ed020d..182630dc29c 100644
--- a/packages/web/docs/src/components/otel-metrics/label-card.tsx
+++ b/packages/web/docs/src/components/otel-metrics/label-card.tsx
@@ -9,51 +9,49 @@ interface LabelCardProps {
 
 export function LabelCard({ name, meaning, typicalValues, notes }: LabelCardProps) {
   return (
-    <div className="overflow-hidden rounded-lg border border-gray-200 bg-white transition-shadow duration-200 hover:shadow-md dark:border-neutral-800 dark:bg-neutral-900 dark:hover:shadow-black/30">
-      <div className="p-5">
-        <div className="mb-3 flex items-start gap-3">
-          <div className="shrink-0 rounded-md border border-gray-200 bg-gray-100 p-1.5 dark:border-neutral-700 dark:bg-neutral-800">
-            <Tag className="h-4 w-4 text-gray-600 dark:text-slate-100" />
+    <div>
+      <div className="mb-3 flex items-start gap-3">
+        <div className="shrink-0 rounded-md border border-gray-200 bg-gray-100 p-1.5 dark:border-neutral-700 dark:bg-neutral-800">
+          <Tag className="h-4 w-4 text-gray-600 dark:text-slate-100" />
+        </div>
+        <div className="min-w-0 flex-1">
+          <code className="break-all text-sm font-semibold text-gray-900 dark:text-slate-100">
+            {name}
+          </code>
+          <p className="mt-1 text-sm leading-relaxed text-gray-600 dark:text-slate-100">
+            {meaning}
+          </p>
+        </div>
+      </div>
+
+      <div className="mt-4 space-y-3">
+        <div>
+          <div className="mb-2 flex items-center gap-1.5">
+            <Info className="h-3.5 w-3.5 text-gray-500 dark:text-slate-400" />
+            <span className="text-xs font-semibold uppercase text-gray-700 dark:text-slate-100">
+              Typical Values
+            </span>
           </div>
-          <div className="min-w-0 flex-1">
-            <code className="break-all text-sm font-semibold text-gray-900 dark:text-slate-100">
-              {name}
-            </code>
-            <p className="mt-1 text-sm leading-relaxed text-gray-600 dark:text-slate-100">
-              {meaning}
-            </p>
+          <div className="flex flex-wrap gap-1.5">
+            {typicalValues.map(value => (
+              <code
+                key={value}
+                className="rounded-md border border-slate-200 bg-slate-50 px-2.5 py-1 text-xs font-medium text-slate-700 dark:border-neutral-700 dark:bg-neutral-800 dark:text-slate-200"
+              >
+                {value}
+              </code>
+            ))}
           </div>
         </div>
 
-        <div className="mt-4 space-y-3">
-          <div>
-            <div className="mb-2 flex items-center gap-1.5">
-              <Info className="h-3.5 w-3.5 text-gray-500 dark:text-slate-400" />
-              <span className="text-xs font-semibold uppercase text-gray-700 dark:text-slate-100">
-                Typical Values
-              </span>
-            </div>
-            <div className="flex flex-wrap gap-1.5">
-              {typicalValues.map(value => (
-                <code
-                  key={value}
-                  className="rounded-md border border-slate-200 bg-slate-50 px-2.5 py-1 text-xs font-medium text-slate-700 dark:border-neutral-700 dark:bg-neutral-800 dark:text-slate-200"
-                >
-                  {value}
-                </code>
-              ))}
+        {notes && (
+          <div className="pt-1">
+            <div className="flex items-start gap-2">
+              <Lightbulb className="mt-0.5 h-3.5 w-3.5 shrink-0 text-amber-600 dark:text-amber-400" />
+              <p className="text-sm leading-relaxed text-gray-600 dark:text-slate-100">{notes}</p>
             </div>
           </div>
-
-          {notes && (
-            <div className="border-t border-gray-100 pt-3 dark:border-neutral-800">
-              <div className="flex items-start gap-2">
-                <Lightbulb className="mt-0.5 h-3.5 w-3.5 shrink-0 text-amber-600 dark:text-amber-400" />
-                <p className="text-sm leading-relaxed text-gray-600 dark:text-slate-100">{notes}</p>
-              </div>
-            </div>
-          )}
-        </div>
+        )}
       </div>
     </div>
   );
diff --git a/packages/web/docs/src/components/otel-metrics/metric-card.tsx b/packages/web/docs/src/components/otel-metrics/metric-card.tsx
index b31d81488f6..116937dbb65 100644
--- a/packages/web/docs/src/components/otel-metrics/metric-card.tsx
+++ b/packages/web/docs/src/components/otel-metrics/metric-card.tsx
@@ -1,3 +1,4 @@
+import { useEffect, useRef, useState } from 'react';
 import { Activity, BarChart3, Gauge, TrendingUp } from 'lucide-react';
 
 interface MetricCardProps {
@@ -38,15 +39,82 @@ const typeConfig = {
 export function MetricCard({ name, type, unit, description, labels }: MetricCardProps) {
   const config = typeConfig[type];
   const Icon = config.icon;
+  const [isCopied, setIsCopied] = useState(false);
+  const copiedTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+  const metricId = `metric-${name
+    .toLowerCase()
+    .replace(/[^a-z0-9]+/g, '-')
+    .replace(/(^-|-$)/g, '')}`;
+
+  useEffect(() => {
+    return () => {
+      if (copiedTimeoutRef.current) {
+        clearTimeout(copiedTimeoutRef.current);
+      }
+    };
+  }, []);
+
+  function showCopiedState() {
+    setIsCopied(true);
+
+    if (copiedTimeoutRef.current) {
+      clearTimeout(copiedTimeoutRef.current);
+    }
+
+    copiedTimeoutRef.current = setTimeout(() => {
+      setIsCopied(false);
+    }, 1200);
+  }
+
+  async function copyMetricLink() {
+    if (typeof window === 'undefined') {
+      return;
+    }
+
+    const metricUrl = `${window.location.origin}${window.location.pathname}${window.location.search}#${metricId}`;
+
+    try {
+      await navigator.clipboard.writeText(metricUrl);
+      showCopiedState();
+    } catch {
+      window.location.hash = metricId;
+    }
+  }
 
   return (
-    <div className="overflow-hidden rounded-lg border border-gray-200 bg-white transition-shadow duration-200 hover:shadow-md dark:border-neutral-800 dark:bg-neutral-900 dark:hover:shadow-black/30">
+    <div
+      id={metricId}
+      className="group scroll-mt-20 overflow-hidden rounded-lg border border-gray-200 bg-white transition-shadow duration-200 hover:shadow-md dark:border-neutral-800 dark:bg-neutral-900 dark:hover:shadow-black/30"
+    >
       <div className="p-5">
         <div className="mb-3 flex items-start justify-between gap-4">
           <div className="min-w-0 flex-1">
-            <code className="break-all text-sm font-semibold text-gray-900 dark:text-slate-100">
-              {name}
-            </code>
+            <div className="flex items-center gap-1.5">
+              <code className="break-all text-sm font-semibold text-gray-900 dark:text-slate-100">
+                {name}
+              </code>
+              <button
+                type="button"
+                onClick={() => {
+                  void copyMetricLink();
+                }}
+                className={`hive-focus inline-flex items-center justify-center rounded font-mono text-sm font-semibold leading-none transition-all duration-200 ${isCopied ? 'translate-y-0 text-gray-500 opacity-100 dark:text-slate-500' : 'translate-y-0 text-gray-500 opacity-0 hover:text-gray-700 focus:text-gray-700 group-focus-within:opacity-100 group-hover:opacity-100 dark:text-slate-500 dark:hover:text-slate-200 dark:focus:text-slate-200'}`}
+                aria-label={`Copy link to ${name}`}
+                title="Copy metric link"
+              >
+                {isCopied ? (
+                  <>
+                    <span>✓</span>
+                    <span className="ml-1 text-xs">copied</span>
+                  </>
+                ) : (
+                  '#'
+                )}
+              </button>
+              <span className="sr-only" aria-live="polite">
+                {isCopied ? `Copied link to ${name}` : ''}
+              </span>
+            </div>
           </div>
           <div className="flex shrink-0 items-center gap-2">
             {unit && (
diff --git a/packages/web/docs/src/components/otel-metrics/metrics-section.tsx b/packages/web/docs/src/components/otel-metrics/metrics-section.tsx
index f0866350cc9..72c4dd68b45 100644
--- a/packages/web/docs/src/components/otel-metrics/metrics-section.tsx
+++ b/packages/web/docs/src/components/otel-metrics/metrics-section.tsx
@@ -1,3 +1,7 @@
+'use client';
+
+import { useId, useState } from 'react';
+import { ChevronDown } from 'lucide-react';
 import { LabelCard } from './label-card';
 import { MetricCard } from './metric-card';
 
@@ -23,6 +27,9 @@ interface MetricsSectionProps {
   labels?: Label[];
 }
 export function MetricsSection({ metrics, labels }: MetricsSectionProps) {
+  const [isLabelsOpen, setIsLabelsOpen] = useState(false);
+  const labelsRegionId = useId();
+
   return (
     <div className="space-y-6">
       {metrics && metrics.length > 0 && (
@@ -39,14 +46,32 @@ export function MetricsSection({ metrics, labels }: MetricsSectionProps) {
       )}
 
       {labels && labels.length > 0 && (
-        <div className="space-y-4">
-          <h4 className="text-xl font-semibold tracking-tight text-slate-900 dark:text-slate-100">
-            Labels Reference
-          </h4>
-          <div className="grid gap-4">
-            {labels.map(label => (
-              <LabelCard key={label.name} {...label} />
-            ))}
+        <div className="overflow-hidden rounded-lg border border-gray-200 bg-white dark:border-neutral-800 dark:bg-neutral-900">
+          <button
+            type="button"
+            onClick={() => setIsLabelsOpen(current => !current)}
+            aria-expanded={isLabelsOpen}
+            aria-controls={labelsRegionId}
+            className="hive-focus flex w-full items-center justify-between px-5 py-4 text-left text-xl font-semibold tracking-tight text-slate-900 dark:text-slate-100"
+          >
+            <span>Labels Reference</span>
+            <ChevronDown
+              className={`h-5 w-5 transition-transform duration-200 ${isLabelsOpen ? 'rotate-180' : ''}`}
+            />
+          </button>
+          <div
+            id={labelsRegionId}
+            className={`overflow-hidden transition-[max-height,opacity] duration-300 ease-out ${isLabelsOpen ? 'max-h-[4000px] opacity-100' : 'max-h-0 opacity-90'}`}
+          >
+            <div className="border-t border-gray-100 px-5 pb-5 dark:border-neutral-800">
+              <div className="divide-y divide-gray-100 pt-2 dark:divide-neutral-800">
+                {labels.map(label => (
+                  <div key={label.name} className="py-6">
+                    <LabelCard {...label} />
+                  </div>
+                ))}
+              </div>
+            </div>
           </div>
         </div>
       )}
diff --git a/packages/web/docs/src/content/router/observability/metrics.mdx b/packages/web/docs/src/content/router/observability/metrics.mdx
index 0533d1c3e8b..b228701444c 100644
--- a/packages/web/docs/src/content/router/observability/metrics.mdx
+++ b/packages/web/docs/src/content/router/observability/metrics.mdx
@@ -604,16 +604,18 @@ Planning cache metrics measure query planning cache hit/miss behavior and cache
 
 These labels are shared by cache lookup counters and duration histograms.
 
-<MetricsSection
-  labels={[
-    {
-      name: 'result',
-      meaning: 'Cache lookup outcome',
-      typicalValues: ['hit', 'miss'],
-      notes: 'Used by cache `requests_total` and `duration` metrics'
-    }
-  ]}
-/>
+<div className="mt-4">
+    <MetricsSection
+    labels={[
+        {
+        name: 'result',
+        meaning: 'Cache lookup outcome',
+        typicalValues: ['hit', 'miss'],
+        notes: 'Used by cache `requests_total` and `duration` metrics'
+        }
+    ]}
+    />
+</div>
 
 ## Troubleshooting
 

From 40e8fe32b9d1d7dcadd6eaf88d832a1c0777de28 Mon Sep 17 00:00:00 2001
From: Kamil Kisiela <kamil.kisiela@gmail.com>
Date: Fri, 13 Feb 2026 11:57:03 +0100
Subject: [PATCH 03/14] sad

---
 .../content/router/observability/metrics.mdx  | 51 ++++++++++++++++---
 1 file changed, 44 insertions(+), 7 deletions(-)

diff --git a/packages/web/docs/src/content/router/observability/metrics.mdx b/packages/web/docs/src/content/router/observability/metrics.mdx
index b228701444c..831b4fb41df 100644
--- a/packages/web/docs/src/content/router/observability/metrics.mdx
+++ b/packages/web/docs/src/content/router/observability/metrics.mdx
@@ -189,9 +189,14 @@ GraphQL metrics focus on validation, parsing and execution-time errors surfaced
     {
       name: 'code',
       meaning: 'GraphQL error code',
-      typicalValues: ['GRAPHQL_PARSE_FAILED', 'GRAPHQL_VALIDATION_FAILED', 'PLAN_EXECUTION_FAILED', 'UNKNOWN', '...'],
-      notes:
-        `Uses "extensions.code" values and router's error codes. "UNKNOWN" is used when no code is available.`
+      typicalValues: [
+        'GRAPHQL_PARSE_FAILED',
+        'GRAPHQL_VALIDATION_FAILED',
+        'PLAN_EXECUTION_FAILED',
+        'UNKNOWN',
+        '...'
+      ],
+      notes: `Uses "extensions.code" values and router's error codes. "UNKNOWN" is used when no code is available.`
     }
   ]}
 />
@@ -605,18 +610,50 @@ Planning cache metrics measure query planning cache hit/miss behavior and cache
 These labels are shared by cache lookup counters and duration histograms.
 
 <div className="mt-4">
-    <MetricsSection
+  <MetricsSection
     labels={[
-        {
+      {
         name: 'result',
         meaning: 'Cache lookup outcome',
         typicalValues: ['hit', 'miss'],
         notes: 'Used by cache `requests_total` and `duration` metrics'
-        }
+      }
     ]}
-    />
+  />
 </div>
 
+## Production observability playbook
+
+### Monitor latency of your GraphQL API
+
+Use `http.server.request.duration` as your primary latency signal. In production, break this metric
+down by `http.route`, `http.request.method`, and `http.response.status_code`, then track p95 and p99
+per route and method. Keep successful and failed responses separated so error-path latency does not
+get hidden by healthy traffic.
+
+### Monitor health of your subgraphs
+
+Use `http.client.request.duration` and `http.client.active_requests` to monitor dependency health
+across your federated graph. Break these metrics down by `subgraph.name`,
+`http.response.status_code`, and `error.type` to identify which subgraph is driving tail latency or
+error spikes. When you need infrastructure-level debugging, add `server.address` and `server.port`
+to distinguish endpoint-level issues from service-level behavior.
+
+### Monitor cache effectiveness and planning pressure
+
+Use the cache metric families `hive.router.parse_cache.*`, `hive.router.validate_cache.*`,
+`hive.router.normalize_cache.*`, and `hive.router.plan_cache.*` to evaluate cache behavior over
+time. For request and duration metrics, split by `result` (`hit` and `miss`) so you can track hit
+ratio and miss latency per cache tier. Combine those views with cache size trends to catch sustained
+cache-efficiency regressions, especially in planning cache paths.
+
+### Monitor GraphQL errors over time
+
+Use `hive.router.graphql.errors_total` and break it down by `code` to track both volume and error
+shape. In production, monitor how error-code distribution changes over time, not only total count,
+so you can separate validation issues from execution failures. Alert on sharp increases in
+`GRAPHQL_VALIDATION_FAILED`, `PLAN_EXECUTION_FAILED`, or `UNKNOWN`.
+
 ## Troubleshooting
 
 When metrics are missing or incomplete, check in layers:

From fbadb6de2a97e1af41a78585b92f7078c11b034e Mon Sep 17 00:00:00 2001
From: Kamil Kisiela <kamil.kisiela@gmail.com>
Date: Fri, 13 Feb 2026 12:07:57 +0100
Subject: [PATCH 04/14] asd

---
 .../content/router/observability/metrics.mdx  | 64 ++++++++-----------
 1 file changed, 27 insertions(+), 37 deletions(-)

diff --git a/packages/web/docs/src/content/router/observability/metrics.mdx b/packages/web/docs/src/content/router/observability/metrics.mdx
index 831b4fb41df..bd60da693a5 100644
--- a/packages/web/docs/src/content/router/observability/metrics.mdx
+++ b/packages/web/docs/src/content/router/observability/metrics.mdx
@@ -140,7 +140,7 @@ telemetry:
 This is a safe baseline and works well before introducing instrumentation-level customization. By
 default, all metrics and labels are exposed.
 
-## Instrument customization
+## Customize instrumentation
 
 You can override behavior per metric under `telemetry.metrics.instrumentation.instruments`.
 
@@ -622,55 +622,45 @@ These labels are shared by cache lookup counters and duration histograms.
   />
 </div>
 
-## Production observability playbook
+## What to monitor in production
 
-### Monitor latency of your GraphQL API
-
-Use `http.server.request.duration` as your primary latency signal. In production, break this metric
-down by `http.route`, `http.request.method`, and `http.response.status_code`, then track p95 and p99
-per route and method. Keep successful and failed responses separated so error-path latency does not
-get hidden by healthy traffic.
+The examples below show what to monitor in production and how to break metrics down so you can
+quickly detect and isolate API, subgraph, cache, and GraphQL issues.
 
-### Monitor health of your subgraphs
+### Monitor latency of your GraphQL API
 
-Use `http.client.request.duration` and `http.client.active_requests` to monitor dependency health
-across your federated graph. Break these metrics down by `subgraph.name`,
-`http.response.status_code`, and `error.type` to identify which subgraph is driving tail latency or
-error spikes. When you need infrastructure-level debugging, add `server.address` and `server.port`
-to distinguish endpoint-level issues from service-level behavior.
+Use [`http.server.request.duration`](#metric-http-server-request-duration) as your primary latency
+signal.
 
-### Monitor cache effectiveness and planning pressure
+In production, break this metric down by `http.route`, `http.request.method`, and
+`http.response.status_code`, then track p95 and p99 per route and method. Keep successful and failed
+responses separated so error-path latency does not get hidden by healthy traffic.
 
-Use the cache metric families `hive.router.parse_cache.*`, `hive.router.validate_cache.*`,
-`hive.router.normalize_cache.*`, and `hive.router.plan_cache.*` to evaluate cache behavior over
-time. For request and duration metrics, split by `result` (`hit` and `miss`) so you can track hit
-ratio and miss latency per cache tier. Combine those views with cache size trends to catch sustained
-cache-efficiency regressions, especially in planning cache paths.
+### Monitor health of your subgraphs
 
-### Monitor GraphQL errors over time
+Use [`http.client.request.duration`](#metric-http-client-request-duration) and
+[`http.client.active_requests`](#metric-http-client-active-requests) to monitor dependency health
+across your federated graph.
 
-Use `hive.router.graphql.errors_total` and break it down by `code` to track both volume and error
-shape. In production, monitor how error-code distribution changes over time, not only total count,
-so you can separate validation issues from execution failures. Alert on sharp increases in
-`GRAPHQL_VALIDATION_FAILED`, `PLAN_EXECUTION_FAILED`, or `UNKNOWN`.
+Break these metrics down by `subgraph.name`, `http.response.status_code`, and `error.type` to
+identify which subgraph is driving tail latency or error spikes.
 
-## Troubleshooting
+### Monitor cache effectiveness and planning pressure
 
-When metrics are missing or incomplete, check in layers:
+Use the cache metrics to evaluate cache behavior over time.
 
-- exporter setup
-- instrument overrides
-- label overrides
-- transport reachability
+For request and duration metrics, split by `result` (`hit` and `miss`) so you can track hit ratio
+and miss latency per cache tier.
 
-If no metrics appear at all, verify exporter enablement, endpoint reachability, and credentials.
+### Monitor GraphQL errors over time
 
-If a configured instrument key is unknown, startup fails with a clear error and valid metric names.
+Use [`hive.router.graphql.errors_total`](#metric-hive-router-graphql-errors-total) and break it down
+by `code` to track both volume and error shape.
 
-If a configured attribute key is unknown, Router logs a warning and ignores it.
+In production, monitor how error-code distribution changes over time, not only total count, so you
+can separate validation issues from execution failures.
 
 ## Configuration reference
 
-For full options and defaults, see:
-
-- [telemetry configuration reference](/docs/router/configuration/telemetry)
+For full options and defaults, see the
+[telemetry configuration reference](/docs/router/configuration/telemetry).

From f66246fbd81fb24bcbf6ce77e0551d50e8bb5a8a Mon Sep 17 00:00:00 2001
From: Kamil Kisiela <kamil.kisiela@gmail.com>
Date: Fri, 13 Feb 2026 12:47:30 +0100
Subject: [PATCH 05/14] asd

---
 .../router/configuration/telemetry.mdx        | 200 +++++++++++++++++-
 1 file changed, 199 insertions(+), 1 deletion(-)

diff --git a/packages/web/docs/src/content/router/configuration/telemetry.mdx b/packages/web/docs/src/content/router/configuration/telemetry.mdx
index b5f979f1087..0e8f8242d7d 100644
--- a/packages/web/docs/src/content/router/configuration/telemetry.mdx
+++ b/packages/web/docs/src/content/router/configuration/telemetry.mdx
@@ -5,7 +5,7 @@ title: 'telemetry'
 # telemetry
 
 The `telemetry` configuration controls client identification, Hive reporting, and OpenTelemetry
-tracing behavior in Hive Router.
+tracing and metrics behavior in Hive Router.
 
 ## client_identification
 
@@ -232,6 +232,204 @@ telemetry:
             x-api-key: key
 ```
 
+</details>
+
+</div>
+
+</details>
+
+## metrics
+
+Top-level OpenTelemetry metrics configuration.
+
+<details>
+  <summary>Show metrics configuration</summary>
+
+Metrics are enabled when at least one exporter is configured and enabled.
+
+| Field             | Type     | Default | Notes                                                                                 |
+| ----------------- | -------- | ------- | ------------------------------------------------------------------------------------- |
+| `exporters`       | `array`  | `[]`    | List of exporters used to send metrics. Supported kinds: `otlp`, `prometheus`.        |
+| `instrumentation` | `object` | `{}`    | Instrument behavior for metrics (histogram aggregation and per-instrument overrides). |
+
+<div id="telemetry-metrics-exporters" style={{marginTop: 10}}>
+<details>
+  <summary>`exporters`</summary>
+
+`exporters` is a list of metrics exporter configurations.
+
+Each item in this array defines one exporter instance, so you can configure multiple metrics
+destinations if needed.
+
+This reference documents OTLP and Prometheus exporter configuration.
+
+| Field     | Type      | Default | Notes                                                  |
+| --------- | --------- | ------- | ------------------------------------------------------ |
+| `kind`    | `string`  | -       | Exporter kind. Supported values: `otlp`, `prometheus`. |
+| `enabled` | `boolean` | `true`  | Enables or disables this exporter.                     |
+
+<details>
+  <summary>`otlp`</summary>
+
+| Field                | Type                 | Default      | Notes                                                             |
+| -------------------- | -------------------- | ------------ | ----------------------------------------------------------------- |
+| `kind`               | `string`             | -            | Must be `otlp`.                                                   |
+| `enabled`            | `boolean`            | `true`       | Enables or disables this exporter.                                |
+| `endpoint`           | `StringOrExpression` | -            | OTLP endpoint. Must be set explicitly.                            |
+| `protocol`           | `string`             | -            | OTLP transport protocol. Supported values: `http`, `grpc`.        |
+| `interval`           | `string`             | `60s`        | Interval between OTLP export attempts.                            |
+| `temporality`        | `string`             | `cumulative` | Aggregation temporality. Supported values: `cumulative`, `delta`. |
+| `max_export_timeout` | `string`             | `5s`         | Maximum time for one OTLP export attempt.                         |
+| `http`               | `object`             | -            | HTTP-specific OTLP settings (for `protocol: http`).               |
+| `grpc`               | `object`             | -            | gRPC-specific OTLP settings (for `protocol: grpc`).               |
+
+OTLP over HTTP:
+
+| Field          | Type     | Value / Default | Notes                                                         |
+| -------------- | -------- | --------------- | ------------------------------------------------------------- |
+| `protocol`     | `string` | `http`          | OTLP transport protocol.                                      |
+| `http.headers` | `object` | `{}`            | Map of header names to values (`string` or `{ expression }`). |
+
+```yaml filename="router.config.yaml"
+telemetry:
+  metrics:
+    exporters:
+      - kind: otlp
+        enabled: true
+        protocol: http
+        endpoint: https://otel-collector.example.com/v1/metrics
+        interval: 60s
+        temporality: cumulative
+        max_export_timeout: 5s
+        http:
+          headers:
+            x-otlp-header: value
+```
+
+OTLP over gRPC:
+
+| Field                  | Type     | Value / Default | Notes                                                                        |
+| ---------------------- | -------- | --------------- | ---------------------------------------------------------------------------- |
+| `protocol`             | `string` | `grpc`          | OTLP transport protocol.                                                     |
+| `grpc.metadata`        | `object` | `{}`            | Map of metadata keys to values (`string` or `{ expression }`).               |
+| `grpc.tls.domain_name` | `string` | -               | Domain name used to verify the server certificate.                           |
+| `grpc.tls.key`         | `string` | -               | Path to the client private key file.                                         |
+| `grpc.tls.cert`        | `string` | -               | Path to the client certificate file (PEM).                                   |
+| `grpc.tls.ca`          | `string` | -               | Path to the CA certificate file (PEM) used to verify the server certificate. |
+
+```yaml filename="router.config.yaml"
+telemetry:
+  metrics:
+    exporters:
+      - kind: otlp
+        enabled: true
+        protocol: grpc
+        endpoint: https://otel-collector.example.com:4317
+        interval: 60s
+        temporality: cumulative
+        max_export_timeout: 5s
+        grpc:
+          metadata:
+            x-api-key: key
+```
+
+</details>
+
+<details>
+  <summary>`prometheus`</summary>
+
+| Field     | Type      | Default    | Notes                                       |
+| --------- | --------- | ---------- | ------------------------------------------- |
+| `kind`    | `string`  | -          | Must be `prometheus`.                       |
+| `enabled` | `boolean` | `true`     | Enables/disables Prometheus metrics export. |
+| `port`    | `integer` | -          | Optional port for metrics endpoint.         |
+| `path`    | `string`  | `/metrics` | HTTP path exposed for scraping.             |
+
+```yaml filename="router.config.yaml"
+telemetry:
+  metrics:
+    exporters:
+      - kind: prometheus
+        enabled: true
+        port: 9090
+        path: /metrics
+```
+
+</details>
+
+</details>
+</div>
+
+<div id="telemetry-metrics-instrumentation" style={{marginTop: 10}}>
+<details>
+  <summary>`instrumentation`</summary>
+
+Controls histogram aggregation and per-instrument overrides.
+
+| Field              | Type     | Default                 | Notes                                                       |
+| ------------------ | -------- | ----------------------- | ----------------------------------------------------------- |
+| `common.histogram` | `object` | exponential aggregation | Histogram aggregation strategy for instrumented histograms. |
+| `instruments`      | `object` | `{}`                    | Map of metric name to `false`, `true`, or object override.  |
+
+<details>
+  <summary>`common.histogram`</summary>
+
+Set aggregation mode with `aggregation`.
+
+`explicit` aggregation:
+
+| Field            | Type       | Default | Notes                       |
+| ---------------- | ---------- | ------- | --------------------------- |
+| `aggregation`    | `string`   | -       | Must be `explicit`.         |
+| `boundaries`     | `number[]` | -       | Explicit bucket boundaries. |
+| `record_min_max` | `boolean`  | `false` | Record min/max values.      |
+
+`exponential` aggregation (default):
+
+| Field            | Type      | Default | Notes                           |
+| ---------------- | --------- | ------- | ------------------------------- |
+| `aggregation`    | `string`  | -       | Must be `exponential`.          |
+| `max_size`       | `integer` | `160`   | Max bucket count.               |
+| `max_scale`      | `integer` | `20`    | Max scale for bucket precision. |
+| `record_min_max` | `boolean` | `false` | Record min/max values.          |
+
+</details>
+
+<details>
+  <summary>`instruments`</summary>
+
+`instruments` is a map keyed by metric name. Each value can be one of:
+
+- `false` to disable a metric
+- `true` to keep defaults
+- object to keep metric enabled and override attributes
+
+Object form supports:
+
+| Field        | Type     | Notes                                                                          |
+| ------------ | -------- | ------------------------------------------------------------------------------ |
+| `attributes` | `object` | Map of attribute name to `boolean` (`false` drops attribute, `true` keeps it). |
+
+```yaml filename="router.config.yaml"
+telemetry:
+  metrics:
+    instrumentation:
+      common:
+        histogram:
+          aggregation: exponential
+          max_size: 160
+          max_scale: 20
+      instruments:
+        http.server.request.duration: true
+        http.client.request.duration:
+          attributes:
+            subgraph.name: true
+            http.response.status_code: true
+            server.address: false
+```
+
+</details>
+
 </details>
 </div>
 

From 1c6f7d9e6a86de6113dc64eeaed39edca47a9c2f Mon Sep 17 00:00:00 2001
From: Kamil Kisiela <kamil.kisiela@gmail.com>
Date: Fri, 13 Feb 2026 13:50:36 +0100
Subject: [PATCH 06/14] asd

---
 .../router/configuration/telemetry.mdx        |  2 +-
 .../content/router/observability/metrics.mdx  | 80 ++++++++++---------
 2 files changed, 45 insertions(+), 37 deletions(-)

diff --git a/packages/web/docs/src/content/router/configuration/telemetry.mdx b/packages/web/docs/src/content/router/configuration/telemetry.mdx
index 0e8f8242d7d..32c5e1850fa 100644
--- a/packages/web/docs/src/content/router/configuration/telemetry.mdx
+++ b/packages/web/docs/src/content/router/configuration/telemetry.mdx
@@ -249,7 +249,7 @@ Metrics are enabled when at least one exporter is configured and enabled.
 
 | Field             | Type     | Default | Notes                                                                                 |
 | ----------------- | -------- | ------- | ------------------------------------------------------------------------------------- |
-| `exporters`       | `array`  | `[]`    | List of exporters used to send metrics. Supported kinds: `otlp`, `prometheus`.        |
+| `exporters`       | `array`  | `[]`    | List of exporters used to send metrics.                                               |
 | `instrumentation` | `object` | `{}`    | Instrument behavior for metrics (histogram aggregation and per-instrument overrides). |
 
 <div id="telemetry-metrics-exporters" style={{marginTop: 10}}>
diff --git a/packages/web/docs/src/content/router/observability/metrics.mdx b/packages/web/docs/src/content/router/observability/metrics.mdx
index bd60da693a5..c9a9534d724 100644
--- a/packages/web/docs/src/content/router/observability/metrics.mdx
+++ b/packages/web/docs/src/content/router/observability/metrics.mdx
@@ -16,23 +16,28 @@ instruments, and what each metric/label means in practice.
 
 ## Choose your metrics destination
 
-Hive Router supports two common metrics paths:
+Hive Router exposes metrics through two widely used integration patterns:
 
-- OTLP-compatible backends
-- Prometheus scraping
+- OTLP-based observability backends
+- Prometheus scrape endpoints
 
-In practice, teams with existing OpenTelemetry pipelines usually choose OTLP. Teams with existing
-Prometheus/Grafana stacks usually choose Prometheus.
+Most teams already running an OpenTelemetry pipeline tend to integrate via OTLP, while teams built
+around Prometheus and Grafana typically stick with Prometheus scraping.
 
 ### Send metrics to OTLP-compatible backends
 
-Hive Router can export metrics directly to any OTLP-compatible destination, including OpenTelemetry
-Collector and vendor backends that support OTLP ingestion, either through HTTP or gRPC.
+Hive Router can export metrics using OTLP to standard OpenTelemetry pipelines, including the
+OpenTelemetry Collector and vendor backends that support OTLP ingestion over HTTP or gRPC.
 
-After enabling the exporter, send traffic through the router and verify that new metric series
-appear in your backend (for example HTTP server/client duration, cache metrics, and supergraph
-metrics). If metrics are missing, start by validating endpoint reachability and auth credentials,
-then check exporter protocol alignment (HTTP vs gRPC).
+After enabling the exporter, generate some traffic through the router and confirm that new metric
+series appear in your backend (for example HTTP server/client latency, cache metrics, and supergraph
+metrics).
+
+If metrics do not appear, verify:
+
+- Endpoint reachability (network, DNS, TLS)
+- Authentication credentials or headers
+- Exporter protocol matches the backend (OTLP/HTTP vs OTLP/gRPC)
 
 <Tabs items={["OTLP over HTTP", "OTLP over gRPC"]}>
 
@@ -86,11 +91,12 @@ telemetry:
 
 ### Expose metrics for Prometheus scraping
 
-If your observability stack is Prometheus-first, Hive Router can expose a scrape endpoint that
-Prometheus polls on a schedule.
+If your observability stack is Prometheus-first, Hive Router can expose an HTTP endpoint that
+Prometheus scrapes at its configured interval.
 
-The `port` and `path` settings define where Router serves metrics. Prometheus must be able to reach
-that address from its runtime environment (local network, Kubernetes service, or VM network path).
+The `port` and `path` settings define the address where the Router exposes metrics. Prometheus must
+be able to reach that address from its runtime environment (local network, Kubernetes service, or VM
+network path).
 
 <Callout type="note">
   If `port` is not set, or is the same as the main HTTP server port, the Router exposes metrics
@@ -98,11 +104,10 @@ that address from its runtime environment (local network, Kubernetes service, or
   starts a separate HTTP server dedicated solely to the Prometheus metrics endpoint.
 </Callout>
 
-In production, make sure this endpoint is reachable only by trusted scrapers and that any ingress or
-firewall policy allows Prometheus access. Once configured, confirm the target appears as healthy
-(`UP`) in Prometheus and then verify expected series are present (for example
-`http.server.request.duration`, `http.client.request.duration`, and `hive.router.*` cache/supergraph
-metrics).
+In production, make sure this endpoint is reachable only by trusted scrapers (for example via
+network policy, firewall rules, or private ingress). Once configured, confirm the target appears as
+healthy in Prometheus and then verify expected series are present (for example
+`http.server.request.duration`, `http.client.request.duration`).
 
 ```yaml filename="router.config.yaml"
 telemetry:
@@ -116,8 +121,8 @@ telemetry:
 
 ## Production baseline
 
-For production workloads, start with one primary exporter, a clear service identity, and default
-instrument settings.
+For production workloads, start with a single primary exporter, define a clear service identity, and
+keep default instrumentation settings.
 
 ```yaml filename="router.config.yaml"
 telemetry:
@@ -137,8 +142,9 @@ telemetry:
         max_export_timeout: 5s
 ```
 
-This is a safe baseline and works well before introducing instrumentation-level customization. By
-default, all metrics and labels are exposed.
+This is a safe baseline and works well before introducing instrumentation-level customization.
+Additional exporters can be added later, but starting with one simplifies validation and
+troubleshooting.
 
 ## Customize instrumentation
 
@@ -172,7 +178,8 @@ Attribute override behavior:
 
 ### GraphQL
 
-GraphQL metrics focus on validation, parsing and execution-time errors surfaced by the router.
+GraphQL metrics capture errors surfaced by the router across all stages of a GraphQL request
+lifecycle.
 
 <MetricsSection
   metrics={[
@@ -246,7 +253,7 @@ Supergraph metrics cover polling and processing lifecycle of schema updates.
 
 ### HTTP server
 
-HTTP server metrics describe inbound client traffic handled by the router.
+HTTP server metrics capture inbound client traffic processed by the router.
 
 <MetricsSection
   metrics={[
@@ -358,7 +365,7 @@ HTTP server metrics describe inbound client traffic handled by the router.
 
 ### HTTP client
 
-HTTP client metrics describe outbound requests to the subgraphs, made by the router.
+HTTP client metrics capture outbound requests the router makes to subgraphs.
 
 <MetricsSection
   metrics={[
@@ -495,7 +502,8 @@ HTTP client metrics describe outbound requests to the subgraphs, made by the rou
 
 ### Cache
 
-Cache metrics track lookup behavior and cache size across router caches.
+Cache metrics track lookup behavior and cache size across router caches used during request
+preparation and planning stages.
 
 #### Parsing cache
 
@@ -624,17 +632,17 @@ These labels are shared by cache lookup counters and duration histograms.
 
 ## What to monitor in production
 
-The examples below show what to monitor in production and how to break metrics down so you can
-quickly detect and isolate API, subgraph, cache, and GraphQL issues.
+The examples below show which signals to monitor in production and how to break them down so you can
+quickly isolate API, subgraph, cache, and GraphQL issues.
 
-### Monitor latency of your GraphQL API
+### Monitor end-to-end latency of your GraphQL API
 
 Use [`http.server.request.duration`](#metric-http-server-request-duration) as your primary latency
 signal.
 
 In production, break this metric down by `http.route`, `http.request.method`, and
-`http.response.status_code`, then track p95 and p99 per route and method. Keep successful and failed
-responses separated so error-path latency does not get hidden by healthy traffic.
+`http.response.status_code`, then track p95 and p99 latency per route and method. Keep successful
+and failed responses separated so error-path latency does not get hidden by healthy traffic.
 
 ### Monitor health of your subgraphs
 
@@ -647,15 +655,15 @@ identify which subgraph is driving tail latency or error spikes.
 
 ### Monitor cache effectiveness and planning pressure
 
-Use the cache metrics to evaluate cache behavior over time.
+Use the cache metrics to evaluate cache hit ratio, miss cost, and pressure over time.
 
 For request and duration metrics, split by `result` (`hit` and `miss`) so you can track hit ratio
-and miss latency per cache tier.
+and miss latency per cache kind.
 
 ### Monitor GraphQL errors over time
 
 Use [`hive.router.graphql.errors_total`](#metric-hive-router-graphql-errors-total) and break it down
-by `code` to track both volume and error shape.
+by `code` to track both volume and error distribution.
 
 In production, monitor how error-code distribution changes over time, not only total count, so you
 can separate validation issues from execution failures.

From 45d3c3add6b3efc7e66ae020ada257eba3c2fdee Mon Sep 17 00:00:00 2001
From: Kamil Kisiela <kamil.kisiela@gmail.com>
Date: Fri, 13 Feb 2026 14:16:00 +0100
Subject: [PATCH 07/14] docs: vertically center metric card header row

---
 packages/web/docs/src/components/otel-metrics/metric-card.tsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/web/docs/src/components/otel-metrics/metric-card.tsx b/packages/web/docs/src/components/otel-metrics/metric-card.tsx
index 116937dbb65..5f7d1df83d0 100644
--- a/packages/web/docs/src/components/otel-metrics/metric-card.tsx
+++ b/packages/web/docs/src/components/otel-metrics/metric-card.tsx
@@ -87,7 +87,7 @@ export function MetricCard({ name, type, unit, description, labels }: MetricCard
       className="group scroll-mt-20 overflow-hidden rounded-lg border border-gray-200 bg-white transition-shadow duration-200 hover:shadow-md dark:border-neutral-800 dark:bg-neutral-900 dark:hover:shadow-black/30"
     >
       <div className="p-5">
-        <div className="mb-3 flex items-start justify-between gap-4">
+        <div className="mb-3 flex items-center justify-between gap-4">
           <div className="min-w-0 flex-1">
             <div className="flex items-center gap-1.5">
               <code className="break-all text-sm font-semibold text-gray-900 dark:text-slate-100">

From 45c4300cb0f012a6e7913742569b8ad484c48a19 Mon Sep 17 00:00:00 2001
From: Kamil Kisiela <kamil.kisiela@gmail.com>
Date: Fri, 13 Feb 2026 15:36:10 +0100
Subject: [PATCH 08/14] docs: document explicit histogram buckets for router metrics

---
 .../router/configuration/telemetry.mdx        | 47 ++++++++++++++-----
 .../content/router/observability/metrics.mdx  | 32 +++++++++++++
 2 files changed, 67 insertions(+), 12 deletions(-)

diff --git a/packages/web/docs/src/content/router/configuration/telemetry.mdx b/packages/web/docs/src/content/router/configuration/telemetry.mdx
index 32c5e1850fa..29aa3895052 100644
--- a/packages/web/docs/src/content/router/configuration/telemetry.mdx
+++ b/packages/web/docs/src/content/router/configuration/telemetry.mdx
@@ -376,21 +376,40 @@ Controls histogram aggregation and per-instrument overrides.
 
 Set aggregation mode with `aggregation`.
 
-`explicit` aggregation:
+`explicit` aggregation (default):
 
-| Field            | Type       | Default | Notes                       |
-| ---------------- | ---------- | ------- | --------------------------- |
-| `aggregation`    | `string`   | -       | Must be `explicit`.         |
-| `boundaries`     | `number[]` | -       | Explicit bucket boundaries. |
-| `record_min_max` | `boolean`  | `false` | Record min/max values.      |
+| Field             | Type     | Default | Notes                                                               |
+| ----------------- | -------- | ------- | ------------------------------------------------------------------- |
+| `aggregation`     | `string` | -       | Must be `explicit`.                                                 |
+| `seconds`         | `object` | -       | Explicit histogram config for metrics with unit `s`.                |
+| `bytes`           | `object` | -       | Explicit histogram config for metrics with unit `By`.               |
 
-`exponential` aggregation (default):
+`seconds` and `bytes` fields:
+
+| Field            | Type       | Default | Notes                                                        |
+| ---------------- | ---------- | ------- | ------------------------------------------------------------ |
+| `buckets`        | `number[] \| string[]` | varies  | Explicit bucket upper bounds. Must be non-empty and increasing. |
+| `record_min_max` | `boolean`  | `false` | Record min/max values for this unit bucket set.              |
+
+Default explicit buckets:
+
+- `seconds.buckets`: `[0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1, 2.5, 5, 7.5, 10]`
+- `bytes.buckets`: `[128, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576, 2097152, 3145728, 4194304, 5242880]`
+
+Bucket format rules:
+
+- `buckets` can be either all numbers or all strings.
+- mixed arrays are not allowed.
+- for `seconds.buckets`, string values are parsed as durations (for example `"5ms"`, `"1s"`).
+- for `bytes.buckets`, string values are parsed as human-readable sizes (for example `"1KB"`, `"5MB"`).
+
+`exponential` aggregation:
 
 | Field            | Type      | Default | Notes                           |
 | ---------------- | --------- | ------- | ------------------------------- |
 | `aggregation`    | `string`  | -       | Must be `exponential`.          |
-| `max_size`       | `integer` | `160`   | Max bucket count.               |
-| `max_scale`      | `integer` | `20`    | Max scale for bucket precision. |
+| `max_size`       | `integer` | -       | Max bucket count. Required.     |
+| `max_scale`      | `integer` | -       | Max scale for bucket precision. Required. |
 | `record_min_max` | `boolean` | `false` | Record min/max values.          |
 
 </details>
@@ -416,9 +435,13 @@ telemetry:
     instrumentation:
       common:
         histogram:
-          aggregation: exponential
-          max_size: 160
-          max_scale: 20
+          aggregation: explicit
+          seconds:
+            buckets: ["5ms", "10ms", "25ms", "50ms", "75ms", "100ms", "250ms", "500ms", "750ms", "1s", "2.5s", "5s", "7.5s", "10s"]
+            record_min_max: false
+          bytes:
+            buckets: ["128B", "512B", "1KB", "2KB", "4KB", "8KB", "16KB", "32KB", "64KB", "128KB", "256KB", "512KB", "1MB", "2MB", "3MB", "4MB", "5MB"]
+            record_min_max: false
       instruments:
         http.server.request.duration: true
         http.client.request.duration:
diff --git a/packages/web/docs/src/content/router/observability/metrics.mdx b/packages/web/docs/src/content/router/observability/metrics.mdx
index c9a9534d724..68d5a89e78f 100644
--- a/packages/web/docs/src/content/router/observability/metrics.mdx
+++ b/packages/web/docs/src/content/router/observability/metrics.mdx
@@ -174,6 +174,38 @@ Attribute override behavior:
 - `false` - drop label from that metric
 - `true` - keep label (all labels are enabled by default)
 
+Histogram aggregation can also be customized under `telemetry.metrics.instrumentation.common.histogram`.
+
+- `explicit` (default) uses unit-specific bucket sets.
+- `exponential` uses one shared exponential strategy for all histogram metrics.
+- `explicit` lets you configure unit-specific buckets:
+  - `seconds` for histogram unit `s`
+  - `bytes` for histogram unit `By`
+
+Bucket format rules:
+
+- `buckets` can be either all numbers or all strings.
+- mixed arrays are not allowed.
+- `seconds.buckets` string values are parsed as durations (for example `"5ms"`, `"1s"`).
+- `bytes.buckets` string values are parsed as human-readable sizes (for example `"1KB"`, `"5MB"`).
+
+In `explicit` mode, a histogram unit other than `s` or `By` causes startup to fail.
+
+```yaml filename="router.config.yaml"
+telemetry:
+  metrics:
+    instrumentation:
+      common:
+        histogram:
+          aggregation: explicit
+          seconds:
+            buckets: ["5ms", "10ms", "25ms", "50ms", "75ms", "100ms", "250ms", "500ms", "750ms", "1s", "2.5s", "5s", "7.5s", "10s"]
+            record_min_max: false
+          bytes:
+            buckets: ["128B", "512B", "1KB", "2KB", "4KB", "8KB", "16KB", "32KB", "64KB", "128KB", "256KB", "512KB", "1MB", "2MB", "3MB", "4MB", "5MB"]
+            record_min_max: false
+```
+
 ## Metrics reference
 
 ### GraphQL

From 5f1fe954786e3a2cfa8d0ce4e2f7a1c37a0c8f93 Mon Sep 17 00:00:00 2001
From: Kamil Kisiela <kamil.kisiela@gmail.com>
Date: Fri, 13 Feb 2026 15:52:53 +0100
Subject: [PATCH 09/14] docs: format histogram docs and link OpenTelemetry specs

---
 .../router/configuration/telemetry.mdx        | 100 ++++++++++++------
 .../content/router/observability/metrics.mdx  |  53 ++++++++--
 2 files changed, 115 insertions(+), 38 deletions(-)

diff --git a/packages/web/docs/src/content/router/configuration/telemetry.mdx b/packages/web/docs/src/content/router/configuration/telemetry.mdx
index 29aa3895052..9b42ca64555 100644
--- a/packages/web/docs/src/content/router/configuration/telemetry.mdx
+++ b/packages/web/docs/src/content/router/configuration/telemetry.mdx
@@ -249,7 +249,7 @@ Metrics are enabled when at least one exporter is configured and enabled.
 
 | Field             | Type     | Default | Notes                                                                                 |
 | ----------------- | -------- | ------- | ------------------------------------------------------------------------------------- |
-| `exporters`       | `array`  | `[]`    | List of exporters used to send metrics.        |
+| `exporters`       | `array`  | `[]`    | List of exporters used to send metrics.                                               |
 | `instrumentation` | `object` | `{}`    | Instrument behavior for metrics (histogram aggregation and per-instrument overrides). |
 
 <div id="telemetry-metrics-exporters" style={{marginTop: 10}}>
@@ -271,17 +271,17 @@ This reference documents OTLP and Prometheus exporter configuration.
 <details>
   <summary>`otlp`</summary>
 
-| Field                | Type                 | Default      | Notes                                                             |
-| -------------------- | -------------------- | ------------ | ----------------------------------------------------------------- |
-| `kind`               | `string`             | -            | Must be `otlp`.                                                   |
-| `enabled`            | `boolean`            | `true`       | Enables or disables this exporter.                                |
-| `endpoint`           | `StringOrExpression` | -            | OTLP endpoint. Must be set explicitly.                            |
-| `protocol`           | `string`             | -            | OTLP transport protocol. Supported values: `http`, `grpc`.        |
-| `interval`           | `string`             | `60s`        | Interval between OTLP export attempts.                            |
-| `temporality`        | `string`             | `cumulative` | Aggregation temporality. Supported values: `cumulative`, `delta`. |
-| `max_export_timeout` | `string`             | `5s`         | Maximum time for one OTLP export attempt.                         |
-| `http`               | `object`             | -            | HTTP-specific OTLP settings (for `protocol: http`).               |
-| `grpc`               | `object`             | -            | gRPC-specific OTLP settings (for `protocol: grpc`).               |
+| Field                                                                                                               | Type                 | Default      | Notes                                                             |
+| ------------------------------------------------------------------------------------------------------------------- | -------------------- | ------------ | ----------------------------------------------------------------- |
+| `kind`                                                                                                              | `string`             | -            | Must be `otlp`.                                                   |
+| `enabled`                                                                                                           | `boolean`            | `true`       | Enables or disables this exporter.                                |
+| `endpoint`                                                                                                          | `StringOrExpression` | -            | OTLP endpoint. Must be set explicitly.                            |
+| `protocol`                                                                                                          | `string`             | -            | OTLP transport protocol. Supported values: `http`, `grpc`.        |
+| `interval`                                                                                                          | `string`             | `60s`        | Interval between OTLP export attempts.                            |
+| [`temporality`](https://opentelemetry.io/docs/specs/otel/metrics/supplementary-guidelines/#aggregation-temporality) | `string`             | `cumulative` | Aggregation temporality. Supported values: `cumulative`, `delta`. |
+| `max_export_timeout`                                                                                                | `string`             | `5s`         | Maximum time for one OTLP export attempt.                         |
+| `http`                                                                                                              | `object`             | -            | HTTP-specific OTLP settings (for `protocol: http`).               |
+| `grpc`                                                                                                              | `object`             | -            | gRPC-specific OTLP settings (for `protocol: grpc`).               |
 
 OTLP over HTTP:
 
@@ -378,39 +378,42 @@ Set aggregation mode with `aggregation`.
 
 `explicit` aggregation (default):
 
-| Field             | Type     | Default | Notes                                                               |
-| ----------------- | -------- | ------- | ------------------------------------------------------------------- |
-| `aggregation`     | `string` | -       | Must be `explicit`.                                                 |
-| `seconds`         | `object` | -       | Explicit histogram config for metrics with unit `s`.                |
-| `bytes`           | `object` | -       | Explicit histogram config for metrics with unit `By`.               |
+| Field         | Type     | Default | Notes                                                 |
+| ------------- | -------- | ------- | ----------------------------------------------------- |
+| `aggregation` | `string` | -       | Must be `explicit`.                                   |
+| `seconds`     | `object` | -       | Explicit histogram config for metrics with unit `s`.  |
+| `bytes`       | `object` | -       | Explicit histogram config for metrics with unit `By`. |
 
 `seconds` and `bytes` fields:
 
-| Field            | Type       | Default | Notes                                                        |
-| ---------------- | ---------- | ------- | ------------------------------------------------------------ |
+| Field            | Type                   | Default | Notes                                                           |
+| ---------------- | ---------------------- | ------- | --------------------------------------------------------------- |
 | `buckets`        | `number[] \| string[]` | varies  | Explicit bucket upper bounds. Must be non-empty and increasing. |
-| `record_min_max` | `boolean`  | `false` | Record min/max values for this unit bucket set.              |
+| `record_min_max` | `boolean`              | `false` | Record min/max values for this unit bucket set.                 |
 
 Default explicit buckets:
 
 - `seconds.buckets`: `[0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1, 2.5, 5, 7.5, 10]`
-- `bytes.buckets`: `[128, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576, 2097152, 3145728, 4194304, 5242880]`
+- `bytes.buckets`:
+  `[128, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576, 2097152, 3145728, 4194304, 5242880]`
 
 Bucket format rules:
 
 - `buckets` can be either all numbers or all strings.
 - mixed arrays are not allowed.
 - for `seconds.buckets`, string values are parsed as durations (for example `"5ms"`, `"1s"`).
-- for `bytes.buckets`, string values are parsed as human-readable sizes (for example `"1KB"`, `"5MB"`).
+- for `bytes.buckets`, string values are parsed as human-readable sizes (for example `"1KB"`,
+  `"5MB"`).
 
-`exponential` aggregation:
+[`exponential`](https://opentelemetry.io/docs/specs/otel/metrics/data-model/#exponentialhistogram)
+aggregation:
 
-| Field            | Type      | Default | Notes                           |
-| ---------------- | --------- | ------- | ------------------------------- |
-| `aggregation`    | `string`  | -       | Must be `exponential`.          |
-| `max_size`       | `integer` | -       | Max bucket count. Required.     |
-| `max_scale`      | `integer` | -       | Max scale for bucket precision. Required. |
-| `record_min_max` | `boolean` | `false` | Record min/max values.          |
+| Field                                                                                                               | Type      | Default | Notes                                     |
+| ------------------------------------------------------------------------------------------------------------------- | --------- | ------- | ----------------------------------------- |
+| `aggregation`                                                                                                       | `string`  | -       | Must be `exponential`.                    |
+| [`max_size`](https://opentelemetry.io/docs/specs/otel/metrics/sdk/#base2-exponential-bucket-histogram-aggregation)  | `integer` | -       | Max bucket count. Required.               |
+| [`max_scale`](https://opentelemetry.io/docs/specs/otel/metrics/sdk/#base2-exponential-bucket-histogram-aggregation) | `integer` | -       | Max scale for bucket precision. Required. |
+| `record_min_max`                                                                                                    | `boolean` | `false` | Record min/max values.                    |
 
 </details>
 
@@ -437,10 +440,45 @@ telemetry:
         histogram:
           aggregation: explicit
           seconds:
-            buckets: ["5ms", "10ms", "25ms", "50ms", "75ms", "100ms", "250ms", "500ms", "750ms", "1s", "2.5s", "5s", "7.5s", "10s"]
+            buckets:
+              [
+                '5ms',
+                '10ms',
+                '25ms',
+                '50ms',
+                '75ms',
+                '100ms',
+                '250ms',
+                '500ms',
+                '750ms',
+                '1s',
+                '2.5s',
+                '5s',
+                '7.5s',
+                '10s'
+              ]
             record_min_max: false
           bytes:
-            buckets: ["128B", "512B", "1KB", "2KB", "4KB", "8KB", "16KB", "32KB", "64KB", "128KB", "256KB", "512KB", "1MB", "2MB", "3MB", "4MB", "5MB"]
+            buckets:
+              [
+                '128B',
+                '512B',
+                '1KB',
+                '2KB',
+                '4KB',
+                '8KB',
+                '16KB',
+                '32KB',
+                '64KB',
+                '128KB',
+                '256KB',
+                '512KB',
+                '1MB',
+                '2MB',
+                '3MB',
+                '4MB',
+                '5MB'
+              ]
             record_min_max: false
       instruments:
         http.server.request.duration: true
diff --git a/packages/web/docs/src/content/router/observability/metrics.mdx b/packages/web/docs/src/content/router/observability/metrics.mdx
index 68d5a89e78f..68b810e5aaa 100644
--- a/packages/web/docs/src/content/router/observability/metrics.mdx
+++ b/packages/web/docs/src/content/router/observability/metrics.mdx
@@ -174,17 +174,21 @@ Attribute override behavior:
 - `false` - drop label from that metric
 - `true` - keep label (all labels are enabled by default)
 
-Histogram aggregation can also be customized under `telemetry.metrics.instrumentation.common.histogram`.
+Histogram aggregation can also be customized under
+`telemetry.metrics.instrumentation.common.histogram`.
 
-- `explicit` (default) uses unit-specific bucket sets.
-- `exponential` uses one shared exponential strategy for all histogram metrics.
-- `explicit` lets you configure unit-specific buckets:
+- [`explicit`](https://opentelemetry.io/docs/specs/otel/metrics/data-model/#histogram) (default)
+  uses unit-specific bucket sets, which you can configure per unit:
   - `seconds` for histogram unit `s`
   - `bytes` for histogram unit `By`
+- [`exponential`](https://opentelemetry.io/docs/specs/otel/metrics/data-model/#exponentialhistogram)
+  uses one shared exponential strategy for all histogram metrics.
+- `record_min_max` controls whether min and max are reported for histogram points.
 
 Bucket format rules:
 
-- `buckets` can be either all numbers or all strings.
+- [`buckets`](https://opentelemetry.io/docs/specs/otel/metrics/sdk/#explicit-bucket-histogram-aggregation)
+  can be either all numbers or all strings.
 - mixed arrays are not allowed.
 - `seconds.buckets` string values are parsed as durations (for example `"5ms"`, `"1s"`).
 - `bytes.buckets` string values are parsed as human-readable sizes (for example `"1KB"`, `"5MB"`).
@@ -199,10 +203,45 @@ telemetry:
         histogram:
           aggregation: explicit
           seconds:
-            buckets: ["5ms", "10ms", "25ms", "50ms", "75ms", "100ms", "250ms", "500ms", "750ms", "1s", "2.5s", "5s", "7.5s", "10s"]
+            buckets:
+              [
+                '5ms',
+                '10ms',
+                '25ms',
+                '50ms',
+                '75ms',
+                '100ms',
+                '250ms',
+                '500ms',
+                '750ms',
+                '1s',
+                '2.5s',
+                '5s',
+                '7.5s',
+                '10s'
+              ]
             record_min_max: false
           bytes:
-            buckets: ["128B", "512B", "1KB", "2KB", "4KB", "8KB", "16KB", "32KB", "64KB", "128KB", "256KB", "512KB", "1MB", "2MB", "3MB", "4MB", "5MB"]
+            buckets:
+              [
+                '128B',
+                '512B',
+                '1KB',
+                '2KB',
+                '4KB',
+                '8KB',
+                '16KB',
+                '32KB',
+                '64KB',
+                '128KB',
+                '256KB',
+                '512KB',
+                '1MB',
+                '2MB',
+                '3MB',
+                '4MB',
+                '5MB'
+              ]
             record_min_max: false
 ```
 

From 22520f2c16bd2c59b012c2fa2a4e61c396deb0f7 Mon Sep 17 00:00:00 2001
From: Kamil Kisiela <kamil.kisiela@gmail.com>
Date: Fri, 13 Feb 2026 21:40:26 +0100
Subject: [PATCH 10/14] Update metrics.mdx

---
 .../content/router/observability/metrics.mdx  | 26 ++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/packages/web/docs/src/content/router/observability/metrics.mdx b/packages/web/docs/src/content/router/observability/metrics.mdx
index 68b810e5aaa..c7e336f4f7a 100644
--- a/packages/web/docs/src/content/router/observability/metrics.mdx
+++ b/packages/web/docs/src/content/router/observability/metrics.mdx
@@ -340,7 +340,9 @@ HTTP server metrics capture inbound client traffic processed by the router.
         'network.protocol.name',
         'network.protocol.version',
         'url.scheme',
-        'error.type'
+        'error.type',
+        'graphql.operation.name',
+        'graphql.operation.type'
       ]
     },
     {
@@ -355,7 +357,9 @@ HTTP server metrics capture inbound client traffic processed by the router.
         'network.protocol.name',
         'network.protocol.version',
         'url.scheme',
-        'error.type'
+        'error.type',
+        'graphql.operation.name',
+        'graphql.operation.type'
       ]
     },
     {
@@ -370,7 +374,9 @@ HTTP server metrics capture inbound client traffic processed by the router.
         'network.protocol.name',
         'network.protocol.version',
         'url.scheme',
-        'error.type'
+        'error.type',
+        'graphql.operation.name',
+        'graphql.operation.type'
       ]
     },
     {
@@ -430,6 +436,20 @@ HTTP server metrics capture inbound client traffic processed by the router.
       meaning: 'Error classification for failed requests',
       typicalValues: ['status code >= 400'],
       notes: 'Only set for failed requests'
+    },
+    {
+      name: 'graphql.operation.name',
+      meaning: 'GraphQL operation name associated with the HTTP request',
+      typicalValues: ['UsersQuery', 'IntrospectionQuery', 'UNKNOWN'],
+      notes:
+        'Used by http.server.request.duration, http.server.request.body.size, and http.server.response.body.size'
+    },
+    {
+      name: 'graphql.operation.type',
+      meaning: 'GraphQL operation type',
+      typicalValues: ['query', 'mutation', 'subscription'],
+      notes:
+        'Used by http.server.request.duration, http.server.request.body.size, and http.server.response.body.size. Omitted when unknown'
     }
   ]}
 />

From d53624ff9dee4fab73a99d9da64c377f57083f61 Mon Sep 17 00:00:00 2001
From: Kamil Kisiela <kamil.kisiela@gmail.com>
Date: Mon, 16 Feb 2026 12:43:41 +0100
Subject: [PATCH 11/14] Update metrics.mdx

---
 .../content/router/observability/metrics.mdx  | 23 ++++++++++++++-----
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/packages/web/docs/src/content/router/observability/metrics.mdx b/packages/web/docs/src/content/router/observability/metrics.mdx
index c7e336f4f7a..408431f77a4 100644
--- a/packages/web/docs/src/content/router/observability/metrics.mdx
+++ b/packages/web/docs/src/content/router/observability/metrics.mdx
@@ -342,7 +342,8 @@ HTTP server metrics capture inbound client traffic processed by the router.
         'url.scheme',
         'error.type',
         'graphql.operation.name',
-        'graphql.operation.type'
+        'graphql.operation.type',
+        'graphql.response.status'
       ]
     },
     {
@@ -359,7 +360,8 @@ HTTP server metrics capture inbound client traffic processed by the router.
         'url.scheme',
         'error.type',
         'graphql.operation.name',
-        'graphql.operation.type'
+        'graphql.operation.type',
+        'graphql.response.status'
       ]
     },
     {
@@ -376,7 +378,8 @@ HTTP server metrics capture inbound client traffic processed by the router.
         'url.scheme',
         'error.type',
         'graphql.operation.name',
-        'graphql.operation.type'
+        'graphql.operation.type',
+        'graphql.response.status'
       ]
     },
     {
@@ -450,6 +453,13 @@ HTTP server metrics capture inbound client traffic processed by the router.
       typicalValues: ['query', 'mutation', 'subscription'],
       notes:
         'Used by http.server.request.duration, http.server.request.body.size, and http.server.response.body.size. Omitted when unknown'
+    },
+    {
+      name: 'graphql.response.status',
+      meaning: 'GraphQL response status for the request',
+      typicalValues: ['ok', 'error'],
+      notes:
+        'Used by http.server.request.duration, http.server.request.body.size, and http.server.response.body.size. "error" indicates the GraphQL response contains at least one error'
     }
   ]}
 />
@@ -731,9 +741,10 @@ quickly isolate API, subgraph, cache, and GraphQL issues.
 Use [`http.server.request.duration`](#metric-http-server-request-duration) as your primary latency
 signal.
 
-In production, break this metric down by `http.route`, `http.request.method`, and
-`http.response.status_code`, then track p95 and p99 latency per route and method. Keep successful
-and failed responses separated so error-path latency does not get hidden by healthy traffic.
+In production, break this metric down by `http.route`, `http.request.method`,
+`http.response.status_code`, and/or `graphql.response.status`, then track p95 and p99 latency per
+route and method. Keep successful and failed responses separated so error-path latency does not get
+hidden by healthy traffic.
 
 ### Monitor health of your subgraphs
 

From 26bfd56864b278b24e3ff211dd89bc34886d45a7 Mon Sep 17 00:00:00 2001
From: Kamil Kisiela <kamil.kisiela@gmail.com>
Date: Mon, 16 Feb 2026 14:01:41 +0100
Subject: [PATCH 12/14] Update metrics.mdx

---
 .../src/content/router/observability/metrics.mdx | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/packages/web/docs/src/content/router/observability/metrics.mdx b/packages/web/docs/src/content/router/observability/metrics.mdx
index 408431f77a4..c0443ba5dcd 100644
--- a/packages/web/docs/src/content/router/observability/metrics.mdx
+++ b/packages/web/docs/src/content/router/observability/metrics.mdx
@@ -484,7 +484,8 @@ HTTP client metrics capture outbound requests the router makes to subgraphs.
         'url.scheme',
         'subgraph.name',
         'http.response.status_code',
-        'error.type'
+        'error.type',
+        'graphql.response.status'
       ]
     },
     {
@@ -501,7 +502,8 @@ HTTP client metrics capture outbound requests the router makes to subgraphs.
         'url.scheme',
         'subgraph.name',
         'http.response.status_code',
-        'error.type'
+        'error.type',
+        'graphql.response.status'
       ]
     },
     {
@@ -518,7 +520,8 @@ HTTP client metrics capture outbound requests the router makes to subgraphs.
         'url.scheme',
         'subgraph.name',
         'http.response.status_code',
-        'error.type'
+        'error.type',
+        'graphql.response.status'
       ]
     },
     {
@@ -597,6 +600,13 @@ HTTP client metrics capture outbound requests the router makes to subgraphs.
       meaning: 'Error classification',
       typicalValues: ['400', 'SUBGRAPH_REQUEST_FAILURE', '...'],
       notes: 'Numeric status code >= 400 or execution error code string'
+    },
+    {
+      name: 'graphql.response.status',
+      meaning: 'GraphQL response status for the subgraph request',
+      typicalValues: ['ok', 'error'],
+      notes:
+        'Set to "ok" when the parsed subgraph response has no GraphQL errors. Set to "error" when the subgraph response includes GraphQL errors or when transport/deserialization fails.'
     }
   ]}
 />

From d31bf7865d91c02a7a5ab1ed78b4e2c6e5f18be8 Mon Sep 17 00:00:00 2001
From: Kamil Kisiela <kamil.kisiela@gmail.com>
Date: Mon, 16 Feb 2026 16:05:55 +0100
Subject: [PATCH 13/14] cardinality considerations

---
 .../content/router/observability/metrics.mdx  | 37 ++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/packages/web/docs/src/content/router/observability/metrics.mdx b/packages/web/docs/src/content/router/observability/metrics.mdx
index c0443ba5dcd..d31ed9c9d37 100644
--- a/packages/web/docs/src/content/router/observability/metrics.mdx
+++ b/packages/web/docs/src/content/router/observability/metrics.mdx
@@ -146,10 +146,45 @@ This is a safe baseline and works well before introducing instrumentation-level
 Additional exporters can be added later, but starting with one simplifies validation and
 troubleshooting.
 
+### Cardinality considerations
+
+For production workloads, consider disabling `graphql.operation.name` label or even `graphql.operation.type` on high-volume metrics.
+
+<Callout type="warning">
+  `graphql.operation.name` can create very high-cardinality metrics.
+
+  Operation names come from client requests. Without persisted operations, clients can send many
+  distinct operation names (or random names), which can rapidly increase cardinality and cost in
+  Prometheus and OTLP backends.
+</Callout>
+
+```yaml filename="router.config.yaml"
+telemetry:
+  metrics:
+    instrumentation:
+      instruments:
+        http.server.request.duration:
+          attributes:
+            graphql.operation.name: false
+        http.server.request.body.size:
+          attributes:
+            graphql.operation.name: false
+        http.server.response.body.size:
+          attributes:
+            graphql.operation.name: false
+```
+
 ## Customize instrumentation
 
 You can override behavior per metric under `telemetry.metrics.instrumentation.instruments`.
 
+<Callout type="tip">
+    Disable non-essential labels to control cost, since each additional label value increases active time-series cardinality - a primary billing and performance driver in platforms like Grafana and Datadog.
+</Callout>
+
+For production guidance on label cardinality (especially `graphql.operation.name`), see
+[Cardinality considerations](#cardinality-considerations).
+
 - `false` disables a metric.
 - `true` keeps default behavior.
 - object form enables metric + optional attribute overrides.
@@ -445,7 +480,7 @@ HTTP server metrics capture inbound client traffic processed by the router.
       meaning: 'GraphQL operation name associated with the HTTP request',
       typicalValues: ['UsersQuery', 'IntrospectionQuery', 'UNKNOWN'],
       notes:
-        'Used by http.server.request.duration, http.server.request.body.size, and http.server.response.body.size'
+        'Used by http.server.request.duration, http.server.request.body.size, and http.server.response.body.size. High-cardinality risk: value is client-controlled and can explode without persisted operations.'
     },
     {
       name: 'graphql.operation.type',

From a7dc6de7cd0da593a8b7038d6ad153b0e21220a6 Mon Sep 17 00:00:00 2001
From: Kamil Kisiela <kamil.kisiela@gmail.com>
Date: Mon, 16 Feb 2026 16:09:13 +0100
Subject: [PATCH 14/14] Update metrics.mdx

---
 .../src/content/router/observability/metrics.mdx   | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/packages/web/docs/src/content/router/observability/metrics.mdx b/packages/web/docs/src/content/router/observability/metrics.mdx
index d31ed9c9d37..27877394cb5 100644
--- a/packages/web/docs/src/content/router/observability/metrics.mdx
+++ b/packages/web/docs/src/content/router/observability/metrics.mdx
@@ -148,14 +148,16 @@ troubleshooting.
 
 ### Cardinality considerations
 
-For production workloads, consider disabling `graphql.operation.name` label or even `graphql.operation.type` on high-volume metrics.
+For production workloads, consider disabling the `graphql.operation.name` label, or even
+`graphql.operation.type`, on high-volume metrics.
 
 <Callout type="warning">
   `graphql.operation.name` can create very high-cardinality metrics.
 
-  Operation names come from client requests. Without persisted operations, clients can send many
-  distinct operation names (or random names), which can rapidly increase cardinality and cost in
-  Prometheus and OTLP backends.
+Operation names come from client requests. Without persisted operations, clients can send many
+distinct operation names (or random names), which can rapidly increase cardinality and cost in
+Prometheus and OTLP backends.
+
 </Callout>
 
 ```yaml filename="router.config.yaml"
@@ -179,7 +181,9 @@ telemetry:
 You can override behavior per metric under `telemetry.metrics.instrumentation.instruments`.
 
 <Callout type="tip">
-    Disable non-essential labels to control cost, since each additional label value increases active time-series cardinality - a primary billing and performance driver in platforms like Grafana and Datadog.
+  Disable non-essential labels to control cost, since each additional label value increases active
+  time-series cardinality - a primary billing and performance driver in platforms like Grafana and
+  Datadog.
 </Callout>
 
 For production guidance on label cardinality (especially `graphql.operation.name`), see