diff --git a/.github/headers/LICENSE b/.github/headers/LICENSE index a3f12d28d..7760ae7c6 100644 --- a/.github/headers/LICENSE +++ b/.github/headers/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/.gitignore b/.gitignore index d19ccad4e..0c9fbd23a 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,13 @@ code-quality-report.json go.work go.work.sum +# Local tool binaries (managed by api/Makefile) +api/bin/* + +# Server binary output +bin/ +/device-api-server + # ============================================================================== # IDE & Editor Configurations # ============================================================================== @@ -48,3 +55,9 @@ go.work.sum # Emacs *~ \#*\# + + +# ============================================================================== +# Git Worktrees +# ============================================================================== +.worktrees/ diff --git a/.versions.yaml b/.versions.yaml index 122a33f86..15a409121 100644 --- a/.versions.yaml +++ b/.versions.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -34,6 +34,6 @@ go_tools: # Protocol Buffers / gRPC protobuf: - protobuf: 'v33.0' + protobuf: 'v33.4' protoc_gen_go: 'v1.36.10' protoc_gen_go_grpc: 'v1.5.1' diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 9dbdcf56a..e7e16ac6f 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -1,18 +1,130 @@ -# Development Guide +# NVIDIA Device API: Development Guide + +This guide covers the development setup and workflows for contributing to the NVIDIA Device API. + +## Module Structure + +This repository is a multi-module monorepo containing multiple Go modules: + +| Module | Path | Description | +|--------|------|-------------| +| `github.com/nvidia/nvsentinel` | `/` | Device API Server implementation | +| `github.com/nvidia/nvsentinel/api` | `/api` | API definitions (protobuf and Go types) | +| `github.com/nvidia/nvsentinel/client-go` | `/client-go` | Kubernetes-style gRPC clients | +| `github.com/nvidia/nvsentinel/code-generator` | `/code-generator` | Code generation tools | + +The API module is designed to be imported independently by consumers who only need the type definitions. + +## Architecture + +This project bridges **gRPC** (for node-local performance) with **Kubernetes API Machinery** (for developer experience). + +1. **Definitions**: `api/proto` (Wire format) and `api/device` (Go types). +2. **Conversion**: `api/device/${version}/converter.go` maps gRPC messages to K8s-style structs. +3. **Generation**: A pipeline driven by `code-generator/kube_codegen.sh`, which utilizes a modified `client-gen` to produce gRPC-backed Kubernetes clients in the `client-go` module. + +--- + +## Code Generation Pipeline + +The NVIDIA Device API uses a multi-stage pipeline to bridge gRPC with Kubernetes API machinery. For module-specific details, see the [client-go Development Guide](./client-go/DEVELOPMENT.md). + +```mermaid +graph TD + API["API Definitions
(nvidia/nvsentinel/api)"] -->|Input| CG(client-gen
*Custom Build*) + API -->|Input| LG(lister-gen) + + CG -->|Generates| CLIENT[client/versioned] + LG -->|Generates| LISTERS[listers/] + + CLIENT & LISTERS -->|Input| IG(informer-gen) + IG -->|Generates| INFORMERS[informers/] + + CLIENT & LISTERS & INFORMERS -->|Final Output| SDK[Ready-to-use SDK] +``` + +### Build Sequence + +When you run `make code-gen` from the root, the following sequence is executed: + +1. **Protoc**: Compiles `.proto` into Go gRPC stubs in `api/gen/`. +2. **DeepCopy**: Generates `runtime.Object` methods required for K8s compatibility. +3. **Goverter**: Generates type conversion logic between Protobuf and Go structs. +4. **Custom client-gen**: Orchestrated by `code-generator/kube_codegen.sh` to produce the versioned Clientset, Informers, and Listers in `client-go/`. + +--- + +## Development Workflow + +1. **Modify**: Edit the Protobuf definitions in `api/proto` or Go types in `api/device`. +2. **Update**: Update the conversion logic in `api/device/${version}/converter.go` to handle changes, if necessary. +3. **Generate**: Run `make code-gen` from the root. This updates the gRPC stubs, helper methods, and the `client-go` SDK. +4. **Verify**: Run `make verify-codegen` to ensure the workspace is consistent. +5. **Test**: Add tests to the affected module and run `make test` from the root. + +> [!NOTE] Use the fake clients in `client-go/client/versioned/fake` for testing controllers without a real gRPC server. + +--- + +## Code Standards & Compliance + +### Commit Messages & Signing (DCO) + +We follow the [Conventional Commits](https://www.conventionalcommits.org) specification. Additionally, all commits **must** be signed off to comply with the Developer Certificate of Origin (DCO). + +```bash +# Example: feat, fix, docs, chore, refactor +git commit -s -m "feat: add new GPU condition type" +``` + +### License Headers + +Every source file (.go, .proto, .sh, Makefile) must include the Apache 2.0 license header. 
+ +- **Go/Proto Template**: See `api/hack/boilerplate.go.txt`. +- **Year**: Ensure the copyright year is current. --- -## Code Generation +## Troubleshooting -This project relies heavily on generated code to ensure consistency with the Kubernetes API machinery. +### Tooling Not Found + +We use `.versions.yaml` to pin tool versions. Our Makefile attempts to use tools from your system path or download them to your Go bin directory. + +- **Verify Installation**: `which protoc` or `which yq`. +- **Fix**: Ensure your `GOPATH/bin` is in your system `$PATH`: + ```bash + export PATH=$PATH:$(go env GOPATH)/bin + ``` + +### Generated Code Out of Sync + +If the build fails or `make verify-codegen` returns an error, your generated artifacts are likely stale. + +```bash +# Clean all generated files across the monorepo +make clean + +# Re-run the full pipeline +make code-gen +``` + +### Dependency Issues + +If you see "module not found" or checksum errors: + +```bash +# Tidy all modules +make tidy +``` + +--- -### Generation Pipeline -The `make code-gen` command orchestrates several tools: +## Getting Help -1. **Protoc**: Generates gRPC Go bindings from `api/proto`. -2. **Goverter**: Generates type-safe conversion logic between internal gRPC types and the Kubernetes-style API types defined in `api/device/`. -3. **K8s Code-Gen**: - - Generates `DeepCopy` methods for API types to support standard Kubernetes object manipulation. - - Generates a versioned, typed **clientset**, along with **listers** and **informers**, providing a native `client-go` experience for consumers. +- **Issues**: [Create an issue](https://github.com/NVIDIA/device-api/issues/new) +- **Questions**: [Start a discussion](https://github.com/NVIDIA/device-api/discussions) +- **Security**: Please refer to [SECURITY](SECURITY.md) for reporting vulnerabilities. 
--- diff --git a/Makefile b/Makefile index 79e7c5567..1dba8bcf7 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -21,14 +21,28 @@ SHELL = /usr/bin/env bash -o pipefail .SHELLFLAGS = -ec -VERSION_PKG = github.com/nvidia/nvsentinel/pkg/util/version -GIT_VERSION := $(shell git describe --tags --always --dirty) -GIT_COMMIT := $(shell git rev-parse HEAD) -BUILD_DATE := $(shell date -u +'%Y-%m-%dT%H:%M:%SZ') - -LDFLAGS := -X $(VERSION_PKG).GitVersion=$(GIT_VERSION) \ - -X $(VERSION_PKG).GitCommit=$(GIT_COMMIT) \ - -X $(VERSION_PKG).BuildDate=$(BUILD_DATE) +# Go build settings +GOOS ?= $(shell go env GOOS) +GOARCH ?= $(shell go env GOARCH) +VERSION ?= $(shell git describe --tags --always --dirty 2>/dev/null || echo "dev") +GIT_COMMIT ?= $(shell git rev-parse --short HEAD 2>/dev/null || echo "unknown") +GIT_TREE_STATE ?= $(shell if git diff --quiet 2>/dev/null; then echo "clean"; else echo "dirty"; fi) +BUILD_DATE ?= $(shell date -u +"%Y-%m-%dT%H:%M:%SZ") + +# Version package path for ldflags +VERSION_PKG = github.com/nvidia/nvsentinel/pkg/version + +# Container settings +CONTAINER_RUNTIME ?= docker +IMAGE_REGISTRY ?= ghcr.io/nvidia/nvsentinel +DOCKERFILE := deployments/container/Dockerfile + +# Linker flags +LDFLAGS = -s -w \ + -X $(VERSION_PKG).Version=$(VERSION) \ + -X $(VERSION_PKG).GitCommit=$(GIT_COMMIT) \ + -X $(VERSION_PKG).GitTreeState=$(GIT_TREE_STATE) \ + -X $(VERSION_PKG).BuildDate=$(BUILD_DATE) # ============================================================================== # Targets @@ -59,34 +73,134 @@ verify-codegen: code-gen ## Verify generated code is up-to-date. 
exit 1; \ fi -.PHONY: tidy -tidy: ## Run go mod tidy - go mod tidy - -##@ Build & Test +##@ Build .PHONY: build -build: ## Build the device-apiserver binary. - go build -ldflags "$(LDFLAGS)" -o bin/device-apiserver ./cmd/device-apiserver +build: build-modules build-server ## Build all modules and server. + +.PHONY: build-modules +build-modules: ## Build all modules. + @for mod in $(MODULES); do \ + if [ -f $$mod/Makefile ]; then \ + $(MAKE) -C $$mod build; \ + fi \ + done + +.PHONY: build-server +build-server: ## Build the Device API Server + @echo "Building device-api-server..." + @mkdir -p bin + CGO_ENABLED=0 GOOS=$(GOOS) GOARCH=$(GOARCH) go build \ + -ldflags "$(LDFLAGS)" \ + -o bin/device-api-server \ + ./cmd/device-api-server + @echo "Built bin/device-api-server" + +.PHONY: build-nvml-provider +build-nvml-provider: ## Build the NVML Provider sidecar (requires CGO) + @echo "Building nvml-provider..." + @mkdir -p bin + CGO_ENABLED=1 GOOS=$(GOOS) GOARCH=$(GOARCH) go build \ + -tags=nvml \ + -ldflags "$(LDFLAGS)" \ + -o bin/nvml-provider \ + ./cmd/nvml-provider + @echo "Built bin/nvml-provider" + +##@ Testing .PHONY: test -test: ## Run unit tests. - GOTOOLCHAIN=go1.25.5+auto go test -v $$(go list ./... | grep -vE '/pkg/client-go/(client|informers|listers)|/internal/generated/|/test/integration/|/examples/') -cover cover.out +test: test-modules test-server ## Run tests in all modules. + +.PHONY: test-modules +test-modules: ## Run tests in all modules. + @for mod in $(MODULES); do \ + if [ -f $$mod/Makefile ]; then \ + $(MAKE) -C $$mod test; \ + fi \ + done + +.PHONY: test-server +test-server: ## Run server tests only + go test -race -v ./pkg/... .PHONY: test-integration -test-integration: ## Run integration tests. +test-integration: ## Run integration tests go test -v ./test/integration/... +##@ Linting + .PHONY: lint -lint: ## Run golangci-lint. - golangci-lint run ./... +lint: ## Run linting on all modules. 
+ @for mod in $(MODULES); do \ + if [ -f $$mod/Makefile ]; then \ + $(MAKE) -C $$mod lint; \ + fi \ + done + go vet ./... + +##@ Container Images + +.PHONY: docker-build +docker-build: docker-build-server docker-build-nvml-provider ## Build all container images + +.PHONY: docker-build-server +docker-build-server: ## Build device-api-server container image + $(CONTAINER_RUNTIME) build \ + --target device-api-server \ + --build-arg VERSION=$(VERSION) \ + --build-arg GIT_COMMIT=$(GIT_COMMIT) \ + --build-arg GIT_TREE_STATE=$(GIT_TREE_STATE) \ + --build-arg BUILD_DATE=$(BUILD_DATE) \ + -t $(IMAGE_REGISTRY)/device-api-server:$(VERSION) \ + -f $(DOCKERFILE) . + +.PHONY: docker-build-nvml-provider +docker-build-nvml-provider: ## Build nvml-provider container image + $(CONTAINER_RUNTIME) build \ + --target nvml-provider \ + --build-arg VERSION=$(VERSION) \ + --build-arg GIT_COMMIT=$(GIT_COMMIT) \ + --build-arg GIT_TREE_STATE=$(GIT_TREE_STATE) \ + --build-arg BUILD_DATE=$(BUILD_DATE) \ + -t $(IMAGE_REGISTRY)/nvml-provider:$(VERSION) \ + -f $(DOCKERFILE) . + +.PHONY: docker-push +docker-push: ## Push all container images + $(CONTAINER_RUNTIME) push $(IMAGE_REGISTRY)/device-api-server:$(VERSION) + $(CONTAINER_RUNTIME) push $(IMAGE_REGISTRY)/nvml-provider:$(VERSION) + +##@ Helm + +.PHONY: helm-lint +helm-lint: ## Lint Helm chart + helm lint deployments/helm/device-api-server + +.PHONY: helm-template +helm-template: ## Render Helm chart templates + helm template device-api-server deployments/helm/device-api-server + +.PHONY: helm-package +helm-package: ## Package Helm chart + @mkdir -p dist/ + helm package deployments/helm/device-api-server -d dist/ + +##@ Cleanup .PHONY: clean -clean: ## Remove generated artifacts. - @echo "Cleaning generated artifacts..." +clean: ## Clean generated artifacts in all modules. 
+ @for mod in $(MODULES); do \ + if [ -f $$mod/Makefile ]; then \ + $(MAKE) -C $$mod clean; \ + fi \ + done rm -rf bin/ - rm -rf internal/generated/ - rm -rf pkg/client-go/client/ pkg/client-go/informers/ pkg/client-go/listers/ - find api/ -name "zz_generated.deepcopy.go" -delete - find api/ -name "zz_generated.goverter.go" -delete - rm -f cover.out + +.PHONY: tidy +tidy: ## Run go mod tidy on all modules. + @for mod in $(MODULES); do \ + echo "Tidying $$mod..."; \ + (cd $$mod && go mod tidy); \ + done + go mod tidy diff --git a/README.md b/README.md index b7bbfc818..fcaf95767 100644 --- a/README.md +++ b/README.md @@ -1,56 +1,169 @@ # NVIDIA Device API -**The NVIDIA Device API allows you to query and manipulate the state of node-local resources (such as GPUs) in Kubernetes**. Unlike the cluster-wide Kubernetes API, the Device API operates exclusively at the node level. +The NVIDIA Device API provides a Kubernetes-idiomatic Go SDK and Protobuf definitions for interacting with NVIDIA device resources. -The core control plane is the Device API server and the gRPC API that it exposes. Node-level agents, local monitoring tools, and external components communicate with one another through this node-local Device API server rather than the central Kubernetes control plane. +**Node-local GPU device state management for Kubernetes** -NVIDIA provides a [client library](./pkg/client-go) for those looking to write applications using the Device API. This library allows you to query and manipulate node-local resources using standard Kubernetes interfaces. Alternatively, the API can be accessed directly via gRPC. +The NVIDIA Device API provides a standardized gRPC interface for observing and managing GPU device states in Kubernetes environments. 
It enables coordination between: + +- **Providers** (health monitors like NVSentinel, DCGM) that detect GPU health issues +- **Consumers** (device plugins, DRA drivers) that need GPU health status for scheduling + +## Overview + +The Device API Server is a pure Go gRPC server with no hardware dependencies. +GPU enumeration and health monitoring is provided by external providers (sidecars). + +``` +┌─────────────────────────────────────────────────────────────┐ +│ GPU Node │ +│ │ +│ ┌─────────────────────────────────────────────────────────┐│ +│ │ Device API Server (DaemonSet) ││ +│ │ ││ +│ │ ┌─────────────────────────────────────────────────┐ ││ +│ │ │ GpuService (unified) │ ││ +│ │ │ Read: GetGpu, ListGpus, WatchGpus │ ││ +│ │ │ Write: CreateGpu, UpdateGpuStatus, DeleteGpu │ ││ +│ │ └────────────────────┬────────────────────────────┘ ││ +│ │ ▼ ││ +│ │ ┌──────────────────────────────────────────────────┐ ││ +│ │ │ GPU Cache (RWMutex) │ ││ +│ │ └──────────────────────────────────────────────────┘ ││ +│ └─────────────────────────────────────────────────────────┘│ +│ │ +│ Providers (gRPC clients): │ +│ ├── nvml-provider sidecar ─► CreateGpu, UpdateGpuStatus │ +│ ├── NVSentinel ────────────► CreateGpu, UpdateGpuStatus │ +│ └── Custom providers ──────► CreateGpu, UpdateGpuStatus │ +│ │ +│ Consumers (gRPC clients): │ +│ ├── Device Plugins ────────► GetGpu, ListGpus, WatchGpus │ +│ └── DRA Drivers ───────────► GetGpu, ListGpus, WatchGpus │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Key Features + +- **Pure Go server**: No hardware dependencies; providers run as separate sidecars +- **Read-blocking semantics**: Consumer reads block during provider updates to prevent stale data +- **Multiple provider support**: Aggregate health status from NVSentinel, DCGM, or custom providers +- **Watch streams**: Real-time GPU state change notifications +- **Prometheus metrics**: Full observability with alerting rules +- **Helm chart**: Production-ready 
Kubernetes deployment + +## Repository Structure + +| Module | Description | +| :--- | :--- | +| [`api/`](./api) | Protobuf definitions and Go types for the Device API. | +| [`client-go/`](./client-go) | Kubernetes-style generated clients, informers, and listers. | +| [`code-generator/`](./code-generator) | Tools for generating NVIDIA-specific client logic. | +| [`cmd/device-api-server/`](./cmd/device-api-server) | Device API Server binary | +| [`pkg/deviceapiserver/`](./pkg/deviceapiserver) | Server implementation | +| [`charts/`](./charts) | Helm chart for Kubernetes deployment | --- ## Quick Start +### Deploy Device API Server + +```bash +# Install with Helm +helm install device-api-server ./deployments/helm/device-api-server \ + --namespace device-api --create-namespace +``` + +For GPU enumeration and health monitoring, deploy the nvml-provider sidecar. +See the [nvml-sidecar demo](demos/nvml-sidecar-demo.sh) for an example deployment. + +### Using the Go Client + +```bash +go get github.com/nvidia/device-api/api@latest +``` + ```go import ( - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "github.com/nvidia/nvsentinel/pkg/client-go/clientset/versioned" - "github.com/nvidia/nvsentinel/pkg/grpc/client" + v1alpha1 "github.com/nvidia/device-api/api/gen/go/device/v1alpha1" +) +``` + +### Example: List GPUs + +```go +package main + +import ( + "context" + "log" + + v1alpha1 "github.com/nvidia/device-api/api/gen/go/device/v1alpha1" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" ) func main() { - ctx := context.Background() + // Connect via Unix socket (recommended for node-local access) + conn, err := grpc.NewClient( + "unix:///var/run/device-api/device.sock", + grpc.WithTransportCredentials(insecure.NewCredentials()), + ) + if err != nil { + log.Fatalf("failed to connect: %v", err) + } + defer conn.Close() - // Connect to the local node's Device API server - config := &client.Config{Target: 
"unix:///var/run/nvidia-device-api/device-api.sock"} - clientset := versioned.NewForConfigOrDie(config) + client := v1alpha1.NewGpuServiceClient(conn) - // Standard Kubernetes-style List call - gpus, err := clientset.DeviceV1alpha1().GPUs().List(ctx, metav1.ListOptions{}) + // List all GPUs + resp, err := client.ListGpus(context.Background(), &v1alpha1.ListGpusRequest{}) if err != nil { - panic(err) + log.Fatalf("failed to list GPUs: %v", err) + } + + for _, gpu := range resp.GpuList.Items { + log.Printf("GPU: %s (UUID: %s)", gpu.Name, gpu.Spec.Uuid) + for _, cond := range gpu.Status.Conditions { + log.Printf(" %s: %s (%s)", cond.Type, cond.Status, cond.Reason) + } } } ``` -See [examples](./examples) for additional details. +### Using grpcurl ---- +```bash +# List GPUs +grpcurl -plaintext localhost:50051 nvidia.device.v1alpha1.GpuService/ListGpus + +# Watch for changes +grpcurl -plaintext localhost:50051 nvidia.device.v1alpha1.GpuService/WatchGpus +``` -## Components +## API Overview -### Device API Server -The `device-apiserver` is a node-local control plane for NVIDIA devices. 
+### GpuService -**Running the server**: -```bash -# Build the binary -make build +The unified `GpuService` follows Kubernetes API conventions with standard CRUD methods: -# Start the server with a local database -./bin/device-apiserver \ - --bind-address="unix:///var/run/nvidia-device-api/device-api.sock" \ - --datastore-endpoint="sqlite:///var/lib/nvidia-device-api/state.db" -``` +**Read Operations** (for consumers like device plugins and DRA drivers): + +| Method | Description | +|--------|-------------| +| `GetGpu` | Retrieves a single GPU resource by its unique name | +| `ListGpus` | Retrieves a list of all GPU resources | +| `WatchGpus` | Streams lifecycle events (ADDED, MODIFIED, DELETED) for GPU resources | + +**Write Operations** (for providers like health monitors): + +| Method | Description | +|--------|-------------| +| `CreateGpu` | Register a new GPU with the server | +| `UpdateGpu` | Replace entire GPU resource | +| `UpdateGpuStatus` | Update GPU status only (acquires write lock) | +| `DeleteGpu` | Remove a GPU from the server | --- @@ -58,29 +171,60 @@ make build ### Prerequisites -* **Go**: `v1.25+` -* **Protoc**: Required for protobuf generation. -* **Make** +- **Go**: `v1.25+` +- **Protoc**: Required for protobuf generation +- **golangci-lint**: Required for code quality checks +- **Make**: Used for orchestrating build and generation tasks +- **Helm 3.0+**: For chart development -### Workflow -The project utilizes a unified generation pipeline. **Avoid editing generated files directly**. 
If Protobuf definitions (`.proto`) or Go types (`_types.go`) are modified, run the following commands to synchronize the repository: +### Build ```bash -# Sync all gRPC bindings, DeepCopy/Conversion methods, Clients, and Server +# Build everything +make build + +# Build server only +make build-server + +# Generate protobuf code make code-gen +``` -# Run tests +### Test + +```bash +# Run all tests make test -# Verify code quality -make lint +# Run server tests only +make test-server +``` -# Optional: Run integration tests -make test-integration +### Lint + +```bash +make lint ``` --- +## Documentation + +- **[API Reference](docs/api/device-api-server.md)** - Complete gRPC API documentation +- **[Operations Guide](docs/operations/device-api-server.md)** - Deployment, configuration, monitoring +- **[Helm Chart](deployments/helm/device-api-server/README.md)** - Chart configuration reference +- **[Design Documents](docs/design/)** - Architecture and design decisions + +The `client-go` module includes several examples for how to use the generated clients: + +* **Standard Client**: Basic CRUD operations. +* **Shared Informers**: High-performance caching for controllers. +* **Watch**: Real-time event streaming via gRPC. + +See the [examples](./client-go/examples) directory for details. + +--- + ## Contributing We welcome contributions! Please see: diff --git a/api/device/v1alpha1/converter.go b/api/device/v1alpha1/converter.go index ff649f992..14b11b5e0 100644 --- a/api/device/v1alpha1/converter.go +++ b/api/device/v1alpha1/converter.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -54,6 +54,17 @@ type Converter interface { // FromProtobufObjectMeta converts a protobuf ObjectMeta into a metav1.ObjectMeta object. 
// + // The following fields are intentionally excluded from the proto API: + // - DeletionTimestamp/GracePeriodSeconds: Managed by server-side deletion logic + // - Labels/Annotations: Not needed for device-level proto API; K8s controllers + // should use the native K8s API for label/annotation management + // - OwnerReferences/Finalizers: Not exposed in proto to prevent external + // controllers from creating dependency chains via the device API + // - ManagedFields/SelfLink: Server-managed metadata, not user-facing + // + // If labels/annotations support is needed in the future, add them to the + // proto ObjectMeta definition and remove the goverter:ignore directives. + // // goverter:map Uid UID // goverter:ignore GenerateName DeletionTimestamp DeletionGracePeriodSeconds // goverter:ignore Labels Annotations OwnerReferences Finalizers ManagedFields SelfLink diff --git a/api/device/v1alpha1/gpu_types.go b/api/device/v1alpha1/gpu_types.go index e551b85a9..704bea40e 100644 --- a/api/device/v1alpha1/gpu_types.go +++ b/api/device/v1alpha1/gpu_types.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -48,8 +48,7 @@ type GPUStatus struct { // // +genclient // +genclient:nonNamespaced -// +genclient:onlyVerbs=get,list,watch,create,update,delete -// +genclient:noStatus +// +genclient:onlyVerbs=get,list,watch,create,update,updateStatus,delete // +k8s:deepcopy-gen=true // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object type GPU struct { diff --git a/api/device/v1alpha1/zz_generated.deepcopy.go b/api/device/v1alpha1/zz_generated.deepcopy.go index 0c399eb3e..f5cf44cb4 100644 --- a/api/device/v1alpha1/zz_generated.deepcopy.go +++ b/api/device/v1alpha1/zz_generated.deepcopy.go @@ -1,7 +1,7 @@ //go:build !ignore_autogenerated // +build !ignore_autogenerated -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/api/proto/device/v1alpha1/gpu.proto b/api/proto/device/v1alpha1/gpu.proto index 2641c415e..88577a9c6 100644 --- a/api/proto/device/v1alpha1/gpu.proto +++ b/api/proto/device/v1alpha1/gpu.proto @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -194,6 +194,9 @@ service GpuService { // UpdateGpu updates a single GPU resource. rpc UpdateGpu(UpdateGpuRequest) returns (Gpu); + // UpdateGpuStatus updates only the status subresource of a GPU. + rpc UpdateGpuStatus(UpdateGpuStatusRequest) returns (Gpu); + // DeleteGpu deletes a single GPU resource. rpc DeleteGpu(DeleteGpuRequest) returns (google.protobuf.Empty); } @@ -289,6 +292,18 @@ message UpdateGpuRequest { UpdateOptions opts = 2; } +// UpdateGpuStatusRequest specifies the GPU whose status should be updated. 
+// Only metadata (name, namespace, resource_version) and status fields are used. +message UpdateGpuStatusRequest { + // gpu is the GPU resource with updated status. + // The server reads metadata.name, metadata.namespace, metadata.resource_version + // and status from this object. All other fields are ignored. + Gpu gpu = 1; + + // opts contains the options for the update. + UpdateOptions opts = 2; +} + message DeleteGpuRequest { // The unique resource name of the GPU to delete. string name = 1; diff --git a/cmd/device-api-server/main.go b/cmd/device-api-server/main.go new file mode 100644 index 000000000..91f61b039 --- /dev/null +++ b/cmd/device-api-server/main.go @@ -0,0 +1,186 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package main implements the Device API Server. +// +// The Device API Server is a node-local gRPC cache server deployed as a +// Kubernetes DaemonSet. It acts as an intermediary between providers +// (health monitors) that update GPU device states and consumers +// (device plugins, DRA drivers) that read device states. 
+// +// Key features: +// - Read-blocking semantics: Reads are blocked during provider updates +// to prevent consumers from reading stale data +// - Multiple provider support: Multiple health monitors can update +// different conditions on the same GPUs +// - Multiple consumer support: Device plugins, DRA drivers, and other +// consumers can read and watch GPU states +// - Observability: Prometheus metrics, structured logging with klog/v2 +package main + +import ( + "context" + "encoding/json" + "fmt" + "os" + "os/signal" + "syscall" + + "github.com/spf13/pflag" + "golang.org/x/sync/errgroup" + cliflag "k8s.io/component-base/cli/flag" + "k8s.io/klog/v2" + + "github.com/nvidia/nvsentinel/pkg/controlplane/apiserver" + "github.com/nvidia/nvsentinel/pkg/controlplane/apiserver/options" + "github.com/nvidia/nvsentinel/pkg/storage/storagebackend" + "github.com/nvidia/nvsentinel/pkg/version" + + // Import service providers so their init() functions register them. + _ "github.com/nvidia/nvsentinel/pkg/services/device/v1alpha1" +) + +const ( + // ComponentName is the name of this component for logging. + ComponentName = "device-api-server" +) + +func main() { + opts := options.NewOptions() + + fss := cliflag.NamedFlagSets{} + opts.AddFlags(&fss) + + // Add a version flag to the global flag set. + showVersion := pflag.Bool("version", false, "Show version and exit") + + // Merge all named flag sets into the global pflag command line. + for _, fs := range fss.FlagSets { + pflag.CommandLine.AddFlagSet(fs) + } + + pflag.Parse() + + // Handle version flag before any other initialization. + if *showVersion { + v := version.Get() + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + if err := enc.Encode(v); err != nil { + fmt.Fprintf(os.Stderr, "Failed to encode version: %v\n", err) + os.Exit(1) + } + os.Exit(0) + } + + // Set up signal handling for graceful shutdown. 
+ ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) + defer cancel() + + // Complete fills in defaults and resolves environment overrides. + completedOpts, err := opts.Complete(ctx) + if err != nil { + fmt.Fprintf(os.Stderr, "Failed to complete options: %v\n", err) + os.Exit(1) + } + + // Validate rejects invalid flag combinations. + if errs := completedOpts.Validate(); len(errs) > 0 { + for _, e := range errs { + fmt.Fprintf(os.Stderr, "Invalid configuration: %v\n", e) + } + os.Exit(1) + } + + // Create root logger with component name. + logger := klog.Background().WithName(ComponentName) + ctx = klog.NewContext(ctx, logger) + + versionInfo := version.Get() + logger.Info("Starting server", + "version", versionInfo.Version, + "commit", versionInfo.GitCommit, + "buildDate", versionInfo.BuildDate, + ) + + // Build the apiserver configuration from completed options. + apiserverConfig, err := apiserver.NewConfig(ctx, completedOpts) + if err != nil { + logger.Error(err, "Failed to create apiserver config") + os.Exit(1) + } + + completedAPIServerConfig, err := apiserverConfig.Complete() + if err != nil { + logger.Error(err, "Failed to complete apiserver config") + os.Exit(1) + } + + // Build the storage backend configuration from completed options. + storageConfig, err := storagebackend.NewConfig(ctx, completedOpts.Storage) + if err != nil { + logger.Error(err, "Failed to create storage config") + os.Exit(1) + } + + completedStorageConfig, err := storageConfig.Complete() + if err != nil { + logger.Error(err, "Failed to complete storage config") + os.Exit(1) + } + + storage, err := completedStorageConfig.New() + if err != nil { + logger.Error(err, "Failed to create storage backend") + os.Exit(1) + } + + preparedStorage, err := storage.PrepareRun(ctx) + if err != nil { + logger.Error(err, "Failed to prepare storage backend") + os.Exit(1) + } + + // Create, prepare the device API server before starting the run loop. 
+ server, err := completedAPIServerConfig.New(storage) + if err != nil { + logger.Error(err, "Failed to create device API server") + os.Exit(1) + } + + prepared, err := server.PrepareRun(ctx) + if err != nil { + logger.Error(err, "Failed to prepare device API server") + os.Exit(1) + } + + // Run storage and server concurrently. If either fails, the errgroup + // cancels the shared context so the other component shuts down. + g, gctx := errgroup.WithContext(ctx) + + g.Go(func() error { + return preparedStorage.Run(gctx) + }) + + g.Go(func() error { + return prepared.Run(gctx) + }) + + if err := g.Wait(); err != nil { + logger.Error(err, "Server error") + os.Exit(1) + } + + logger.Info("Server stopped gracefully") +} diff --git a/cmd/device-apiserver/apiserver.go b/cmd/device-apiserver/apiserver.go deleted file mode 100644 index 3d2f8352a..000000000 --- a/cmd/device-apiserver/apiserver.go +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package main - -import ( - "os" - - "k8s.io/component-base/cli" - - "github.com/nvidia/nvsentinel/cmd/device-apiserver/app" -) - -func main() { - command := app.NewAPIServerCommand() - code := cli.Run(command) - os.Exit(code) -} diff --git a/cmd/device-apiserver/app/config.go b/cmd/device-apiserver/app/config.go deleted file mode 100644 index 520b4c0c2..000000000 --- a/cmd/device-apiserver/app/config.go +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package app - -import ( - "context" - - "github.com/nvidia/nvsentinel/cmd/device-apiserver/app/options" - controlplane "github.com/nvidia/nvsentinel/pkg/controlplane/apiserver" - "github.com/nvidia/nvsentinel/pkg/storage/storagebackend" -) - -type Config struct { - Options options.CompletedOptions - - Storage *storagebackend.Config - APIs *controlplane.Config -} - -type completedConfig struct { - Options options.CompletedOptions - - Storage storagebackend.CompletedConfig - APIs controlplane.CompletedConfig -} - -type CompletedConfig struct { - *completedConfig -} - -func NewConfig(ctx context.Context, opts options.CompletedOptions) (*Config, error) { - c := &Config{ - Options: opts, - } - - storageConfig, err := storagebackend.NewConfig(ctx, opts.Storage) - if err != nil { - return nil, err - } - - c.Storage = storageConfig - - controlPlaneConfig, err := controlplane.NewConfig(ctx, opts.CompletedOptions) - if err != nil { - return nil, err - } - - c.APIs = controlPlaneConfig - - return c, nil -} - -func (c *Config) Complete() (CompletedConfig, error) { - if c == nil || c.Storage == nil || c.APIs == nil { - return CompletedConfig{}, nil - } - - completedStorage, err := c.Storage.Complete() - if err != nil { - return CompletedConfig{}, err - } - - completedAPIs, err := c.APIs.Complete() - if err != nil { - return CompletedConfig{}, err - } - - return CompletedConfig{&completedConfig{ - Options: c.Options, - - Storage: completedStorage, - APIs: completedAPIs, - }}, nil -} diff --git a/cmd/device-apiserver/app/config_test.go b/cmd/device-apiserver/app/config_test.go deleted file mode 100644 index a02d0ec64..000000000 --- a/cmd/device-apiserver/app/config_test.go +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package app - -import ( - "context" - "testing" - - "github.com/nvidia/nvsentinel/cmd/device-apiserver/app/options" -) - -func TestConfig(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - opts := options.NewServerRunOptions() - - completedOpts, err := opts.Complete(ctx) - if err != nil { - t.Fatalf("Failed to complete options: %v", err) - } - - cfg, err := NewConfig(ctx, completedOpts) - if err != nil { - t.Fatalf("NewConfig failed: %v", err) - } - - if cfg.Storage == nil { - t.Error("NewConfig did not initialize Storage config") - } - if cfg.APIs == nil { - t.Error("NewConfig did not initialize APIs config") - } - - t.Run("Complete", func(t *testing.T) { - completedCfg, err := cfg.Complete() - if err != nil { - t.Fatalf("Complete failed: %v", err) - } - - if completedCfg.completedConfig == nil { - t.Fatal("CompletedConfig internal pointer is nil") - } - - validationErrors := completedCfg.Options.Validate() - if len(validationErrors) > 0 { - t.Errorf("CompletedConfig is invalid: %v", validationErrors) - } - }) - - t.Run("NilSafety", func(t *testing.T) { - var nilCfg *Config - _, err := nilCfg.Complete() - if err != nil { - t.Errorf("Complete() on nil config should not return error, got: %v", err) - } - - partialCfg := &Config{} - _, err = partialCfg.Complete() - if err != nil { - t.Errorf("Complete() on empty config should handle nil sub-fields gracefully, got: %v", err) - } - }) -} diff --git a/cmd/device-apiserver/app/main_test.go b/cmd/device-apiserver/app/main_test.go deleted file mode 100644 
index b1f6de7de..000000000 --- a/cmd/device-apiserver/app/main_test.go +++ /dev/null @@ -1,11 +0,0 @@ -package app - -import ( - "testing" - - "github.com/nvidia/nvsentinel/pkg/util/testutils" -) - -func TestMain(m *testing.M) { - testutils.VerifyTestMain(m) -} diff --git a/cmd/device-apiserver/app/options/options.go b/cmd/device-apiserver/app/options/options.go deleted file mode 100644 index 498edc89f..000000000 --- a/cmd/device-apiserver/app/options/options.go +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package options - -import ( - "context" - - cp "github.com/nvidia/nvsentinel/pkg/controlplane/apiserver/options" - cliflag "k8s.io/component-base/cli/flag" -) - -type ServerRunOptions struct { - *cp.Options -} - -type completedOptions struct { - cp.CompletedOptions -} - -type CompletedOptions struct { - *completedOptions -} - -func NewServerRunOptions() *ServerRunOptions { - return &ServerRunOptions{ - Options: cp.NewOptions(), - } -} - -func (s *ServerRunOptions) Flags() cliflag.NamedFlagSets { - fss := cliflag.NamedFlagSets{} - if s == nil || s.Options == nil { - return fss - } - - s.AddFlags(&fss) - - return fss -} - -func (o *ServerRunOptions) Complete(ctx context.Context) (CompletedOptions, error) { - if o == nil { - return CompletedOptions{completedOptions: &completedOptions{}}, nil - } - - controlplane, err := o.Options.Complete(ctx) - if err != nil { - return CompletedOptions{}, err - } - - completed := completedOptions{ - CompletedOptions: controlplane, - } - - return CompletedOptions{ - completedOptions: &completed, - }, nil -} - -func (o completedOptions) Validate() []error { - errs := o.CompletedOptions.Validate() - - return errs -} diff --git a/cmd/device-apiserver/app/options/options_test.go b/cmd/device-apiserver/app/options/options_test.go deleted file mode 100644 index b81e5ac95..000000000 --- a/cmd/device-apiserver/app/options/options_test.go +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package options - -import ( - "context" - "testing" -) - -func TestServerRunOptions(t *testing.T) { - opts := NewServerRunOptions() - if opts == nil || opts.Options == nil { - t.Fatal("NewServerRunOptions failed to initialize internal options") - } - - fss := opts.Flags() - if len(fss.FlagSets) == 0 { - t.Error("Flags() returned empty NamedFlagSets; expected flags from internal options") - } - - var nilOpts *ServerRunOptions - nilFss := nilOpts.Flags() - if len(nilFss.FlagSets) != 0 { - t.Error("Flags() on nil options should return empty flag sets") - } - - t.Run("CompleteAndValidate", func(t *testing.T) { - ctx := context.Background() - - completed, err := opts.Complete(ctx) - if err != nil { - t.Fatalf("Complete failed: %v", err) - } - - if completed.completedOptions == nil { - t.Fatal("CompletedOptions internal pointer is nil") - } - - errs := completed.Validate() - if len(errs) > 0 { - t.Logf("Note: Default validation returned %d errors (this is expected if defaults require setup)", len(errs)) - } - }) - - t.Run("CompleteNil", func(t *testing.T) { - var nilOpts *ServerRunOptions - completed, err := nilOpts.Complete(context.Background()) - if err != nil { - t.Errorf("Complete() on nil options should not return error, got: %v", err) - } - if completed.completedOptions == nil { - t.Error("Complete() on nil options should return a valid wrapper") - } - }) -} diff --git a/cmd/device-apiserver/app/server.go b/cmd/device-apiserver/app/server.go deleted file mode 100644 index be9165554..000000000 --- a/cmd/device-apiserver/app/server.go +++ /dev/null @@ -1,157 +0,0 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package app - -import ( - "context" - "os" - - "github.com/nvidia/nvsentinel/cmd/device-apiserver/app/options" - _ "github.com/nvidia/nvsentinel/pkg/services/device/v1alpha1" - "github.com/nvidia/nvsentinel/pkg/util/verflag" - utilversion "github.com/nvidia/nvsentinel/pkg/util/version" - "github.com/spf13/cobra" - "golang.org/x/sync/errgroup" - utilerrors "k8s.io/apimachinery/pkg/util/errors" - genericapiserver "k8s.io/apiserver/pkg/server" - cliflag "k8s.io/component-base/cli/flag" - "k8s.io/component-base/cli/globalflag" - "k8s.io/component-base/logs" - logsapi "k8s.io/component-base/logs/api/v1" - "k8s.io/component-base/term" - "k8s.io/klog/v2" -) - -// NewAPIServerCommand creates a *cobra.Command object with default parameters -func NewAPIServerCommand() *cobra.Command { - s := options.NewServerRunOptions() - ctx := genericapiserver.SetupSignalContext() - - cmd := &cobra.Command{ - Use: "device-apiserver", - Long: `The Device API server validates and configures data -for the api objects which include gpus and others. The API Server services -gRPC operations and provides the frontend to a node's shared state through -which all other node-local components interact.`, - - RunE: func(cmd *cobra.Command, args []string) error { - verflag.PrintAndExitIfRequested() - - fs := cmd.Flags() - // Activate logging as soon as possible, after that - // show flags with the final logging configuration. 
- logsapi.ReapplyHandling = logsapi.ReapplyHandlingIgnoreUnchanged - if err := logsapi.ValidateAndApply(s.Logs, nil); err != nil { - return err - } - - cliflag.PrintFlags(fs) - - // set default options - completedOptions, err := s.Complete(ctx) - if err != nil { - return err - } - - // validate options - if errs := completedOptions.Validate(); len(errs) != 0 { - return utilerrors.NewAggregate(errs) - } - - return Run(ctx, completedOptions) - }, - Args: cobra.NoArgs, - } - cmd.SetContext(ctx) - - fs := cmd.Flags() - namedFlagSets := s.Flags() - verflag.AddFlags(namedFlagSets.FlagSet("global")) - globalflag.AddGlobalFlags(namedFlagSets.FlagSet("global"), cmd.Name(), logs.SkipLoggingConfigurationFlags()) - - for _, f := range namedFlagSets.FlagSets { - fs.AddFlagSet(f) - } - - cols, _, _ := term.TerminalSize(cmd.OutOrStdout()) - cliflag.SetUsageAndHelpFunc(cmd, namedFlagSets, cols) - - return cmd -} - -// Run runs the specified APIServer. This should never exit. -func Run(ctx context.Context, opts options.CompletedOptions) error { - logger := klog.FromContext(ctx).WithValues("node", opts.NodeName) - ctx = klog.NewContext(ctx, logger) - - logger.Info("Initializing Device API Server", "version", utilversion.Get()) - logger.V(2).Info("Golang settings", - "GOGC", os.Getenv("GOGC"), - "GOMAXPROCS", os.Getenv("GOMAXPROCS"), - "GOTRACEBACK", os.Getenv("GOTRACEBACK"), - ) - - config, err := NewConfig(ctx, opts) - if err != nil { - return err - } - - completed, err := config.Complete() - if err != nil { - return err - } - - // Initialize and prepare storage to be injected into the server for readiness. - storage, err := completed.Storage.New() - if err != nil { - return err - } - - // Inject storage into the server to coordinate startup. 
- server, err := completed.APIs.New(storage) - if err != nil { - return err - } - - g, ctx := errgroup.WithContext(ctx) - - g.Go(func() error { - preparedStorage, err := storage.PrepareRun(ctx) - if err != nil { - return err - } - - return preparedStorage.Run(ctx) - }) - - g.Go(func() error { - preparedServer, err := server.PrepareRun(ctx) - if err != nil { - return err - } - - return preparedServer.Run(ctx) - }) - - err = g.Wait() - if err != nil { - logger.Error(err, "internal error: Device API Server exited with error") - return err - } - - logger.Info("Device API Server shut down gracefully") - - return nil -} diff --git a/cmd/device-apiserver/app/server_test.go b/cmd/device-apiserver/app/server_test.go deleted file mode 100644 index a81dac2da..000000000 --- a/cmd/device-apiserver/app/server_test.go +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package app - -import ( - "context" - "fmt" - "os" - "path/filepath" - "strings" - "testing" - "time" - - "github.com/nvidia/nvsentinel/cmd/device-apiserver/app/options" - "github.com/nvidia/nvsentinel/pkg/util/testutils" -) - -func TestRun(t *testing.T) { - opts := options.NewServerRunOptions() - - localSocket := testutils.NewUnixAddr(t) - kineSocket := fmt.Sprintf("unix://%s", testutils.NewUnixAddr(t)) - healthAddr := testutils.GetFreeTCPAddress(t) - - opts.GRPC.BindAddress = "unix://" + localSocket - opts.HealthAddress = healthAddr - opts.NodeName = "test-node" - - tmpDir := t.TempDir() - opts.Storage.DatabaseDir = tmpDir - opts.Storage.DatabasePath = tmpDir + "state.db" - opts.Storage.KineSocketPath = kineSocket - opts.Storage.KineConfig.Endpoint = fmt.Sprintf("sqlite://%s/db.sqlite", tmpDir) - opts.Storage.KineConfig.Listener = kineSocket - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - completedOpts, err := opts.Complete(ctx) - if err != nil { - t.Fatalf("Failed to complete options: %v", err) - } - - errCh := make(chan error, 1) - go func() { - errCh <- Run(ctx, completedOpts) - }() - - testutils.WaitForStatus(t, healthAddr, "", 5*time.Second, testutils.IsServing) - - cancel() - - select { - case err := <-errCh: - if err != nil && err != context.Canceled { - t.Errorf("exited with unexpected error: %v", err) - } - case <-time.After(5 * time.Second): - t.Fatal("Failed to shut down within grace period") - } - - if _, err := os.Stat(localSocket); err == nil { - t.Errorf("socket file %q still exists after shutdown", localSocket) - } -} - -func TestRun_StorageFailure(t *testing.T) { - opts := options.NewServerRunOptions() - - tmpDir := t.TempDir() - readOnlyDir := filepath.Join(tmpDir, "readonly") - if err := os.Mkdir(readOnlyDir, 0444); err != nil { - t.Fatal(err) - } - - opts.NodeName = "test-node" - opts.Storage.DatabaseDir = readOnlyDir - opts.Storage.DatabasePath = readOnlyDir + "state.db" - opts.Storage.KineSocketPath = 
filepath.Join(readOnlyDir, "kine.sock") - opts.Storage.KineConfig.Endpoint = fmt.Sprintf("sqlite://%s/db.sqlite", readOnlyDir) - - opts.HealthAddress = testutils.GetFreeTCPAddress(t) - opts.GRPC.BindAddress = "unix://" + filepath.Join(tmpDir, "api.sock") - - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - - completedOpts, _ := opts.Complete(ctx) - - errCh := make(chan error, 1) - go func() { - errCh <- Run(ctx, completedOpts) - }() - - select { - case err := <-errCh: - if err == nil { - t.Error("Expected server to fail due to storage error, but it exited with nil") - } - if !strings.Contains(err.Error(), "storage") && !strings.Contains(err.Error(), "permission denied") { - t.Errorf("Expected storage or permission error, got: %v", err) - } - case <-time.After(5 * time.Second): - t.Fatal("Server should have failed immediately on storage error, but it timed out/hung") - } -} diff --git a/cmd/nvml-provider/main.go b/cmd/nvml-provider/main.go new file mode 100644 index 000000000..57ec0f835 --- /dev/null +++ b/cmd/nvml-provider/main.go @@ -0,0 +1,726 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build nvml + +// Command nvml-provider is a standalone NVML-based GPU health provider that +// connects to a device-api-server instance via gRPC. 
+// +// This is designed to run as a sidecar container alongside device-api-server, +// providing GPU enumeration and health monitoring via NVML. +// +// Usage: +// +// nvml-provider --server-address=localhost:9001 --driver-root=/run/nvidia/driver +package main + +import ( + "context" + "flag" + "fmt" + "net" + "net/http" + "os" + "os/signal" + "strings" + "sync" + "syscall" + "time" + + "github.com/NVIDIA/go-nvml/pkg/nvml" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/health/grpc_health_v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/klog/v2" + + devicev1alpha1 "github.com/nvidia/nvsentinel/api/device/v1alpha1" + clientset "github.com/nvidia/nvsentinel/pkg/client-go/client/versioned" + gpuclient "github.com/nvidia/nvsentinel/pkg/client-go/client/versioned/typed/device/v1alpha1" + nvmlpkg "github.com/nvidia/nvsentinel/pkg/providers/nvml" +) + +const ( + // DefaultProviderID is the default identifier for this provider. + DefaultProviderID = "nvml-provider-sidecar" + + // HeartbeatInterval is how often to send heartbeats. + HeartbeatInterval = 10 * time.Second + + // HealthCheckPort is the HTTP port for health checks. + HealthCheckPort = 8082 + + // EventTimeout is the timeout for NVML event wait (in milliseconds). + EventTimeout = 5000 + + // DefaultServerAddress is the default device-api-server address. + DefaultServerAddress = "localhost:9001" + + // ConnectionRetryInterval is how long to wait between connection attempts. + ConnectionRetryInterval = 5 * time.Second + + // MaxConnectionRetries is the maximum number of connection attempts. + MaxConnectionRetries = 60 +) + +// Config holds the provider configuration. +type Config struct { + ServerAddress string + ProviderID string + DriverRoot string + HealthCheckEnabled bool + HealthCheckPort int + IgnoredXids []uint64 +} + +// DefaultConfig returns a Config with sensible defaults. 
+func DefaultConfig() Config { + return Config{ + ServerAddress: DefaultServerAddress, + ProviderID: DefaultProviderID, + DriverRoot: "/run/nvidia/driver", + HealthCheckEnabled: true, + HealthCheckPort: HealthCheckPort, + } +} + +// Provider is the standalone NVML provider that connects to device-api-server. +type Provider struct { + config Config + logger klog.Logger + + // gRPC clients + conn *grpc.ClientConn + gpuClient gpuclient.GPUInterface + healthClient grpc_health_v1.HealthClient + + // NVML + nvmllib nvml.Interface + eventSet nvml.EventSet + + // State + mu sync.RWMutex + gpuUUIDs []string + initialized bool + connected bool + healthy bool + monitorRunning bool + + // Lifecycle + ctx context.Context + cancel context.CancelFunc + wg sync.WaitGroup +} + +// NewProvider creates a new standalone NVML provider. +func NewProvider(cfg Config, logger klog.Logger) *Provider { + return &Provider{ + config: cfg, + logger: logger.WithName("nvml-provider"), + } +} + +func main() { + // Initialize logging flags first + klog.InitFlags(nil) + + cfg := parseFlags() + // flag.Parse() is called inside parseFlags() + + logger := klog.Background() + logger.Info("Starting NVML provider sidecar", + "serverAddress", cfg.ServerAddress, + "providerID", cfg.ProviderID, + "driverRoot", cfg.DriverRoot, + "healthCheckEnabled", cfg.HealthCheckEnabled, + ) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Handle signals + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) + go func() { + sig := <-sigCh + logger.Info("Received signal, shutting down", "signal", sig) + cancel() + }() + + // Create and run provider + provider := NewProvider(cfg, logger) + if err := provider.Run(ctx); err != nil { + logger.Error(err, "Provider failed") + os.Exit(1) + } + + logger.Info("NVML provider shutdown complete") +} + +func parseFlags() Config { + cfg := DefaultConfig() + + flag.StringVar(&cfg.ServerAddress, "server-address", 
cfg.ServerAddress, + "Address of device-api-server gRPC endpoint") + flag.StringVar(&cfg.ProviderID, "provider-id", cfg.ProviderID, + "Unique identifier for this provider") + flag.StringVar(&cfg.DriverRoot, "driver-root", cfg.DriverRoot, + "Root path for NVIDIA driver libraries") + flag.BoolVar(&cfg.HealthCheckEnabled, "health-check", cfg.HealthCheckEnabled, + "Enable XID event monitoring for health checks") + flag.IntVar(&cfg.HealthCheckPort, "health-port", cfg.HealthCheckPort, + "HTTP port for health check endpoints") + + // Parse flags + flag.Parse() + + // Track which flags were explicitly set on the command line. + explicitFlags := make(map[string]bool) + flag.Visit(func(f *flag.Flag) { + explicitFlags[f.Name] = true + }) + + // Environment variables are used as fallback when the corresponding + // flag was not explicitly provided on the command line. + if !explicitFlags["server-address"] { + if addr := os.Getenv("PROVIDER_SERVER_ADDRESS"); addr != "" { + cfg.ServerAddress = addr + } + } + if !explicitFlags["provider-id"] { + if id := os.Getenv("PROVIDER_ID"); id != "" { + cfg.ProviderID = id + } + } + if !explicitFlags["driver-root"] { + // NVIDIA_DRIVER_ROOT follows the NVIDIA Container Toolkit convention. + // See: https://github.com/NVIDIA/nvidia-container-toolkit + if root := os.Getenv("NVIDIA_DRIVER_ROOT"); root != "" { + cfg.DriverRoot = root + } + } + + return cfg +} + +// Run starts the provider and blocks until the context is cancelled. 
+func (p *Provider) Run(ctx context.Context) error { + p.ctx, p.cancel = context.WithCancel(ctx) + defer p.cancel() + + // Start health check server + p.wg.Add(1) + go p.runHealthServer() + + // Initialize NVML + if err := p.initNVML(); err != nil { + return fmt.Errorf("failed to initialize NVML: %w", err) + } + defer p.shutdownNVML() + + // Connect to server with retry + if err := p.connectWithRetry(); err != nil { + return fmt.Errorf("failed to connect to server: %w", err) + } + defer p.disconnect() + + // Enumerate and register GPUs (or reconcile if reconnecting) + if err := p.enumerateAndRegisterGPUs(); err != nil { + return fmt.Errorf("failed to enumerate GPUs: %w", err) + } + + // Reconcile state (handles restart/reconnection scenarios) + if err := p.ReconcileState(p.ctx); err != nil { + // Reconciliation failure is not fatal - log and continue + p.logger.Error(err, "State reconciliation failed, continuing") + } + + // Start heartbeat loop + p.wg.Add(1) + go p.runHeartbeatLoop() + + // Start health monitoring if enabled + if p.config.HealthCheckEnabled && len(p.gpuUUIDs) > 0 { + p.wg.Add(1) + go p.runHealthMonitor() + } + + // Mark as healthy + p.setHealthy(true) + + // Wait for shutdown + <-p.ctx.Done() + + // Graceful shutdown + p.setHealthy(false) + p.wg.Wait() + + return nil +} + +// initNVML initializes the NVML library. 
+func (p *Provider) initNVML() error { + // Find NVML library + libraryPath := nvmlpkg.FindDriverLibrary(p.config.DriverRoot) + if libraryPath != "" { + p.logger.V(2).Info("Using NVML library", "path", libraryPath) + p.nvmllib = nvml.New(nvml.WithLibraryPath(libraryPath)) + } else { + p.logger.V(2).Info("Using system default NVML library") + p.nvmllib = nvml.New() + } + + // Initialize + ret := p.nvmllib.Init() + if ret != nvml.SUCCESS { + return fmt.Errorf("NVML init failed: %v", nvml.ErrorString(ret)) + } + + // Log driver version + if version, ret := p.nvmllib.SystemGetDriverVersion(); ret == nvml.SUCCESS { + p.logger.Info("NVML initialized", "driverVersion", version) + } + + p.initialized = true + return nil +} + +// shutdownNVML shuts down the NVML library. +func (p *Provider) shutdownNVML() { + if !p.initialized { + return + } + + if p.eventSet != nil { + p.eventSet.Free() + p.eventSet = nil + } + + p.nvmllib.Shutdown() + p.initialized = false + p.logger.V(1).Info("NVML shutdown complete") +} + + +// isLocalhostAddress returns true if the address refers to the local machine. +func isLocalhostAddress(addr string) bool { + // Unix socket paths are inherently local. + if strings.HasPrefix(addr, "unix://") || strings.HasPrefix(addr, "/") { + return true + } + host := addr + if h, _, err := net.SplitHostPort(addr); err == nil { + host = h + } + return host == "localhost" || host == "127.0.0.1" || host == "::1" || host == "" +} + +// connectWithRetry connects to the device-api-server with retry logic. +func (p *Provider) connectWithRetry() error { + // Validate that ServerAddress is localhost when using insecure credentials. + // This prevents accidental exposure of unencrypted gRPC traffic over the network. 
+ if !isLocalhostAddress(p.config.ServerAddress) { + return fmt.Errorf("insecure credentials require localhost address, got %q; "+ + "set --server-address to localhost: or use TLS", p.config.ServerAddress) + } + + var lastErr error + + for i := 0; i < MaxConnectionRetries; i++ { + select { + case <-p.ctx.Done(): + return p.ctx.Err() + default: + } + + // Insecure credentials are acceptable here: the provider connects to + // device-api-server via localhost within the same pod (sidecar pattern). + conn, err := grpc.NewClient( + p.config.ServerAddress, + grpc.WithTransportCredentials(insecure.NewCredentials()), + ) + if err != nil { + lastErr = err + p.logger.V(1).Info("Connection attempt failed, retrying", + "attempt", i+1, + "error", err, + ) + time.Sleep(ConnectionRetryInterval) + continue + } + + p.conn = conn + cs := clientset.New(conn) + p.gpuClient = cs.DeviceV1alpha1().GPUs() + p.healthClient = grpc_health_v1.NewHealthClient(conn) + + // Wait for server to be ready + if err := p.waitForServerReady(); err != nil { + conn.Close() + lastErr = err + p.logger.V(1).Info("Server not ready, retrying", + "attempt", i+1, + "error", err, + ) + time.Sleep(ConnectionRetryInterval) + continue + } + + p.connected = true + p.logger.Info("Connected to device-api-server", "address", p.config.ServerAddress) + return nil + } + + return fmt.Errorf("failed to connect after %d attempts: %w", MaxConnectionRetries, lastErr) +} + +// waitForServerReady waits for the server to report healthy. +func (p *Provider) waitForServerReady() error { + ctx, cancel := context.WithTimeout(p.ctx, 5*time.Second) + defer cancel() + + resp, err := p.healthClient.Check(ctx, &grpc_health_v1.HealthCheckRequest{}) + if err != nil { + return fmt.Errorf("health check failed: %w", err) + } + + if resp.Status != grpc_health_v1.HealthCheckResponse_SERVING { + return fmt.Errorf("server not serving: %v", resp.Status) + } + + return nil +} + +// disconnect closes the gRPC connection. 
+func (p *Provider) disconnect() { + if p.conn != nil { + p.conn.Close() + p.conn = nil + } + p.connected = false +} + +// enumerateAndRegisterGPUs discovers GPUs via NVML and registers them. +func (p *Provider) enumerateAndRegisterGPUs() error { + count, ret := p.nvmllib.DeviceGetCount() + if ret != nvml.SUCCESS { + return fmt.Errorf("failed to get device count: %v", nvml.ErrorString(ret)) + } + + if count == 0 { + p.logger.Info("No GPUs found on this node") + return nil + } + + p.logger.Info("Enumerating GPUs", "count", count) + uuids := make([]string, 0, count) + + for i := 0; i < count; i++ { + device, ret := p.nvmllib.DeviceGetHandleByIndex(i) + if ret != nvml.SUCCESS { + p.logger.Error(nil, "Failed to get device handle", "index", i, "error", nvml.ErrorString(ret)) + continue + } + + uuid, ret := device.GetUUID() + if ret != nvml.SUCCESS { + p.logger.Error(nil, "Failed to get device UUID", "index", i, "error", nvml.ErrorString(ret)) + continue + } + + // Get device info for registration + productName, _ := device.GetName() + var memoryBytes uint64 + if memInfo, ret := device.GetMemoryInfo(); ret == nvml.SUCCESS { + memoryBytes = memInfo.Total + } + + // Register GPU with server + if err := p.registerGPU(uuid, productName, memoryBytes); err != nil { + p.logger.Error(err, "Failed to register GPU", "uuid", uuid) + continue + } + + uuids = append(uuids, uuid) + p.logger.Info("Registered GPU", + "uuid", uuid, + "productName", productName, + "memory", nvmlpkg.FormatBytes(memoryBytes), + ) + } + + p.mu.Lock() + p.gpuUUIDs = uuids + p.mu.Unlock() + + p.logger.Info("GPU enumeration complete", "registered", len(uuids)) + return nil +} + +// registerGPU registers a single GPU with the device-api-server using Create. 
+func (p *Provider) registerGPU(uuid, productName string, memoryBytes uint64) error { + ctx, cancel := context.WithTimeout(p.ctx, 5*time.Second) + defer cancel() + + gpu := &devicev1alpha1.GPU{ + ObjectMeta: metav1.ObjectMeta{Name: uuid}, + Spec: devicev1alpha1.GPUSpec{UUID: uuid}, + Status: devicev1alpha1.GPUStatus{ + Conditions: []metav1.Condition{ + { + Type: nvmlpkg.ConditionTypeNVMLReady, + Status: metav1.ConditionStatus(nvmlpkg.ConditionStatusTrue), + Reason: "Initialized", + Message: fmt.Sprintf("GPU enumerated via NVML: %s (%s)", productName, nvmlpkg.FormatBytes(memoryBytes)), + LastTransitionTime: metav1.Now(), + }, + }, + }, + } + + _, err := p.gpuClient.Create(ctx, gpu, metav1.CreateOptions{}) + return err +} + +// runHeartbeatLoop sends periodic heartbeats to the server. +func (p *Provider) runHeartbeatLoop() { + defer p.wg.Done() + + ticker := time.NewTicker(HeartbeatInterval) + defer ticker.Stop() + + for { + select { + case <-p.ctx.Done(): + return + case <-ticker.C: + if err := p.sendHeartbeat(); err != nil { + p.logger.Error(err, "Failed to send heartbeat") + } + } + } +} + +// sendHeartbeat performs a health check on the server connection. +// Note: The Heartbeat RPC was removed. We now just verify the server is reachable. +func (p *Provider) sendHeartbeat() error { + ctx, cancel := context.WithTimeout(p.ctx, 5*time.Second) + defer cancel() + + // Verify server connectivity by checking gRPC health + resp, err := p.healthClient.Check(ctx, &grpc_health_v1.HealthCheckRequest{}) + if err != nil { + return err + } + + if resp.Status != grpc_health_v1.HealthCheckResponse_SERVING { + return fmt.Errorf("server not serving: %v", resp.Status) + } + + p.mu.RLock() + gpuCount := len(p.gpuUUIDs) + p.mu.RUnlock() + + p.logger.V(4).Info("Health check passed", "gpuCount", gpuCount) + return nil +} + +// runHealthMonitor monitors NVML events for GPU health changes. 
+func (p *Provider) runHealthMonitor() { + defer p.wg.Done() + + p.mu.Lock() + p.monitorRunning = true + p.mu.Unlock() + + defer func() { + p.mu.Lock() + p.monitorRunning = false + p.mu.Unlock() + }() + + // Create event set + eventSet, ret := p.nvmllib.EventSetCreate() + if ret != nvml.SUCCESS { + p.logger.Error(nil, "Failed to create event set", "error", nvml.ErrorString(ret)) + return + } + defer eventSet.Free() + p.eventSet = eventSet + + // Register devices for XID events + deviceCount, ret := p.nvmllib.DeviceGetCount() + if ret != nvml.SUCCESS { + p.logger.Error(nil, "Failed to get device count", "error", nvml.ErrorString(ret)) + return + } + + for i := 0; i < deviceCount; i++ { + device, ret := p.nvmllib.DeviceGetHandleByIndex(i) + if ret != nvml.SUCCESS { + continue + } + ret = device.RegisterEvents(nvml.EventTypeXidCriticalError|nvml.EventTypeSingleBitEccError|nvml.EventTypeDoubleBitEccError, eventSet) + if ret != nvml.SUCCESS { + p.logger.V(1).Info("Failed to register events for device", "index", i, "error", nvml.ErrorString(ret)) + } + } + + p.logger.Info("Health monitor started") + + // Event loop + for { + select { + case <-p.ctx.Done(): + return + default: + } + + data, ret := eventSet.Wait(EventTimeout) + if ret == nvml.ERROR_TIMEOUT { + continue + } + if ret != nvml.SUCCESS { + p.logger.V(1).Info("Event wait error", "error", nvml.ErrorString(ret)) + continue + } + + p.handleXIDEvent(data) + } +} + +// handleXIDEvent processes an XID error event. +func (p *Provider) handleXIDEvent(data nvml.EventData) { + if data.Device == nil { + p.logger.Error(nil, "Received XID event with nil device handle") + return + } + + uuid, ret := data.Device.GetUUID() + if ret != nvml.SUCCESS { + p.logger.Error(nil, "Failed to get device UUID from event") + return + } + + xid := data.EventData + p.logger.Info("XID event received", + "uuid", uuid, + "xid", xid, + "eventType", data.EventType, + ) + + // Skip ignored XIDs (application-level errors, not hardware failures). 
+ // This matches the in-process provider behavior in pkg/providers/nvml/health_monitor.go. + if nvmlpkg.IsDefaultIgnored(xid) { + p.logger.V(2).Info("Ignoring non-critical XID", + "uuid", uuid, + "xid", xid, + ) + return + } + + // Only critical XIDs trigger a health state change. + // Non-critical, non-ignored XIDs are logged but do not update GPU status, + // matching the in-process provider behavior in pkg/providers/nvml/health_monitor.go. + if !nvmlpkg.IsCriticalXid(xid) { + p.logger.V(2).Info("Non-critical XID, skipping status update", + "uuid", uuid, + "xid", xid, + ) + return + } + + p.logger.Info("Critical XID error detected", + "uuid", uuid, + "xid", xid, + ) + + ctx, cancel := context.WithTimeout(p.ctx, 5*time.Second) + defer cancel() + + gpu := &devicev1alpha1.GPU{ + ObjectMeta: metav1.ObjectMeta{Name: uuid}, + Status: devicev1alpha1.GPUStatus{ + Conditions: []metav1.Condition{ + { + Type: nvmlpkg.ConditionTypeNVMLReady, + Status: metav1.ConditionStatus(nvmlpkg.ConditionStatusFalse), + Reason: "XIDError", + Message: fmt.Sprintf("Critical XID error: %d", xid), + LastTransitionTime: metav1.Now(), + }, + }, + }, + } + + if _, err := p.gpuClient.UpdateStatus(ctx, gpu, metav1.UpdateOptions{}); err != nil { + p.logger.Error(err, "Failed to update GPU status", "uuid", uuid) + } +} + +// runHealthServer runs the HTTP health check server. 
+func (p *Provider) runHealthServer() { + defer p.wg.Done() + + mux := http.NewServeMux() + mux.HandleFunc("/healthz", p.handleHealthz) + mux.HandleFunc("/readyz", p.handleReadyz) + mux.HandleFunc("/livez", p.handleHealthz) + + server := &http.Server{ + Addr: fmt.Sprintf(":%d", p.config.HealthCheckPort), + Handler: mux, + ReadHeaderTimeout: 5 * time.Second, + ReadTimeout: 10 * time.Second, + WriteTimeout: 10 * time.Second, + } + + go func() { + <-p.ctx.Done() + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + server.Shutdown(ctx) + }() + + p.logger.Info("Health server started", "port", p.config.HealthCheckPort) + if err := server.ListenAndServe(); err != http.ErrServerClosed { + p.logger.Error(err, "Health server error") + } +} + +func (p *Provider) handleHealthz(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte("ok\n")) +} + +func (p *Provider) handleReadyz(w http.ResponseWriter, _ *http.Request) { + p.mu.RLock() + healthy := p.healthy + p.mu.RUnlock() + + if healthy { + w.WriteHeader(http.StatusOK) + w.Write([]byte("ok\n")) + } else { + w.WriteHeader(http.StatusServiceUnavailable) + w.Write([]byte("not ready\n")) + } +} + +func (p *Provider) setHealthy(healthy bool) { + p.mu.Lock() + p.healthy = healthy + p.mu.Unlock() +} + diff --git a/cmd/nvml-provider/reconciler.go b/cmd/nvml-provider/reconciler.go new file mode 100644 index 000000000..af5f68b6c --- /dev/null +++ b/cmd/nvml-provider/reconciler.go @@ -0,0 +1,308 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build nvml + +package main + +import ( + "context" + "fmt" + "time" + + "github.com/NVIDIA/go-nvml/pkg/nvml" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + devicev1alpha1 "github.com/nvidia/nvsentinel/api/device/v1alpha1" + nvmlpkg "github.com/nvidia/nvsentinel/pkg/providers/nvml" +) + +// ReconcileState reconciles the provider's state with the device-api-server. +// +// This is called on startup and after reconnection to ensure: +// 1. GPUs that were removed while disconnected are unregistered +// 2. GPUs that were added while disconnected are registered +// 3. 
GPU health states are reconciled with current NVML state +// +// This handles scenarios like: +// - Provider crash and restart +// - Network partition recovery +// - GPU hotplug/removal during provider downtime +func (p *Provider) ReconcileState(ctx context.Context) error { + p.logger.Info("Starting state reconciliation") + + // Step 1: Get current state from server + cachedGPUs, err := p.listCachedGPUs(ctx) + if err != nil { + return fmt.Errorf("failed to list cached GPUs: %w", err) + } + + p.logger.V(1).Info("Retrieved cached GPU state", "count", len(cachedGPUs)) + + // Step 2: Get current GPU UUIDs from NVML + currentUUIDs, err := p.getCurrentGPUUUIDs() + if err != nil { + return fmt.Errorf("failed to get current GPU UUIDs: %w", err) + } + + p.logger.V(1).Info("Current GPUs from NVML", "count", len(currentUUIDs)) + + // Build lookup maps + cachedUUIDSet := make(map[string]*devicev1alpha1.GPU) + for i := range cachedGPUs { + gpu := &cachedGPUs[i] + cachedUUIDSet[gpu.Spec.UUID] = gpu + } + + currentUUIDSet := make(map[string]bool) + for _, uuid := range currentUUIDs { + currentUUIDSet[uuid] = true + } + + // Step 3: Find and unregister removed GPUs + for uuid := range cachedUUIDSet { + if !currentUUIDSet[uuid] { + p.logger.Info("GPU was removed, unregistering", "uuid", uuid) + if err := p.unregisterGPU(ctx, uuid); err != nil { + p.logger.Error(err, "Failed to unregister removed GPU", "uuid", uuid) + // Continue with other GPUs + } + } + } + + // Step 4: Find and register new GPUs + for _, uuid := range currentUUIDs { + if _, exists := cachedUUIDSet[uuid]; !exists { + p.logger.Info("New GPU found, registering", "uuid", uuid) + if err := p.registerNewGPU(ctx, uuid); err != nil { + p.logger.Error(err, "Failed to register new GPU", "uuid", uuid) + // Continue with other GPUs + } + } + } + + // Step 5: Reconcile health state for existing GPUs + for _, uuid := range currentUUIDs { + if cachedGPU, exists := cachedUUIDSet[uuid]; exists { + if err := 
p.reconcileGPUHealth(ctx, uuid, cachedGPU); err != nil { + p.logger.Error(err, "Failed to reconcile GPU health", "uuid", uuid) + // Continue with other GPUs + } + } + } + + // Step 6: Update local GPU list + p.mu.Lock() + p.gpuUUIDs = currentUUIDs + p.mu.Unlock() + + p.logger.Info("State reconciliation complete", + "totalGPUs", len(currentUUIDs), + ) + + return nil +} + +// listCachedGPUs retrieves the list of GPUs from the server cache. +// +// Note: This lists ALL GPUs, not just those from this provider. +// TODO: Add provider_id filtering to ListGpus RPC for efficiency. +func (p *Provider) listCachedGPUs(ctx context.Context) ([]devicev1alpha1.GPU, error) { + // Note: If the parent context has a shorter deadline, WithTimeout + // inherits the parent's deadline. This is the correct behavior: + // reconciliation should respect the overall operation timeout. + ctx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + + gpuList, err := p.gpuClient.List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, err + } + + // Filter to only GPUs that might belong to this provider + // For now, we assume all GPUs belong to us since we're the only provider + // A more robust solution would use provider_id filtering + return gpuList.Items, nil +} + +// getCurrentGPUUUIDs gets the list of GPU UUIDs currently visible to NVML. +func (p *Provider) getCurrentGPUUUIDs() ([]string, error) { + count, ret := p.nvmllib.DeviceGetCount() + if ret != nvml.SUCCESS { + return nil, fmt.Errorf("failed to get device count: %v", nvml.ErrorString(ret)) + } + + uuids := make([]string, 0, count) + for i := 0; i < count; i++ { + device, ret := p.nvmllib.DeviceGetHandleByIndex(i) + if ret != nvml.SUCCESS { + continue + } + + uuid, ret := device.GetUUID() + if ret != nvml.SUCCESS { + continue + } + + uuids = append(uuids, uuid) + } + + return uuids, nil +} + +// unregisterGPU removes a GPU from the server using Delete. 
+func (p *Provider) unregisterGPU(ctx context.Context, uuid string) error { + // Note: If the parent context has a shorter deadline, WithTimeout + // inherits the parent's deadline. This is the correct behavior: + // reconciliation should respect the overall operation timeout. + ctx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + + return p.gpuClient.Delete(ctx, uuid, metav1.DeleteOptions{}) +} + +// registerNewGPU registers a newly discovered GPU. +func (p *Provider) registerNewGPU(ctx context.Context, uuid string) error { + // Get device info from NVML + productName := "Unknown" + var memoryBytes uint64 + + // Find the device by UUID + count, ret := p.nvmllib.DeviceGetCount() + if ret == nvml.SUCCESS { + for i := 0; i < count; i++ { + device, ret := p.nvmllib.DeviceGetHandleByIndex(i) + if ret != nvml.SUCCESS { + continue + } + deviceUUID, ret := device.GetUUID() + if ret != nvml.SUCCESS || deviceUUID != uuid { + continue + } + + // Found the device + if name, ret := device.GetName(); ret == nvml.SUCCESS { + productName = name + } + if memInfo, ret := device.GetMemoryInfo(); ret == nvml.SUCCESS { + memoryBytes = memInfo.Total + } + break + } + } + + return p.registerGPU(uuid, productName, memoryBytes) +} + +// reconcileGPUHealth compares cached health state with current NVML state. +// +// If the GPU was marked as Unknown (due to provider timeout) but is now +// healthy per NVML, we update it back to healthy. 
+func (p *Provider) reconcileGPUHealth(ctx context.Context, uuid string, cachedGPU *devicev1alpha1.GPU) error { + // Check if the cached state shows Unknown (from heartbeat timeout) + var cachedCondition *metav1.Condition + for i := range cachedGPU.Status.Conditions { + cond := &cachedGPU.Status.Conditions[i] + if cond.Type == "Ready" || cond.Type == nvmlpkg.ConditionTypeNVMLReady { + cachedCondition = cond + break + } + } + + // If the condition is Unknown, query NVML and update if healthy + if cachedCondition != nil && string(cachedCondition.Status) == nvmlpkg.ConditionStatusUnknown { + p.logger.Info("GPU has Unknown status, checking current NVML state", "uuid", uuid) + + // For now, if we can enumerate the GPU via NVML, consider it healthy + // A more sophisticated check would query specific health indicators + healthy, err := p.isGPUHealthy(uuid) + if err != nil { + return fmt.Errorf("failed to check GPU health: %w", err) + } + + if healthy { + p.logger.Info("GPU is healthy per NVML, updating status", "uuid", uuid) + return p.updateGPUCondition(ctx, uuid, nvmlpkg.ConditionStatusTrue, "Recovered", "GPU recovered after provider reconnection") + } + } + + return nil +} + +// isGPUHealthy checks if a GPU is healthy via NVML. +func (p *Provider) isGPUHealthy(uuid string) (bool, error) { + // Find device by UUID + count, ret := p.nvmllib.DeviceGetCount() + if ret != nvml.SUCCESS { + return false, fmt.Errorf("failed to get device count: %v", nvml.ErrorString(ret)) + } + + for i := 0; i < count; i++ { + device, ret := p.nvmllib.DeviceGetHandleByIndex(i) + if ret != nvml.SUCCESS { + continue + } + deviceUUID, ret := device.GetUUID() + if ret != nvml.SUCCESS || deviceUUID != uuid { + continue + } + + // Device found - check basic health indicators + // 1. Can we get memory info? (basic liveness check) + if _, ret := device.GetMemoryInfo(); ret != nvml.SUCCESS { + return false, nil + } + + // 2. 
Check for pending page retirements (ECC errors) + if pending, ret := device.GetRetiredPagesPendingStatus(); ret == nvml.SUCCESS { + if pending == nvml.FEATURE_ENABLED { + p.logger.V(1).Info("GPU has pending page retirements", "uuid", uuid) + return false, nil + } + } + + // Device is accessible and no pending issues + return true, nil + } + + // Device not found - not healthy + return false, nil +} + +// updateGPUCondition updates a GPU's status via UpdateStatus. +func (p *Provider) updateGPUCondition(ctx context.Context, uuid, status, reason, message string) error { + // Note: If the parent context has a shorter deadline, WithTimeout + // inherits the parent's deadline. This is the correct behavior: + // reconciliation should respect the overall operation timeout. + ctx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + + gpu := &devicev1alpha1.GPU{ + ObjectMeta: metav1.ObjectMeta{Name: uuid}, + Status: devicev1alpha1.GPUStatus{ + Conditions: []metav1.Condition{ + { + Type: nvmlpkg.ConditionTypeNVMLReady, + Status: metav1.ConditionStatus(status), + Reason: reason, + Message: message, + LastTransitionTime: metav1.Now(), + }, + }, + }, + } + + _, err := p.gpuClient.UpdateStatus(ctx, gpu, metav1.UpdateOptions{}) + return err +} diff --git a/code-generator/cmd/client-gen/generators/generator_for_type.go b/code-generator/cmd/client-gen/generators/generator_for_type.go index dc4a11bef..028a65658 100644 --- a/code-generator/cmd/client-gen/generators/generator_for_type.go +++ b/code-generator/cmd/client-gen/generators/generator_for_type.go @@ -15,7 +15,7 @@ limitations under the License. */ /* -Portions Copyright (c) 2025 NVIDIA CORPORATION. All rights reserved. +Portions Copyright (c) 2026 NVIDIA CORPORATION. All rights reserved. Modified from the original to support gRPC transport. 
Origin: https://github.com/kubernetes/code-generator/blob/v0.34.1/cmd/client-gen/generators/generator_for_type.go @@ -401,9 +401,24 @@ func (c *$.type|allLowercasePlural$) Update(ctx $.context|raw$, $.type|allLowerc ` var updateStatusTemplate = ` -// TODO: Implement UpdateStatus support. +// UpdateStatus updates only the status subresource of a $.type|public$. func (c *$.type|allLowercasePlural$) UpdateStatus(ctx $.context|raw$, $.type|allLowercase$ *$.type|raw$, opts $.UpdateOptions|raw$) (*$.type|raw$, error) { - return nil, $.fmtErrorf|raw$("UpdateStatus not implemented") + resp, err := c.client.Update$.ProtoType$Status(ctx, &$.pb$.Update$.ProtoType$StatusRequest{ + $.ProtoType$: $.ToProto|raw$($.type|allLowercase$), + Opts: &$.pb$.UpdateOptions{}, + }) + if err != nil { + return nil, err + } + + obj := $.FromProto|raw$(resp) + c.logger.V(2).Info("Updated $.type|public$ status", + "name", obj.GetName(), + "namespace", c.getNamespace(), + "resource-version", obj.GetResourceVersion(), + ) + + return obj, nil } ` diff --git a/demos/nvml-sidecar-demo.sh b/demos/nvml-sidecar-demo.sh new file mode 100755 index 000000000..cb5ffe06d --- /dev/null +++ b/demos/nvml-sidecar-demo.sh @@ -0,0 +1,752 @@ +#!/bin/bash +# NVML Provider Sidecar Demo +# Demonstrates the NVML provider sidecar architecture for GPU enumeration +# +# Prerequisites: +# - kubectl configured with GPU cluster access +# - docker with buildx for building images +# - helm 3.x installed +# - GPU nodes with RuntimeClass 'nvidia' +# +# Usage: ./demos/nvml-sidecar-demo.sh [kubeconfig] +# +# Environment Variables (all optional): +# KUBECONFIG - Path to kubeconfig file (default: $HOME/.kube/config) +# NAMESPACE - Kubernetes namespace (default: device-api) +# RELEASE_NAME - Helm release name (default: device-api-server) +# IMAGE_REGISTRY - Container registry (default: ttl.sh) +# IMAGE_TAG - Image tag (default: 2h for ttl.sh expiry) +# SERVER_IMAGE - Full device-api-server image (default: 
$IMAGE_REGISTRY/device-api-server:$IMAGE_TAG) +# SIDECAR_IMAGE - Full sidecar image (default: $IMAGE_REGISTRY/device-api-server-sidecar:$IMAGE_TAG) +# BUILD_PLATFORM - Target platform for builds (default: linux/amd64) +# GPU_NODE_SELECTOR - Label selector for GPU nodes (default: nvidia.com/gpu.present=true) +# CHART_PATH - Path to Helm chart (default: deployments/helm/device-api-server) +# VALUES_FILE - Path to values file (default: deployments/helm/values-sidecar-test.yaml) +# DOCKERFILE - Path to Dockerfile (default: deployments/container/Dockerfile) +# APP_NAME - Helm chart app name for pod selectors (default: device-api-server) +# CONTAINER_NAME - Main container name (default: device-api-server) +# SIDECAR_CONTAINER_NAME - Sidecar container name (default: nvml-provider) +# INTERACTIVE - Enable interactive mode with prompts (default: true) +# SKIP_DESTRUCTIVE - Skip destructive ops in non-interactive mode (default: true) +# SKIP_BUILD - Skip image building entirely (default: false) +# +# Examples: +# # Use default settings with ttl.sh +# ./demos/nvml-sidecar-demo.sh +# +# # Use custom kubeconfig +# KUBECONFIG=~/.kube/config-aws-gpu ./demos/nvml-sidecar-demo.sh +# +# # Use custom registry +# IMAGE_REGISTRY=ghcr.io/nvidia IMAGE_TAG=latest ./demos/nvml-sidecar-demo.sh +# +# # Non-interactive mode (for CI/automation) +# INTERACTIVE=false KUBECONFIG=~/.kube/config ./demos/nvml-sidecar-demo.sh + +set -euo pipefail + +# ============================================================================== +# Configuration (all values configurable via environment variables) +# ============================================================================== + +# Kubernetes configuration +KUBECONFIG="${KUBECONFIG:-${1:-$HOME/.kube/config}}" +NAMESPACE="${NAMESPACE:-device-api}" +RELEASE_NAME="${RELEASE_NAME:-device-api-server}" + +# Paths (relative to repo root) +CHART_PATH="${CHART_PATH:-deployments/helm/device-api-server}" 
+VALUES_FILE="${VALUES_FILE:-deployments/helm/values-sidecar-test.yaml}" +DOCKERFILE="${DOCKERFILE:-deployments/container/Dockerfile}" + +# Image registry settings +IMAGE_REGISTRY="${IMAGE_REGISTRY:-ttl.sh}" +IMAGE_TAG="${IMAGE_TAG:-2h}" + +# Image names (using ttl.sh ephemeral registry by default - images expire based on tag) +SERVER_IMAGE="${SERVER_IMAGE:-${IMAGE_REGISTRY}/device-api-server:${IMAGE_TAG}}" +SIDECAR_IMAGE="${SIDECAR_IMAGE:-${IMAGE_REGISTRY}/device-api-server-sidecar:${IMAGE_TAG}}" + +# Build settings +BUILD_PLATFORM="${BUILD_PLATFORM:-linux/amd64}" + +# Node selection (for listing GPU nodes) +GPU_NODE_SELECTOR="${GPU_NODE_SELECTOR:-nvidia.com/gpu.present=true}" + +# Interactive mode (set to false for CI/automated runs) +INTERACTIVE="${INTERACTIVE:-true}" + +# Skip destructive demos in non-interactive mode +SKIP_DESTRUCTIVE="${SKIP_DESTRUCTIVE:-true}" + +# Skip image building entirely (use pre-built images) +SKIP_BUILD="${SKIP_BUILD:-false}" + +# Helm chart app name (used for pod selectors and container names) +APP_NAME="${APP_NAME:-device-api-server}" +CONTAINER_NAME="${CONTAINER_NAME:-device-api-server}" +SIDECAR_CONTAINER_NAME="${SIDECAR_CONTAINER_NAME:-nvml-provider}" + +# ============================================================================== +# Terminal Colors (buildah-style) +# ============================================================================== + +if [[ -t 1 ]]; then + red=$(tput setaf 1) + green=$(tput setaf 2) + yellow=$(tput setaf 3) + blue=$(tput setaf 4) + magenta=$(tput setaf 5) + cyan=$(tput setaf 6) + white=$(tput setaf 7) + bold=$(tput bold) + reset=$(tput sgr0) +else + red="" + green="" + yellow="" + blue="" + magenta="" + cyan="" + white="" + bold="" + reset="" +fi + +# ============================================================================== +# Helper Functions +# ============================================================================== + +banner() { + echo "" + echo 
"${bold}${blue}============================================================${reset}" + echo "${bold}${blue} $1${reset}" + echo "${bold}${blue}============================================================${reset}" + echo "" +} + +step() { + echo "" + echo "${bold}${green}>>> $1${reset}" + echo "" +} + +info() { + echo "${cyan} $1${reset}" +} + +warn() { + echo "${yellow} WARNING: $1${reset}" +} + +error() { + echo "${red} ERROR: $1${reset}" +} + +run_cmd() { + echo "${magenta} \$ $*${reset}" + "$@" +} + +pause() { + if [[ "${INTERACTIVE}" == "true" ]]; then + echo "" + read -r -p "${yellow}Press ENTER to continue...${reset}" + echo "" + fi +} + +confirm() { + if [[ "${INTERACTIVE}" != "true" ]]; then + # Auto-confirm in non-interactive mode + info "Auto-confirming: $1" + return 0 + fi + echo "" + read -r -p "${yellow}$1 [y/N] ${reset}" response + case "$response" in + [yY][eE][sS]|[yY]) return 0 ;; + *) return 1 ;; + esac +} + +# Confirm for destructive operations (skipped in non-interactive mode if SKIP_DESTRUCTIVE=true) +confirm_destructive() { + if [[ "${INTERACTIVE}" != "true" && "${SKIP_DESTRUCTIVE}" == "true" ]]; then + info "Skipping destructive operation in non-interactive mode: $1" + return 1 + fi + confirm "$1" +} + +check_prereqs() { + local missing=() + + command -v kubectl &>/dev/null || missing+=("kubectl") + command -v helm &>/dev/null || missing+=("helm") + command -v docker &>/dev/null || missing+=("docker") + + if [[ ${#missing[@]} -gt 0 ]]; then + error "Missing prerequisites: ${missing[*]}" + exit 1 + fi + + # Check for buildx (required for cross-platform builds) + if ! 
docker buildx version &>/dev/null; then + warn "docker buildx not available - cross-platform builds may fail" + warn "Run: docker buildx create --use --name multiarch" + else + info "Docker buildx: $(docker buildx version | head -1)" + fi +} + +# ============================================================================== +# Demo Sections +# ============================================================================== + +show_intro() { + [[ "${INTERACTIVE}" == "true" ]] && clear + banner "NVML Provider Sidecar Architecture Demo" + + echo "${white}This demo showcases the sidecar-based NVML provider for device-api-server.${reset}" + echo "" + echo "${white}Architecture:${reset}" + echo "${cyan} ┌─────────────────────────────────────────────────────────┐${reset}" + echo "${cyan} │ Pod │${reset}" + echo "${cyan} │ ┌──────────────────┐ ┌──────────────────┐ │${reset}" + echo "${cyan} │ │ device-api-server│ │ nvml-provider │ │${reset}" + echo "${cyan} │ │ (pure Go) │◄───│ (CGO + NVML) │ │${reset}" + echo "${cyan} │ │ Unix Socket │gRPC│ Health :8082 │ │${reset}" + echo "${cyan} │ │ Health :8081 │ │ RuntimeClass: │ │${reset}" + echo "${cyan} │ │ Metrics :9090 │ │ nvidia │ │${reset}" + echo "${cyan} │ └──────────────────┘ └──────────────────┘ │${reset}" + echo "${cyan} └─────────────────────────────────────────────────────────┘${reset}" + echo "" + echo "${white}Benefits:${reset}" + echo "${green} ✓ Separation of concerns (API server vs NVML access)${reset}" + echo "${green} ✓ Independent scaling and updates${reset}" + echo "${green} ✓ Better testability (mock providers)${reset}" + echo "${green} ✓ Crash isolation (NVML crashes don't kill API server)${reset}" + echo "" + + pause +} + +show_config() { + banner "Configuration" + + echo "${white}Current settings (override via environment variables):${reset}" + echo "" + echo "${cyan} Kubernetes:${reset}" + echo " KUBECONFIG = ${KUBECONFIG}" + echo " NAMESPACE = ${NAMESPACE}" + echo " RELEASE_NAME = ${RELEASE_NAME}" + echo "" 
+ echo "${cyan} Paths:${reset}" + echo " CHART_PATH = ${CHART_PATH}" + echo " VALUES_FILE = ${VALUES_FILE}" + echo " DOCKERFILE = ${DOCKERFILE}" + echo "" + echo "${cyan} Images:${reset}" + echo " IMAGE_REGISTRY = ${IMAGE_REGISTRY}" + echo " IMAGE_TAG = ${IMAGE_TAG}" + echo " SERVER_IMAGE = ${SERVER_IMAGE}" + echo " SIDECAR_IMAGE = ${SIDECAR_IMAGE}" + echo "" + echo "${cyan} Build:${reset}" + echo " BUILD_PLATFORM = ${BUILD_PLATFORM}" + echo "" + echo "${cyan} Cluster:${reset}" + echo " GPU_NODE_SELECTOR = ${GPU_NODE_SELECTOR}" + echo "" + echo "${cyan} Helm Chart:${reset}" + echo " APP_NAME = ${APP_NAME}" + echo " CONTAINER_NAME = ${CONTAINER_NAME}" + echo " SIDECAR_CONTAINER_NAME = ${SIDECAR_CONTAINER_NAME}" + echo "" + echo "${cyan} Mode:${reset}" + echo " INTERACTIVE = ${INTERACTIVE}" + echo " SKIP_DESTRUCTIVE = ${SKIP_DESTRUCTIVE}" + echo " SKIP_BUILD = ${SKIP_BUILD}" + echo "" + + pause +} + +show_cluster_info() { + banner "Step 1: Verify Cluster Connectivity" + + step "Check cluster connection" + run_cmd kubectl --kubeconfig="${KUBECONFIG}" cluster-info + + pause + + step "List GPU nodes (selector: ${GPU_NODE_SELECTOR})" + run_cmd kubectl --kubeconfig="${KUBECONFIG}" get nodes -l "${GPU_NODE_SELECTOR}" -o wide || { + warn "No nodes found with selector '${GPU_NODE_SELECTOR}'" + info "Listing all nodes instead:" + run_cmd kubectl --kubeconfig="${KUBECONFIG}" get nodes -o wide + } + + pause + + step "Verify nvidia RuntimeClass exists" + run_cmd kubectl --kubeconfig="${KUBECONFIG}" get runtimeclass nvidia -o yaml || { + warn "RuntimeClass 'nvidia' not found. GPU access may not work." 
+ } + + pause +} + +check_image_exists() { + local image="$1" + # Try to inspect the manifest - if it exists, the image is available + docker buildx imagetools inspect "${image}" &>/dev/null 2>&1 +} + +build_images() { + banner "Step 2: Build Container Images" + + if [[ "${SKIP_BUILD}" == "true" ]]; then + info "SKIP_BUILD=true, skipping image builds" + info "Using pre-built images:" + info " SERVER_IMAGE: ${SERVER_IMAGE}" + info " SIDECAR_IMAGE: ${SIDECAR_IMAGE}" + return 0 + fi + + info "Building images for registry: ${IMAGE_REGISTRY}" + info "Using unified multi-target Dockerfile at ${DOCKERFILE}" + info "Target platform: ${BUILD_PLATFORM}" + echo "" + + # Ensure buildx is available for cross-platform builds + if ! docker buildx version &>/dev/null; then + error "docker buildx is required for cross-platform builds" + error "Install Docker Desktop or run: docker buildx create --use" + exit 1 + fi + + # Check if images already exist + local need_server=true + local need_sidecar=true + + if check_image_exists "${SERVER_IMAGE}"; then + info "Image ${SERVER_IMAGE} already exists" + if ! confirm "Rebuild device-api-server image?"; then + need_server=false + fi + fi + + if check_image_exists "${SIDECAR_IMAGE}"; then + info "Image ${SIDECAR_IMAGE} already exists" + if ! confirm "Rebuild device-api-server-sidecar image?"; then + need_sidecar=false + fi + fi + + if [[ "${need_server}" == "true" ]]; then + step "Build and push device-api-server image (CGO_ENABLED=0)" + info "This is a pure Go binary with no NVML dependencies" + info "Building for ${BUILD_PLATFORM} and pushing directly..." + run_cmd docker buildx build \ + --platform "${BUILD_PLATFORM}" \ + --target device-api-server \ + -t "${SERVER_IMAGE}" \ + -f "${DOCKERFILE}" \ + --push \ + . 
+ pause + else + info "Skipping device-api-server build" + fi + + if [[ "${need_sidecar}" == "true" ]]; then + step "Build and push device-api-server-sidecar image (CGO_ENABLED=1)" + info "This is the NVML provider sidecar with glibc runtime" + info "Building for ${BUILD_PLATFORM} and pushing directly..." + run_cmd docker buildx build \ + --platform "${BUILD_PLATFORM}" \ + --target nvml-provider \ + -t "${SIDECAR_IMAGE}" \ + -f "${DOCKERFILE}" \ + --push \ + . + pause + else + info "Skipping device-api-server-sidecar build" + fi +} + +show_values_file() { + banner "Step 3: Review Helm Values" + + info "The sidecar architecture is enabled via Helm values" + echo "" + + step "Key configuration in ${VALUES_FILE}:" + echo "" + echo "${cyan}# Disable built-in NVML provider${reset}" + echo "${white}nvml:${reset}" + echo "${white} enabled: false${reset}" + echo "" + echo "${cyan}# Enable NVML Provider sidecar${reset}" + echo "${white}nvmlProvider:${reset}" + echo "${white} enabled: true${reset}" + echo "${white} image:${reset}" + echo "${white} repository: ${IMAGE_REGISTRY}/device-api-server-sidecar${reset}" + echo "${white} tag: \"${IMAGE_TAG}\"${reset}" + echo "${white} # Sidecar connects via shared unix socket volume${reset}" + echo "${white} runtimeClassName: nvidia${reset}" + echo "" + + if [[ -f "${VALUES_FILE}" ]]; then + step "Full values file:" + run_cmd cat "${VALUES_FILE}" + fi + + pause +} + +deploy_sidecar() { + banner "Step 4: Deploy with Sidecar Architecture" + + step "Create namespace if not exists" + echo "${magenta} \$ kubectl create namespace ${NAMESPACE} --dry-run=client -o yaml | kubectl apply -f -${reset}" + kubectl --kubeconfig="${KUBECONFIG}" create namespace "${NAMESPACE}" --dry-run=client -o yaml | \ + kubectl --kubeconfig="${KUBECONFIG}" apply -f - + + pause + + # Check if release already exists + # Build --set overrides to ensure Helm uses the same images we just built, + # regardless of what the values file says. 
+ IMAGE_OVERRIDES=( + --set "image.repository=${IMAGE_REGISTRY}/device-api-server" + --set "image.tag=${IMAGE_TAG}" + --set "nvmlProvider.image.repository=${IMAGE_REGISTRY}/device-api-server-sidecar" + --set "nvmlProvider.image.tag=${IMAGE_TAG}" + ) + + if helm status "${RELEASE_NAME}" --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" &>/dev/null; then + info "Release '${RELEASE_NAME}' already exists" + step "Upgrading existing release..." + run_cmd helm upgrade "${RELEASE_NAME}" "${CHART_PATH}" \ + --kubeconfig="${KUBECONFIG}" \ + --namespace "${NAMESPACE}" \ + -f "${VALUES_FILE}" \ + "${IMAGE_OVERRIDES[@]}" + + step "Restarting pods to pick up changes..." + run_cmd kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" rollout restart daemonset "${RELEASE_NAME}" + else + step "Installing new release..." + run_cmd helm install "${RELEASE_NAME}" "${CHART_PATH}" \ + --kubeconfig="${KUBECONFIG}" \ + --namespace "${NAMESPACE}" \ + -f "${VALUES_FILE}" \ + "${IMAGE_OVERRIDES[@]}" + fi + + pause + + step "Waiting for pods to be ready (timeout 2m)..." + if ! kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" rollout status daemonset "${RELEASE_NAME}" --timeout=2m; then + warn "Rollout not complete within timeout. Checking status..." + fi + + step "Current pod status" + run_cmd kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" get pods -l app.kubernetes.io/name=${APP_NAME} -o wide + + pause + + step "Verify both containers are running in each pod" + info "Each pod should have 2/2 containers ready" + run_cmd kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" get pods -l app.kubernetes.io/name=${APP_NAME} \ + -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.status.phase}{"\t"}{range .status.containerStatuses[*]}{.name}:{.ready}{" "}{end}{"\n"}{end}' + + pause +} + +verify_gpu_registration() { + banner "Step 5: Verify GPU Registration" + + step "Wait for pods to be ready" + info "Waiting up to 60 seconds for pods to start..." + if ! 
kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" wait --for=condition=ready pod -l app.kubernetes.io/name=${APP_NAME} --timeout=60s 2>/dev/null; then + warn "Pods may not be ready yet. Checking status..." + run_cmd kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" get pods -l app.kubernetes.io/name=${APP_NAME} -o wide + run_cmd kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" describe pods -l app.kubernetes.io/name=${APP_NAME} | tail -30 + error "Pods not ready. Check the output above for issues." + return 1 + fi + + pause + + step "Verify DaemonSet coverage on all GPU nodes" + local gpu_nodes_ready + local gpu_nodes_total + local daemonset_desired + local daemonset_ready + + gpu_nodes_total=$(kubectl --kubeconfig="${KUBECONFIG}" get nodes -l "${GPU_NODE_SELECTOR}" --no-headers 2>/dev/null | wc -l | tr -d ' ') + gpu_nodes_ready=$(kubectl --kubeconfig="${KUBECONFIG}" get nodes -l "${GPU_NODE_SELECTOR}" --no-headers 2>/dev/null | grep -c " Ready" || true) + # Ensure gpu_nodes_ready is a valid number (grep -c returns 0 with exit code 1 when no matches) + [[ -z "${gpu_nodes_ready}" ]] && gpu_nodes_ready=0 + daemonset_desired=$(kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" get daemonset "${RELEASE_NAME}" -o jsonpath='{.status.desiredNumberScheduled}' 2>/dev/null || echo "0") + daemonset_ready=$(kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" get daemonset "${RELEASE_NAME}" -o jsonpath='{.status.numberReady}' 2>/dev/null || echo "0") + + echo "" + info "GPU Nodes (total): ${gpu_nodes_total}" + info "GPU Nodes (Ready): ${gpu_nodes_ready}" + info "DaemonSet (desired): ${daemonset_desired}" + info "DaemonSet (ready): ${daemonset_ready}" + echo "" + + if [[ "${daemonset_ready}" -eq "${gpu_nodes_ready}" && "${daemonset_ready}" -gt 0 ]]; then + echo "${green} ✓ DaemonSet running on all ${daemonset_ready} Ready GPU nodes${reset}" + else + warn "DaemonSet coverage mismatch! 
Expected ${gpu_nodes_ready} pods, got ${daemonset_ready}" + run_cmd kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" get daemonset "${RELEASE_NAME}" + fi + + pause + + step "List all pods and their nodes" + run_cmd kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" get pods -l app.kubernetes.io/name=${APP_NAME} -o wide + + pause + + step "Get a pod name for testing" + POD=$(kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" get pods -l app.kubernetes.io/name=${APP_NAME} -o jsonpath='{.items[0].metadata.name}') + if [[ -z "${POD}" ]]; then + error "No pods found. DaemonSet may not be scheduling on any nodes." + run_cmd kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" get daemonset + return 1 + fi + NODE=$(kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" get pod "${POD}" -o jsonpath='{.spec.nodeName}') + info "Using pod: ${POD} (on node: ${NODE})" + + pause + + step "Check device-api-server logs for provider connection" + run_cmd kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" logs "${POD}" -c "${CONTAINER_NAME}" --tail=20 || true + + pause + + step "Check nvml-provider sidecar logs" + run_cmd kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" logs "${POD}" -c "${SIDECAR_CONTAINER_NAME}" --tail=20 || true + + pause + + verify_gpu_uuid_match "${POD}" "${NODE}" +} + +verify_gpu_uuid_match() { + local pod="$1" + local node="$2" + + banner "Step 5b: Verify GPU UUID Match" + + info "Comparing GPU UUIDs from nvidia-smi with device-api-server registered GPUs" + info "Pod: ${pod} | Node: ${node}" + echo "" + + step "Get GPU UUID from nvidia-smi on the node (via sidecar container)" + local nvidia_smi_uuids + nvidia_smi_uuids=$(kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" exec "${pod}" -c "${SIDECAR_CONTAINER_NAME}" -- \ + nvidia-smi --query-gpu=uuid --format=csv,noheader 2>/dev/null || echo "") + + if [[ -z "${nvidia_smi_uuids}" ]]; then + warn "Could not get GPU UUIDs from nvidia-smi" + return 1 + fi + + echo "${cyan} nvidia-smi 
 GPU UUIDs:${reset}"
+    echo "${nvidia_smi_uuids}" | while read -r uuid; do
+        echo " - ${uuid}"
+    done
+    echo ""
+
+    pause
+
+    step "Get registered GPU UUIDs from nvml-provider sidecar logs"
+    local registered_uuids
+    registered_uuids=$(kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" logs "${pod}" -c "${SIDECAR_CONTAINER_NAME}" 2>/dev/null | \
+        grep -o 'uuid="GPU-[^"]*"' | sed 's/uuid="//;s/"$//' | sort -u || echo "")
+
+    if [[ -z "${registered_uuids}" ]]; then
+        warn "Could not find registered GPU UUIDs in logs"
+        return 1
+    fi
+
+    echo "${cyan} Registered GPU UUIDs:${reset}"
+    echo "${registered_uuids}" | while read -r uuid; do
+        echo " - ${uuid}"
+    done
+    echo ""
+
+    pause
+
+    step "Compare UUIDs"
+    local match_count=0
+    local total_count=0
+
+    while read -r smi_uuid; do
+        [[ -z "${smi_uuid}" ]] && continue
+        total_count=$((total_count + 1))
+        if echo "${registered_uuids}" | grep -q "${smi_uuid}"; then
+            echo "${green} ✓ ${smi_uuid} - MATCHED${reset}"
+            match_count=$((match_count + 1))
+        else
+            echo "${red} ✗ ${smi_uuid} - NOT FOUND in registered GPUs${reset}"
+        fi
+    done <<< "${nvidia_smi_uuids}"
+
+    echo ""
+    if [[ "${match_count}" -eq "${total_count}" && "${total_count}" -gt 0 ]]; then
+        echo "${green} ✓ All ${total_count} GPU(s) from nvidia-smi are registered in device-api-server${reset}"
+    else
+        warn "UUID mismatch: ${match_count}/${total_count} GPUs matched"
+    fi
+
+    pause
+}
+
+demonstrate_crash_recovery() {
+    banner "Step 6: Demonstrate Crash Recovery"
+
+    info "The sidecar architecture provides crash isolation."
+    info "If the NVML provider crashes, the API server continues running"
+    info "and will reconnect when the provider restarts."
+ echo "" + + step "Get current pod" + POD=$(kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" get pods -l app.kubernetes.io/name=${APP_NAME} -o jsonpath='{.items[0].metadata.name}') + info "Using pod: ${POD}" + + pause + + if confirm_destructive "Kill the nvml-provider container to demonstrate crash recovery?"; then + step "Killing nvml-provider container..." + run_cmd kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" exec "${POD}" -c "${SIDECAR_CONTAINER_NAME}" -- kill 1 || true + + info "Waiting for container restart..." + sleep 5 + + step "Check pod status (should show restart count)" + run_cmd kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" get pod "${POD}" -o wide + + step "Verify API server continued running" + run_cmd kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" logs "${POD}" -c "${CONTAINER_NAME}" --tail=10 || true + + step "Verify provider reconnected" + run_cmd kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" logs "${POD}" -c "${SIDECAR_CONTAINER_NAME}" --tail=10 || true + else + info "Skipping crash recovery demonstration" + fi + + pause +} + +show_metrics() { + banner "Step 7: View Provider Metrics" + + step "Get pod for port-forward" + POD=$(kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" get pods -l app.kubernetes.io/name=${APP_NAME} -o jsonpath='{.items[0].metadata.name}') + + step "Fetch metrics from the API server" + info "Key metrics to look for:" + info " - device_apiserver_service_status: Whether services are serving" + info " - device_apiserver_build_info: Build information" + info " - grpc_server_*: gRPC request/stream metrics" + echo "" + + run_cmd kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" exec "${POD}" -c "${CONTAINER_NAME}" -- \ + wget -qO- http://localhost:9090/metrics 2>/dev/null | grep -E "^(device_apiserver_|grpc_server_handled_total)" | sort || { + run_cmd kubectl --kubeconfig="${KUBECONFIG}" -n "${NAMESPACE}" exec "${POD}" -c "${CONTAINER_NAME}" -- \ + curl -s 
http://localhost:9090/metrics 2>/dev/null | grep -E "^(device_apiserver_|grpc_server_handled_total)" | sort || true + } + + pause +} + +cleanup() { + banner "Cleanup" + + if confirm_destructive "Remove the sidecar deployment and restore default?"; then + step "Uninstalling Helm release..." + run_cmd helm uninstall "${RELEASE_NAME}" \ + --kubeconfig="${KUBECONFIG}" \ + --namespace "${NAMESPACE}" || true + + info "Cleanup complete!" + else + info "Skipping cleanup. Release '${RELEASE_NAME}' left in namespace '${NAMESPACE}'" + fi +} + +show_summary() { + banner "Demo Complete!" + + echo "${white}What we demonstrated:${reset}" + echo "${green} ✓ Built separate images for device-api-server and device-api-server-sidecar${reset}" + echo "${green} ✓ Deployed as sidecar architecture via Helm${reset}" + echo "${green} ✓ Verified DaemonSet runs on ALL GPU nodes${reset}" + echo "${green} ✓ Verified GPU UUIDs match between nvidia-smi and device-api-server${reset}" + echo "${green} ✓ Showed crash isolation and recovery${reset}" + echo "${green} ✓ Explored provider metrics${reset}" + echo "" + echo "${white}Images built:${reset}" + echo "${cyan} - ${SERVER_IMAGE}${reset}" + echo "${cyan} - ${SIDECAR_IMAGE}${reset}" + echo "" + echo "${white}Key files:${reset}" + echo "${cyan} - ${DOCKERFILE} # Multi-target container build${reset}" + echo "${cyan} - ${VALUES_FILE} # Helm values for sidecar mode${reset}" + echo "${cyan} - ${CHART_PATH}/ # Helm chart with sidecar support${reset}" + echo "" + echo "${white}Environment variables for customization:${reset}" + echo "${cyan} KUBECONFIG, NAMESPACE, RELEASE_NAME, IMAGE_REGISTRY, IMAGE_TAG,${reset}" + echo "${cyan} SERVER_IMAGE, SIDECAR_IMAGE, BUILD_PLATFORM, GPU_NODE_SELECTOR,${reset}" + echo "${cyan} CHART_PATH, VALUES_FILE, DOCKERFILE${reset}" + echo "" +} + +# ============================================================================== +# Main +# ============================================================================== + +main() 
{ + export KUBECONFIG + + show_intro + show_config + check_prereqs + show_cluster_info + + if confirm "Build and push container images?"; then + build_images + else + info "Skipping image build. Using existing images at ${IMAGE_REGISTRY}" + fi + + show_values_file + + if confirm "Deploy the sidecar architecture to the cluster?"; then + deploy_sidecar + verify_gpu_registration + demonstrate_crash_recovery + show_metrics + cleanup + else + info "Skipping deployment" + fi + + show_summary +} + +# Run main if script is executed (not sourced) +if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then + main "$@" +fi diff --git a/deployments/container/Dockerfile b/deployments/container/Dockerfile new file mode 100644 index 000000000..d322f3a2f --- /dev/null +++ b/deployments/container/Dockerfile @@ -0,0 +1,190 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Multi-target Dockerfile for NVSentinel components +# +# Targets: +# device-api-server - Pure Go server (no NVML, uses sidecar provider) +# nvml-provider - NVML provider sidecar (CGO, requires RuntimeClass nvidia) +# +# Build examples: +# # Build device-api-server (default, pure Go) +# docker build --target device-api-server -t nvsentinel/device-api-server . +# +# # Build nvml-provider sidecar +# docker build --target nvml-provider -t nvsentinel/nvml-provider . 
+#
+# Note: NVML provider requires glibc runtime (Debian) for RTLD_DEEPBIND support
+
+# TODO: Add Cosign image signing and SBOM generation to CI/CD pipeline.
+# See: https://docs.sigstore.dev/signing/quickstart/
+# Steps:
+# 1. Sign images with cosign: cosign sign --key <key-ref> <image>
+# 2. Generate SBOM: syft <image> -o cyclonedx-json > sbom.json
+# 3. Attach SBOM: cosign attach sbom --sbom sbom.json <image>
+
+# ==============================================================================
+# Build Arguments
+# ==============================================================================
+
+ARG GOLANG_VERSION=1.25
+ARG VERSION=dev
+ARG GIT_COMMIT=unknown
+ARG GIT_TREE_STATE=dirty
+ARG BUILD_DATE
+
+# ==============================================================================
+# Base Builder - Pure Go (Alpine)
+# ==============================================================================
+
+FROM golang:${GOLANG_VERSION}-alpine AS builder-alpine
+
+ARG VERSION
+ARG GIT_COMMIT
+ARG GIT_TREE_STATE
+ARG BUILD_DATE
+
+WORKDIR /workspace
+
+# Install build dependencies
+RUN apk add --no-cache git make
+
+# Copy go mod files first for caching
+COPY go.mod go.sum ./
+
+# Download dependencies
+RUN go mod download
+
+# Copy source code
+COPY . .
+ +# Version package path +ARG VERSION_PKG=github.com/nvidia/nvsentinel/pkg/version + +# Build device-api-server (CGO disabled, pure Go) +RUN CGO_ENABLED=0 GOOS=linux go build \ + -ldflags "-s -w \ + -X ${VERSION_PKG}.Version=${VERSION} \ + -X ${VERSION_PKG}.GitCommit=${GIT_COMMIT} \ + -X ${VERSION_PKG}.GitTreeState=${GIT_TREE_STATE} \ + -X ${VERSION_PKG}.BuildDate=${BUILD_DATE}" \ + -o /build/device-api-server \ + ./cmd/device-api-server + +# ============================================================================== +# Base Builder - CGO (Debian/glibc) +# ============================================================================== + +FROM golang:${GOLANG_VERSION}-bookworm AS builder-debian + +ARG VERSION +ARG GIT_COMMIT +ARG GIT_TREE_STATE +ARG BUILD_DATE + +WORKDIR /workspace + +# Install build dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + git \ + make \ + && rm -rf /var/lib/apt/lists/* + +# Copy go mod files first for caching +COPY go.mod go.sum ./ + +# Download dependencies +RUN go mod download + +# Copy source code +COPY . . + +# Version package path +ARG VERSION_PKG=github.com/nvidia/nvsentinel/pkg/version + +# Build nvml-provider (CGO enabled for go-nvml) +RUN CGO_ENABLED=1 go build \ + -tags=nvml \ + -ldflags "-s -w \ + -X ${VERSION_PKG}.Version=${VERSION} \ + -X ${VERSION_PKG}.GitCommit=${GIT_COMMIT} \ + -X ${VERSION_PKG}.GitTreeState=${GIT_TREE_STATE} \ + -X ${VERSION_PKG}.BuildDate=${BUILD_DATE}" \ + -o /build/nvml-provider \ + ./cmd/nvml-provider + +# ============================================================================== +# Target: device-api-server +# ============================================================================== +# Pure Go server with no NVML dependencies. Uses sidecar provider for GPU access. +# Small image size, fast startup, works on any architecture. + +# Pinned to digest for reproducible builds. 
Update with: +# docker manifest inspect alpine:3.21 | jq '.manifests[] | select(.platform.architecture=="amd64") | .digest' +FROM alpine:3.21@sha256:22e0ec13c0db6b3e1ba3280e831fc50ba7bffe58e81f31670a64b1afede247bc AS device-api-server + +LABEL org.opencontainers.image.source="https://github.com/nvidia/nvsentinel" +LABEL org.opencontainers.image.description="NVSentinel Device API Server - Node-local GPU device state cache" +LABEL org.opencontainers.image.licenses="Apache-2.0" +LABEL org.opencontainers.image.title="device-api-server" + +# Add ca-certificates for HTTPS +RUN apk add --no-cache ca-certificates + +WORKDIR / + +COPY --from=builder-alpine --chmod=755 /build/device-api-server /device-api-server + +# Run as non-root user (nobody) +USER 65534:65534 + +# Health probe port (configurable via --health-probe-bind-address) +EXPOSE 8081 +# Metrics port (configurable via --metrics-bind-address) +EXPOSE 9090 + +ENTRYPOINT ["/device-api-server"] + +# ============================================================================== +# Target: nvml-provider +# ============================================================================== +# NVML provider sidecar for GPU enumeration and health monitoring. +# Requires glibc runtime (Debian) for RTLD_DEEPBIND support. +# Must run with RuntimeClass: nvidia to access NVML libraries. + +# Pinned to digest for reproducible builds. 
Update with: +# docker manifest inspect debian:bookworm-slim | jq '.manifests[] | select(.platform.architecture=="amd64") | .digest' +FROM debian:bookworm-slim@sha256:6458e6ce2b6448e31bfdced4be7d8aa88d389e6694ab09f5a718a694abe147f4 AS nvml-provider + +LABEL org.opencontainers.image.source="https://github.com/nvidia/nvsentinel" +LABEL org.opencontainers.image.description="NVSentinel NVML Provider - GPU enumeration and health monitoring sidecar" +LABEL org.opencontainers.image.licenses="Apache-2.0" +LABEL org.opencontainers.image.title="nvml-provider" + +# Add ca-certificates for HTTPS +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR / + +COPY --from=builder-debian --chmod=755 /build/nvml-provider /nvml-provider + +# Run as non-root user +USER 65534:65534 + +# Health check port +EXPOSE 8082 + +ENTRYPOINT ["/nvml-provider"] diff --git a/deployments/helm/device-api-server/Chart.yaml b/deployments/helm/device-api-server/Chart.yaml new file mode 100644 index 000000000..10f76a543 --- /dev/null +++ b/deployments/helm/device-api-server/Chart.yaml @@ -0,0 +1,51 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v2 +name: device-api-server +description: | + Device API Server - Node-local GPU device state cache server for Kubernetes. 
+ + The Device API Server acts as an intermediary between providers (health monitors) + that update GPU device states and consumers (device plugins, DRA drivers) that + read device states for scheduling decisions. + + Key features: + - Read-blocking semantics during provider updates + - Sidecar architecture for NVML isolation + - Multiple provider and consumer support + - Prometheus metrics and alerting + - Health-based GPU scheduling decisions +type: application +version: 0.1.0 +appVersion: "0.1.0" +kubeVersion: ">=1.25.0-0" +keywords: + - nvidia + - gpu + - device + - nvml + - health + - daemonset + - grpc +home: https://github.com/nvidia/nvsentinel +sources: + - https://github.com/nvidia/nvsentinel +maintainers: + - name: NVIDIA + url: https://github.com/nvidia +icon: https://www.nvidia.com/favicon.ico +annotations: + artifacthub.io/license: Apache-2.0 + artifacthub.io/category: monitoring-logging diff --git a/deployments/helm/device-api-server/README.md b/deployments/helm/device-api-server/README.md new file mode 100644 index 000000000..b8990a413 --- /dev/null +++ b/deployments/helm/device-api-server/README.md @@ -0,0 +1,263 @@ +# Device API Server Helm Chart + +Node-local GPU device state cache server for Kubernetes. + +## Introduction + +The Device API Server is a DaemonSet that runs on each GPU node, providing a local gRPC cache for GPU device states. 
It acts as an intermediary between: + +- **Providers** (health monitors) that update GPU device states +- **Consumers** (device plugins, DRA drivers) that read device states for scheduling decisions + +Key features: + +- Read-blocking semantics during provider updates +- Multiple provider and consumer support +- Optional NVML fallback provider for GPU enumeration and XID monitoring +- Prometheus metrics and alerting +- Unix socket for node-local communication + +## Prerequisites + +- Kubernetes 1.25+ +- Helm 3.0+ +- (Optional) NVIDIA GPU Operator for NVML provider support +- (Optional) Prometheus Operator for ServiceMonitor/PrometheusRule + +## Installation + +### Quick Start + +```bash +# Add the Helm repository (when published) +helm repo add nvsentinel https://nvidia.github.io/nvsentinel +helm repo update + +# Install with default configuration +helm install device-api-server nvsentinel/device-api-server \ + --namespace device-api --create-namespace +``` + +### Install from Local Chart + +```bash +helm install device-api-server ./deployments/helm/device-api-server \ + --namespace device-api --create-namespace +``` + +### Install with NVML Provider + +To enable built-in GPU enumeration and health monitoring via NVML: + +```bash +helm install device-api-server ./deployments/helm/device-api-server \ + --namespace device-api --create-namespace \ + --set nvmlProvider.enabled=true +``` + +> **Note**: NVML provider requires the `nvidia` RuntimeClass. Install the NVIDIA GPU Operator or create it manually. + +### Install with Prometheus Monitoring + +```bash +helm install device-api-server ./deployments/helm/device-api-server \ + --namespace device-api --create-namespace \ + --set metrics.serviceMonitor.enabled=true \ + --set metrics.prometheusRule.enabled=true +``` + +## Configuration + +See [values.yaml](values.yaml) for the full list of configurable parameters. 
+ +### Key Parameters + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `image.repository` | Image repository | `ghcr.io/nvidia/device-api-server` | +| `image.tag` | Image tag | Chart appVersion | +| `server.grpcAddress` | gRPC server address | `:50051` | +| `server.unixSocket` | Unix socket path | `/var/run/device-api/device.sock` | +| `server.healthPort` | Health endpoint port | `8081` | +| `server.metricsPort` | Metrics endpoint port | `9090` | +| `nvmlProvider.enabled` | Enable NVML provider sidecar | `false` | +| `nvmlProvider.driverRoot` | NVIDIA driver library root | `/run/nvidia/driver` | +| `nvmlProvider.healthCheckEnabled` | Enable XID event monitoring | `true` | +| `runtimeClassName` | Pod RuntimeClass | `""` | +| `nodeSelector` | Node selector | `nvidia.com/gpu.present: "true"` | +| `metrics.serviceMonitor.enabled` | Create ServiceMonitor | `false` | +| `metrics.prometheusRule.enabled` | Create PrometheusRule | `false` | + +### Resource Configuration + +```yaml +resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 200m + memory: 256Mi +``` + +### NVML Provider Configuration + +```yaml +nvmlProvider: + enabled: true + driverRoot: /run/nvidia/driver + healthCheckEnabled: true +``` + +Default ignored XIDs (application errors): 13, 31, 43, 45, 68, 109 + +### Node Scheduling + +By default, the DaemonSet schedules only on nodes with `nvidia.com/gpu.present=true` label: + +```yaml +nodeSelector: + nvidia.com/gpu.present: "true" + +tolerations: + - key: nvidia.com/gpu + operator: Exists + effect: NoSchedule +``` + +Override for custom environments: + +```bash +helm install device-api-server ./deployments/helm/device-api-server \ + --set 'nodeSelector.node-type=gpu' \ + --set 'nodeSelector.nvidia\.com/gpu\.present=null' +``` + +## Metrics + +The server exposes Prometheus metrics at `/metrics` on the configured `metricsPort`. 
+ +### Available Metrics + +| Metric | Type | Description | +|--------|------|-------------| +| `device_api_server_info` | Gauge | Server information | +| `device_api_server_cache_gpus_total` | Gauge | Total GPUs in cache | +| `device_api_server_cache_gpus_healthy` | Gauge | Healthy GPUs | +| `device_api_server_cache_gpus_unhealthy` | Gauge | Unhealthy GPUs | +| `device_api_server_cache_updates_total` | Counter | Cache update operations | +| `device_api_server_watch_streams_active` | Gauge | Active watch streams | +| `device_api_server_watch_events_total` | Counter | Watch events sent | +| `device_api_server_nvml_provider_enabled` | Gauge | NVML provider status | +| `device_api_server_nvml_gpu_count` | Gauge | GPUs discovered by NVML | + +### Alerting Rules + +When `metrics.prometheusRule.enabled=true`, the following alerts are configured: + +| Alert | Severity | Description | +|-------|----------|-------------| +| `DeviceAPIServerDown` | Critical | Server unreachable for 5m | +| `DeviceAPIServerHighLatency` | Warning | P99 latency > 500ms | +| `DeviceAPIServerHighErrorRate` | Warning | Error rate > 10% | +| `DeviceAPIServerUnhealthyGPUs` | Warning | Unhealthy GPUs detected | +| `DeviceAPIServerNoGPUs` | Warning | No GPUs registered for 10m | +| `DeviceAPIServerNVMLProviderDown` | Warning | NVML provider not running | + +## Client Connection + +Clients on the same node can connect via: + +### Unix Socket (Recommended) + +```go +conn, err := grpc.Dial( + "unix:///var/run/device-api/device.sock", + grpc.WithInsecure(), +) +``` + +### TCP + +```go +conn, err := grpc.Dial( + "localhost:50051", + grpc.WithInsecure(), +) +``` + +### grpcurl Examples + +```bash +# List available services +grpcurl -plaintext localhost:50051 list + +# List GPUs +grpcurl -plaintext localhost:50051 nvidia.device.v1alpha1.GpuService/ListGpus + +# Watch GPU changes +grpcurl -plaintext localhost:50051 nvidia.device.v1alpha1.GpuService/WatchGpus +``` + +## Upgrading + +```bash +helm upgrade 
device-api-server ./deployments/helm/device-api-server \ + --namespace device-api \ + --reuse-values \ + --set image.tag=v0.2.0 +``` + +## Uninstallation + +```bash +helm uninstall device-api-server --namespace device-api +``` + +## Troubleshooting + +### Pod Not Scheduling + +Check node labels: + +```bash +kubectl get nodes --show-labels | grep gpu +``` + +Ensure nodes have `nvidia.com/gpu.present=true` or override `nodeSelector`. + +### NVML Provider Fails to Start + +1. Verify RuntimeClass exists: + + ```bash + kubectl get runtimeclass nvidia + ``` + +2. Check NVIDIA driver is installed on nodes: + + ```bash + kubectl debug node/ -it --image=nvidia/cuda:12.0-base -- nvidia-smi + ``` + +3. Check pod logs for NVML errors: + + ```bash + kubectl logs -n device-api -l app.kubernetes.io/name=device-api-server + ``` + +### Permission Denied on Unix Socket + +If using custom security contexts, ensure the socket directory is writable: + +```yaml +securityContext: + runAsUser: 0 # May be needed for hostPath access + runAsNonRoot: false +``` + +## License + +Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. + +Licensed under the Apache License, Version 2.0. diff --git a/deployments/helm/device-api-server/chart_test.go b/deployments/helm/device-api-server/chart_test.go new file mode 100644 index 000000000..cc5a42864 --- /dev/null +++ b/deployments/helm/device-api-server/chart_test.go @@ -0,0 +1,181 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package chart_test + +import ( + "os" + "os/exec" + "strings" + "testing" +) + +// chartDir returns the path to the Helm chart directory. +func chartDir(t *testing.T) string { + t.Helper() + // When running from the chart directory itself + if _, err := os.Stat("Chart.yaml"); err == nil { + wd, _ := os.Getwd() + return wd + } + t.Fatal("Chart.yaml not found; run tests from the chart directory") + return "" +} + +// helmTemplate runs helm template with optional --set overrides and returns stdout. +func helmTemplate(t *testing.T, sets ...string) string { + t.Helper() + args := []string{"template", "test-release", chartDir(t)} + for _, s := range sets { + args = append(args, "--set", s) + } + cmd := exec.Command("helm", args...) + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("helm template failed: %v\n%s", err, string(out)) + } + return string(out) +} + +func TestChart_DefaultRenders(t *testing.T) { + out := helmTemplate(t) + if len(out) == 0 { + t.Fatal("helm template produced no output") + } + // Should contain a DaemonSet + if !strings.Contains(out, "kind: DaemonSet") { + t.Error("Expected DaemonSet in rendered output") + } + // Should contain a ServiceAccount + if !strings.Contains(out, "kind: ServiceAccount") { + t.Error("Expected ServiceAccount in rendered output") + } +} + +func TestChart_TerminationGracePeriod_Default(t *testing.T) { + out := helmTemplate(t) + // Default: shutdownDelay(5) + shutdownGracePeriod(25) + 5 = 35 + if !strings.Contains(out, "terminationGracePeriodSeconds: 35") { + t.Errorf("Expected terminationGracePeriodSeconds: 35 with defaults, got:\n%s", + extractLine(out, "terminationGracePeriodSeconds")) + } +} + +func TestChart_TerminationGracePeriod_CustomValues(t *testing.T) { + out := helmTemplate(t, + "server.shutdownDelay=10", + "server.shutdownGracePeriod=60", + ) + // 10 + 60 + 5 = 75 + if !strings.Contains(out, 
"terminationGracePeriodSeconds: 75") { + t.Errorf("Expected terminationGracePeriodSeconds: 75 with custom values, got:\n%s", + extractLine(out, "terminationGracePeriodSeconds")) + } +} + +func TestChart_NoNVMLSidecar_ByDefault(t *testing.T) { + out := helmTemplate(t) + if strings.Contains(out, "name: nvml-provider") { + t.Error("NVML provider sidecar should not be present by default") + } +} + +func TestChart_NVMLSidecar_WhenEnabled(t *testing.T) { + out := helmTemplate(t, "nvmlProvider.enabled=true") + if !strings.Contains(out, "name: nvml-provider") { + t.Error("NVML provider sidecar should be present when enabled") + } + // Should have NVIDIA_VISIBLE_DEVICES env var + if !strings.Contains(out, "NVIDIA_VISIBLE_DEVICES") { + t.Error("Expected NVIDIA_VISIBLE_DEVICES env var in nvml-provider sidecar") + } +} + +func TestChart_BindAddress(t *testing.T) { + out := helmTemplate(t) + // Default binds to unix socket + if !strings.Contains(out, "--bind-address=unix:///var/run/device-api/device.sock") { + t.Error("Expected default --bind-address=unix:///var/run/device-api/device.sock") + } +} + +func TestChart_SecurityContext(t *testing.T) { + out := helmTemplate(t) + if !strings.Contains(out, "readOnlyRootFilesystem: true") { + t.Error("Expected readOnlyRootFilesystem: true in security context") + } + if !strings.Contains(out, "runAsNonRoot: true") { + t.Error("Expected runAsNonRoot: true in security context") + } + if !strings.Contains(out, "allowPrivilegeEscalation: false") { + t.Error("Expected allowPrivilegeEscalation: false in security context") + } +} + +func TestChart_SocketVolume(t *testing.T) { + out := helmTemplate(t) + if !strings.Contains(out, "name: socket-dir") { + t.Error("Expected socket-dir volume") + } + if !strings.Contains(out, "/var/run/device-api") { + t.Error("Expected socket directory path /var/run/device-api") + } +} + +func TestChart_MetricsPort_WhenEnabled(t *testing.T) { + out := helmTemplate(t, "metrics.enabled=true") + if 
!strings.Contains(out, "name: metrics") { + t.Error("Expected metrics port when metrics are enabled") + } +} + +func TestChart_MetricsPort_WhenDisabled(t *testing.T) { + out := helmTemplate(t, "metrics.enabled=false") + // The metrics port should not appear in containerPort definitions + lines := strings.Split(out, "\n") + for i, line := range lines { + if strings.Contains(line, "name: metrics") && + i > 0 && strings.Contains(lines[i-1], "containerPort") { + t.Error("Metrics port should not be present when metrics are disabled") + } + } +} + +func TestChart_NodeSelector(t *testing.T) { + out := helmTemplate(t) + if !strings.Contains(out, "nvidia.com/gpu.present") { + t.Error("Expected GPU node selector by default") + } +} + +func TestChart_PreStopHook(t *testing.T) { + out := helmTemplate(t) + // preStop sleep should match shutdownDelay + if !strings.Contains(out, `command: ["sleep", "5"]`) { + // Try alternate format + if !strings.Contains(out, "sleep") { + t.Error("Expected preStop sleep hook") + } + } +} + +// extractLine returns the first line containing the given substring. +func extractLine(s, substr string) string { + for _, line := range strings.Split(s, "\n") { + if strings.Contains(line, substr) { + return strings.TrimSpace(line) + } + } + return "" +} diff --git a/deployments/helm/device-api-server/templates/NOTES.txt b/deployments/helm/device-api-server/templates/NOTES.txt new file mode 100644 index 000000000..bf22b58ef --- /dev/null +++ b/deployments/helm/device-api-server/templates/NOTES.txt @@ -0,0 +1,126 @@ +{{/* +Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} +=============================================================================== + NVIDIA Device API Server has been installed! +=============================================================================== + +Release: {{ .Release.Name }} +Namespace: {{ .Release.Namespace }} +Chart Version: {{ .Chart.Version }} +App Version: {{ .Chart.AppVersion }} + +------------------------------------------------------------------------------- + Configuration Summary +------------------------------------------------------------------------------- + +Unix Socket: {{ .Values.server.unixSocket }} +Health Port: {{ .Values.server.healthPort }} +Metrics Port: {{ .Values.server.metricsPort }} +{{- if .Values.nvmlProvider.enabled }} +NVML Provider Sidecar: Enabled + - Connects via: unix://{{ .Values.server.unixSocket }} + - Driver Root: {{ .Values.nvmlProvider.driverRoot }} + - Health Check: {{ .Values.nvmlProvider.healthCheckEnabled }} +{{- else }} +NVML Provider Sidecar: Disabled +{{- end }} + +------------------------------------------------------------------------------- + Verify Installation +------------------------------------------------------------------------------- + +1. Check that DaemonSet pods are running on GPU nodes: + + kubectl get pods -n {{ .Release.Namespace }} -l app.kubernetes.io/instance={{ .Release.Name }} -o wide + +2. Check pod logs: + + kubectl logs -n {{ .Release.Namespace }} -l app.kubernetes.io/instance={{ .Release.Name }} -f + +3. 
Verify metrics endpoint (from within the cluster):
+
+   kubectl run -n {{ .Release.Namespace }} --rm -it --restart=Never --image=curlimages/curl:latest curl -- \
+     curl -s http://{{ include "device-api-server.fullname" . }}-metrics.{{ .Release.Namespace }}.svc:{{ .Values.server.metricsPort }}/metrics | head -20
+
+{{- if .Values.metrics.enabled }}
+
+-------------------------------------------------------------------------------
+ Metrics & Monitoring
+-------------------------------------------------------------------------------
+
+Metrics endpoint: http://<node-ip>:{{ .Values.server.metricsPort }}/metrics
+
+{{- if .Values.metrics.serviceMonitor.enabled }}
+ServiceMonitor: Enabled (Prometheus will auto-discover)
+{{- else }}
+ServiceMonitor: Disabled
+  To enable Prometheus auto-discovery, upgrade with:
+    --set metrics.serviceMonitor.enabled=true
+{{- end }}
+
+{{- if .Values.metrics.prometheusRule.enabled }}
+PrometheusRule: Enabled (alerts configured)
+{{- else }}
+PrometheusRule: Disabled
+  To enable alerting rules, upgrade with:
+    --set metrics.prometheusRule.enabled=true
+{{- end }}
+{{- end }}
+
+-------------------------------------------------------------------------------
+ Client Connection
+-------------------------------------------------------------------------------
+
+Providers and consumers on the same node can connect via:
+
+  - Unix Socket: unix://{{ .Values.server.unixSocket }}
+
+Example using grpcurl:
+
+  # List available services (via health/admin port)
+  grpcurl -plaintext localhost:{{ .Values.server.healthPort }} list
+
+  # List GPUs (via unix socket, requires grpcurl with unix support)
+  grpcurl -plaintext -unix {{ .Values.server.unixSocket }} \
+    nvidia.device.v1alpha1.GpuService/ListGpus
+
+{{- if .Values.nvmlProvider.enabled }}
+
+-------------------------------------------------------------------------------
+ NVML Provider Sidecar Notes
+-------------------------------------------------------------------------------
+
+The NVML provider sidecar 
requires: + 1. RuntimeClass "nvidia" must exist in the cluster + 2. NVIDIA GPU Operator or Container Toolkit installed + 3. Nodes must have NVIDIA GPUs + +Verify RuntimeClass exists: + kubectl get runtimeclass nvidia + +If not present, create it or install the NVIDIA GPU Operator: + https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/ + +{{- end }} + +------------------------------------------------------------------------------- + Support +------------------------------------------------------------------------------- + +Documentation: https://github.com/nvidia/nvsentinel +Issues: https://github.com/nvidia/nvsentinel/issues + +=============================================================================== diff --git a/deployments/helm/device-api-server/templates/_helpers.tpl b/deployments/helm/device-api-server/templates/_helpers.tpl new file mode 100644 index 000000000..8771b2ec9 --- /dev/null +++ b/deployments/helm/device-api-server/templates/_helpers.tpl @@ -0,0 +1,95 @@ +{{/* +Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{/* +Expand the name of the chart. +*/}} +{{- define "device-api-server.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 
+If release name contains chart name it will be used as a full name. +*/}} +{{- define "device-api-server.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "device-api-server.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "device-api-server.labels" -}} +helm.sh/chart: {{ include "device-api-server.chart" . }} +{{ include "device-api-server.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/part-of: device-api +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "device-api-server.selectorLabels" -}} +app.kubernetes.io/name: {{ include "device-api-server.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +app.kubernetes.io/component: device-api-server +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "device-api-server.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "device-api-server.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Create the image name +*/}} +{{- define "device-api-server.image" -}} +{{- $tag := default .Chart.AppVersion .Values.image.tag -}} +{{- printf "%s:%s" .Values.image.repository $tag }} +{{- end }} + +{{/* +Socket directory path +*/}} +{{- define "device-api-server.socketDir" -}} +{{- .Values.server.unixSocket | dir }} +{{- end }} diff --git a/deployments/helm/device-api-server/templates/daemonset.yaml b/deployments/helm/device-api-server/templates/daemonset.yaml new file mode 100644 index 000000000..7143ddb5f --- /dev/null +++ b/deployments/helm/device-api-server/templates/daemonset.yaml @@ -0,0 +1,222 @@ +{{/* +Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} +{{- if not (or (hasPrefix "/var/run/" .Values.server.unixSocket) (hasPrefix "/tmp/" .Values.server.unixSocket)) }} +{{- fail "server.unixSocket must be an absolute path under /var/run/ or /tmp/" }} +{{- end }} +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "device-api-server.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "device-api-server.labels" . | nindent 4 }} +spec: + selector: + matchLabels: + {{- include "device-api-server.selectorLabels" . 
| nindent 6 }} + updateStrategy: + {{- toYaml .Values.updateStrategy | nindent 4 }} + template: + metadata: + annotations: + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "device-api-server.labels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "device-api-server.serviceAccountName" . }} + automountServiceAccountToken: {{ .Values.serviceAccount.automountServiceAccountToken }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.priorityClassName }} + priorityClassName: {{ . }} + {{- end }} + {{- with .Values.runtimeClassName }} + runtimeClassName: {{ . }} + {{- end }} + initContainers: + # Set restrictive permissions on the socket directory + - name: init-socket-dir + image: {{ include "device-api-server.image" . }} + command: ["sh", "-c", "mkdir -p {{ include "device-api-server.socketDir" . }} && chmod 0750 {{ include "device-api-server.socketDir" . }}"] + securityContext: + runAsUser: 0 + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + volumeMounts: + - name: socket-dir + mountPath: {{ include "device-api-server.socketDir" . }} + {{- with .Values.initContainers }} + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ .Chart.Name }} + image: {{ include "device-api-server.image" . 
}} + imagePullPolicy: {{ .Values.image.pullPolicy }} + args: + - --bind-address=unix://{{ .Values.server.unixSocket }} + - --health-probe-bind-address=:{{ .Values.server.healthPort }} + - --metrics-bind-address=:{{ .Values.server.metricsPort }} + - --shutdown-grace-period={{ .Values.server.shutdownGracePeriod }}s + - -v={{ .Values.logging.verbosity }} + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + {{- with .Values.env }} + {{- toYaml . | nindent 12 }} + {{- end }} + lifecycle: + preStop: + exec: + # Sleep to allow k8s to propagate endpoint removal + command: ["sleep", "{{ .Values.server.shutdownDelay }}"] + ports: + - name: health + containerPort: {{ .Values.server.healthPort }} + protocol: TCP + {{- if .Values.metrics.enabled }} + - name: metrics + containerPort: {{ .Values.server.metricsPort }} + protocol: TCP + {{- end }} + # Health probes use the TCP admin port (gRPC health service). + # The server's health monitor checks both storage readiness and + # service readiness before reporting SERVING, so a passing probe + # implies the device socket is functional. K8s does not support + # Unix domain socket probes natively. + {{- with .Values.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumeMounts: + - name: socket-dir + mountPath: {{ include "device-api-server.socketDir" . }} + {{- with .Values.extraVolumeMounts }} + {{- toYaml . 
| nindent 12 }} + {{- end }} + {{- if .Values.nvmlProvider.enabled }} + # NVML Provider sidecar container + - name: nvml-provider + image: "{{ .Values.nvmlProvider.image.repository }}:{{ .Values.nvmlProvider.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.nvmlProvider.image.pullPolicy }} + args: + - --server-address=unix://{{ .Values.server.unixSocket }} + - --provider-id={{ .Values.nvmlProvider.providerID }} + - --driver-root={{ .Values.nvmlProvider.driverRoot }} + - --health-check={{ .Values.nvmlProvider.healthCheckEnabled }} + - --health-port={{ .Values.nvmlProvider.healthPort }} + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + # NVIDIA Container Toolkit environment variables + - name: NVIDIA_VISIBLE_DEVICES + value: "all" + - name: NVIDIA_DRIVER_CAPABILITIES + value: "utility" + ports: + - name: provider-health + containerPort: {{ .Values.nvmlProvider.healthPort }} + protocol: TCP + livenessProbe: + httpGet: + path: /healthz + port: provider-health + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /readyz + port: provider-health + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 5 + failureThreshold: 3 + {{- with .Values.nvmlProvider.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.nvmlProvider.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumeMounts: + - name: socket-dir + mountPath: {{ include "device-api-server.socketDir" . }} + {{- end }} + {{- with .Values.sidecars }} + {{- toYaml . | nindent 8 }} + {{- end }} + volumes: + - name: socket-dir + hostPath: + path: {{ include "device-api-server.socketDir" . }} + type: DirectoryOrCreate + {{- with .Values.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . 
| nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      # terminationGracePeriodSeconds = preStop sleep + shutdown grace period + buffer
+      # k8s runs the preStop hook to completion before sending SIGTERM, so the grace period must cover both phases.
+      terminationGracePeriodSeconds: {{ add .Values.server.shutdownDelay .Values.server.shutdownGracePeriod 5 }}
diff --git a/deployments/helm/device-api-server/templates/prometheusrule.yaml b/deployments/helm/device-api-server/templates/prometheusrule.yaml
new file mode 100644
index 000000000..3a82faca6
--- /dev/null
+++ b/deployments/helm/device-api-server/templates/prometheusrule.yaml
@@ -0,0 +1,93 @@
+{{/*
+Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/}}
+{{- if and .Values.metrics.enabled .Values.metrics.prometheusRule.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: {{ include "device-api-server.fullname" . }}
+  namespace: {{ .Values.metrics.prometheusRule.namespace | default .Release.Namespace }}
+  labels:
+    {{- include "device-api-server.labels" . | nindent 4 }}
+    {{- with .Values.metrics.prometheusRule.labels }}
+    {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + groups: + - name: device-api-server + rules: + # Server availability + - alert: DeviceAPIServerDown + expr: up{job="{{ include "device-api-server.fullname" . }}-metrics"} == 0 + for: 5m + labels: + severity: critical + annotations: + summary: "Device API Server is down on {{ "{{ $labels.instance }}" }}" + description: "Device API Server has been unreachable for more than 5 minutes." + runbook_url: "https://github.com/nvidia/device-api/blob/main/docs/operations/device-api-server.md#alert-deviceapiserverdown" + + # High latency + - alert: DeviceAPIServerHighLatency + expr: | + histogram_quantile(0.99, + sum(rate(grpc_server_handling_seconds_bucket{ + grpc_service="nvidia.device.v1alpha1.GpuService" + }[5m])) by (le, instance) + ) > 0.5 + for: 5m + labels: + severity: warning + annotations: + summary: "Device API Server high latency on {{ "{{ $labels.instance }}" }}" + description: "P99 latency is above 500ms for more than 5 minutes." + runbook_url: "https://github.com/nvidia/device-api/blob/main/docs/operations/device-api-server.md#alert-deviceapiserverhighlatency" + + # High error rate + - alert: DeviceAPIServerHighErrorRate + expr: | + sum(rate(grpc_server_handled_total{ + grpc_code!="OK", + grpc_service=~"nvidia.device.v1alpha1.*" + }[5m])) by (instance) + / + sum(rate(grpc_server_handled_total{ + grpc_service=~"nvidia.device.v1alpha1.*" + }[5m])) by (instance) + > 0.1 + for: 5m + labels: + severity: warning + annotations: + summary: "Device API Server high error rate on {{ "{{ $labels.instance }}" }}" + description: "Error rate is above 10% for more than 5 minutes." + runbook_url: "https://github.com/nvidia/device-api/blob/main/docs/operations/device-api-server.md#alert-deviceapiserverhigherrorrate" + + # High memory usage + - alert: DeviceAPIServerHighMemory + expr: | + process_resident_memory_bytes{job="{{ include "device-api-server.fullname" . 
}}-metrics"} > 512 * 1024 * 1024 + for: 10m + labels: + severity: warning + annotations: + summary: "Device API Server high memory usage on {{ "{{ $labels.instance }}" }}" + description: "Memory usage is above 512MB for more than 10 minutes." + runbook_url: "https://github.com/nvidia/device-api/blob/main/docs/operations/device-api-server.md#alert-deviceapiserverhighmemory" + + {{- with .Values.metrics.prometheusRule.additionalRules }} + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/deployments/helm/device-api-server/templates/service.yaml b/deployments/helm/device-api-server/templates/service.yaml new file mode 100644 index 000000000..64ee33c40 --- /dev/null +++ b/deployments/helm/device-api-server/templates/service.yaml @@ -0,0 +1,37 @@ +{{/* +Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} +{{- if .Values.metrics.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "device-api-server.fullname" . }}-metrics + namespace: {{ .Release.Namespace }} + labels: + {{- include "device-api-server.labels" . | nindent 4 }} + {{- with .Values.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.server.metricsPort }} + targetPort: metrics + protocol: TCP + name: metrics + selector: + {{- include "device-api-server.selectorLabels" . 
| nindent 4 }} +{{- end }} diff --git a/deployments/helm/device-api-server/templates/serviceaccount.yaml b/deployments/helm/device-api-server/templates/serviceaccount.yaml new file mode 100644 index 000000000..e4c0a6091 --- /dev/null +++ b/deployments/helm/device-api-server/templates/serviceaccount.yaml @@ -0,0 +1,29 @@ +{{/* +Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} +{{- if .Values.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "device-api-server.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "device-api-server.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ .Values.serviceAccount.automountServiceAccountToken }} +{{- end }} diff --git a/deployments/helm/device-api-server/templates/servicemonitor.yaml b/deployments/helm/device-api-server/templates/servicemonitor.yaml new file mode 100644 index 000000000..cb378ae22 --- /dev/null +++ b/deployments/helm/device-api-server/templates/servicemonitor.yaml @@ -0,0 +1,47 @@ +{{/* +Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} +{{- if and .Values.metrics.enabled .Values.metrics.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "device-api-server.fullname" . }} + namespace: {{ .Values.metrics.serviceMonitor.namespace | default .Release.Namespace }} + labels: + {{- include "device-api-server.labels" . | nindent 4 }} + {{- with .Values.metrics.serviceMonitor.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + matchLabels: + {{- include "device-api-server.selectorLabels" . | nindent 6 }} + namespaceSelector: + matchNames: + - {{ .Release.Namespace }} + endpoints: + - port: metrics + interval: {{ .Values.metrics.serviceMonitor.interval }} + scrapeTimeout: {{ .Values.metrics.serviceMonitor.scrapeTimeout }} + path: /metrics + {{- with .Values.metrics.serviceMonitor.metricRelabelings }} + metricRelabelings: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.metrics.serviceMonitor.relabelings }} + relabelings: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/deployments/helm/device-api-server/values.yaml b/deployments/helm/device-api-server/values.yaml new file mode 100644 index 000000000..9c9dbb907 --- /dev/null +++ b/deployments/helm/device-api-server/values.yaml @@ -0,0 +1,255 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default values for device-api-server. +# This is a YAML-formatted file. + +# -- Number of replicas (ignored for DaemonSet, kept for consistency) +replicaCount: 1 + +# -- Image configuration +image: + # -- Image repository + repository: ghcr.io/nvidia/device-api-server + # -- Image pull policy + pullPolicy: IfNotPresent + # -- Image tag (defaults to Chart appVersion) + tag: "" + +# -- Image pull secrets +imagePullSecrets: [] + +# -- Override the name of the chart +nameOverride: "" + +# -- Override the full name of the chart +fullnameOverride: "" + +# -- Server configuration +server: + # -- Unix socket path for gRPC API (device service) + # Must be an absolute path under /var/run/ or /tmp/. + # WARNING: Arbitrary paths may expose host filesystem risks. + unixSocket: /var/run/device-api/device.sock + # -- HTTP port for health/admin gRPC endpoints + healthPort: 8081 + # -- HTTP port for Prometheus metrics + metricsPort: 9090 + # -- Graceful shutdown grace period in seconds + shutdownGracePeriod: 25 + # -- Shutdown delay in seconds (preStop sleep for k8s endpoint propagation) + shutdownDelay: 5 + +# -- Logging configuration +logging: + # -- Log verbosity level (0=info, higher=more verbose) + verbosity: 0 + +# -- NVML Provider Sidecar configuration +# Deploys the NVML provider as a sidecar container that connects to device-api-server +# via gRPC. This provides better isolation and independent updates compared to the +# built-in nvml provider. 
+nvmlProvider:
+  # -- Enable the NVML provider sidecar container
+  enabled: false
+  # -- Image configuration for the nvml-provider sidecar
+  image:
+    # -- Image repository
+    repository: ghcr.io/nvidia/device-api-server
+    # -- Image tag (defaults to Chart appVersion)
+    tag: ""
+    # -- Image pull policy
+    pullPolicy: IfNotPresent
+  # -- gRPC address of the device-api-server (derived from server.unixSocket in daemonset template)
+  # Sidecar connects via shared unix socket volume.
+  # This value is ignored when the sidecar is enabled; the template uses server.unixSocket directly.
+  # -- Unique identifier for this provider instance
+  providerID: "nvml-provider-sidecar"
+  # -- Root path where NVIDIA driver libraries are located
+  driverRoot: /run/nvidia/driver
+  # -- Enable XID event monitoring for health checks
+  healthCheckEnabled: true
+  # -- HTTP port for health check endpoints
+  healthPort: 8082
+  # -- Resource limits and requests for the sidecar
+  resources:
+    requests:
+      cpu: 50m
+      memory: 64Mi
+    limits:
+      cpu: 200m
+      memory: 128Mi
+  # -- Security context for the sidecar container
+  securityContext:
+    runAsNonRoot: true
+    runAsUser: 65534
+    runAsGroup: 65534
+    readOnlyRootFilesystem: true
+    allowPrivilegeEscalation: false
+    capabilities:
+      drop:
+        - ALL
+
+# -- RuntimeClassName for the pod
+# Set to "nvidia" when nvmlProvider.enabled is true to inject NVIDIA driver libraries
+# Requires the NVIDIA GPU Operator or manual RuntimeClass configuration
+runtimeClassName: ""
+
+# -- ServiceAccount configuration
+serviceAccount:
+  # -- Create a ServiceAccount
+  create: true
+  # -- ServiceAccount name (generated if not set)
+  name: ""
+  # -- Annotations to add to the ServiceAccount
+  annotations: {}
+  # -- Automount service account token
+  automountServiceAccountToken: false
+
+# -- RBAC configuration
+rbac:
+  # -- Create RBAC resources
+  create: true
+
+# -- Pod annotations
+podAnnotations: {}
+
+# -- Pod labels
+podLabels: {}
+
+# -- Pod security context
+podSecurityContext:
+  
runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + +# -- Container security context +securityContext: + runAsNonRoot: true + runAsUser: 65534 + runAsGroup: 65534 + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + +# -- Resource limits and requests +# Default limits handle the common 8-GPU case. For larger nodes, increase: +# - 8 GPUs: 500m CPU, 512Mi memory (default) +# - 16 GPUs: 1000m CPU, 1Gi memory +# Memory usage scales with: GPU count * watch event size * watcher count +resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 500m + memory: 512Mi + +# -- Node selector for scheduling +# @default -- Schedules only on GPU nodes +nodeSelector: + nvidia.com/gpu.present: "true" + +# -- Tolerations for scheduling +tolerations: + - key: nvidia.com/gpu + operator: Exists + effect: NoSchedule + +# -- Affinity rules +affinity: {} + +# -- Priority class name +priorityClassName: "" + +# -- Liveness probe configuration (gRPC health check on admin server) +livenessProbe: + grpc: + port: 8081 + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + +# -- Readiness probe configuration (gRPC health check on admin server) +readinessProbe: + grpc: + port: 8081 + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + +# -- Update strategy for the DaemonSet +updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 1 + +# -- Service configuration (for metrics scraping) +service: + # -- Service type + type: ClusterIP + # -- Service annotations + annotations: {} + +# -- Prometheus metrics configuration +metrics: + # -- Enable metrics endpoint + enabled: true + # -- ServiceMonitor configuration (requires Prometheus Operator) + serviceMonitor: + # -- Create ServiceMonitor resource + enabled: false + # -- ServiceMonitor namespace (defaults to release namespace) + namespace: "" + # -- Additional labels for ServiceMonitor + labels: {} + # -- 
Scrape interval + interval: 30s + # -- Scrape timeout + scrapeTimeout: 10s + # -- Metric relabeling configs + metricRelabelings: [] + # -- Relabeling configs + relabelings: [] + # -- PrometheusRule configuration (requires Prometheus Operator) + prometheusRule: + # -- Create PrometheusRule resource + enabled: false + # -- PrometheusRule namespace (defaults to release namespace) + namespace: "" + # -- Additional labels for PrometheusRule + labels: {} + # -- Additional alerting rules + additionalRules: [] + +# -- Additional environment variables +env: [] +# - name: LOG_FORMAT +# value: json + +# -- Additional volume mounts +extraVolumeMounts: [] + +# -- Additional volumes +extraVolumes: [] + +# -- Init containers +initContainers: [] + +# -- Sidecar containers +sidecars: [] diff --git a/deployments/helm/values-sidecar-test.yaml b/deployments/helm/values-sidecar-test.yaml new file mode 100644 index 000000000..970b54d78 --- /dev/null +++ b/deployments/helm/values-sidecar-test.yaml @@ -0,0 +1,54 @@ +# Sidecar test values - validates nvml-provider sidecar architecture +# Usage: helm upgrade device-api-server deployments/helm/device-api-server -n device-api -f deployments/helm/values-sidecar-test.yaml + +image: + repository: ttl.sh/device-api-server + tag: "2h" + pullPolicy: Always + +# Disable built-in NVML provider (use sidecar instead) +nvml: + enabled: false + +# Enable NVML Provider sidecar +nvmlProvider: + enabled: true + image: + repository: ttl.sh/device-api-server-sidecar + tag: "2h" + pullPolicy: Always + providerID: "nvml-provider-sidecar" + driverRoot: /run/nvidia/driver + healthCheckEnabled: true + healthPort: 8082 + resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 200m + memory: 128Mi + +# Override node selector (cluster uses node-type=gpu instead of nvidia.com/gpu.present) +# Set to null to remove the default, then add only the one we need +nodeSelector: + nvidia.com/gpu.present: null + node-type: gpu + +# RuntimeClass for NVML access 
+runtimeClassName: nvidia + +logging: + verbosity: 2 + +# Run as root to allow hostPath socket creation +podSecurityContext: + runAsNonRoot: false + runAsUser: 0 + runAsGroup: 0 + fsGroup: 0 + +securityContext: + runAsNonRoot: false + runAsUser: 0 + runAsGroup: 0 diff --git a/deployments/static/nvsentinel-daemonset.yaml b/deployments/static/nvsentinel-daemonset.yaml new file mode 100644 index 000000000..beb6c8d87 --- /dev/null +++ b/deployments/static/nvsentinel-daemonset.yaml @@ -0,0 +1,217 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# WARNING: These static manifests use placeholder image tags (v0.0.0). +# For production deployments, use the Helm chart with explicit image versions +# or replace v0.0.0 with a specific release tag (e.g., v1.0.0). + +# NVSentinel Static Deployment Manifest +# +# This manifest deploys the Device API Server with the NVML Provider sidecar. +# For production use, consider using the Helm chart for better configurability. 
+# +# Usage: +# kubectl apply -f nvsentinel-daemonset.yaml +# +# Prerequisites: +# - Kubernetes 1.25+ +# - RuntimeClass 'nvidia' configured (GPU Operator or manual setup) +# - GPU nodes labeled with 'nvidia.com/gpu.present=true' + +--- +apiVersion: v1 +kind: Namespace +metadata: + name: nvsentinel + labels: + app.kubernetes.io/name: nvsentinel +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: nvsentinel + namespace: nvsentinel + labels: + app.kubernetes.io/name: nvsentinel +automountServiceAccountToken: false +--- +apiVersion: v1 +kind: Service +metadata: + name: nvsentinel + namespace: nvsentinel + labels: + app.kubernetes.io/name: nvsentinel +spec: + type: ClusterIP + clusterIP: None # Headless for DaemonSet + selector: + app.kubernetes.io/name: nvsentinel + ports: + - name: health + port: 8081 + targetPort: health + protocol: TCP + - name: metrics + port: 9090 + targetPort: metrics + protocol: TCP +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: nvsentinel + namespace: nvsentinel + labels: + app.kubernetes.io/name: nvsentinel + app.kubernetes.io/component: device-api-server +spec: + selector: + matchLabels: + app.kubernetes.io/name: nvsentinel + updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 1 + template: + metadata: + labels: + app.kubernetes.io/name: nvsentinel + app.kubernetes.io/component: device-api-server + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9090" + prometheus.io/path: "/metrics" + spec: + serviceAccountName: nvsentinel + # runtimeClassName: nvidia enables the NVIDIA Container Runtime, + # required for the nvml-provider sidecar to access GPU devices. + # This requires RuntimeClass 'nvidia' configured in the cluster + # (via NVIDIA GPU Operator or manual setup). 
+ # See: https://kubernetes.io/docs/concepts/containers/runtime-class/ + runtimeClassName: nvidia + nodeSelector: + nvidia.com/gpu.present: "true" + tolerations: + - key: nvidia.com/gpu + operator: Exists + effect: NoSchedule + securityContext: + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + containers: + # Device API Server - Pure Go, no NVML dependencies + - name: device-api-server + image: ghcr.io/nvidia/device-api-server:v0.0.0 # Replace with specific version for production + imagePullPolicy: IfNotPresent + args: + - --bind-address=unix:///var/run/device-api/device.sock + - --health-probe-bind-address=:8081 + - --metrics-bind-address=:9090 + - --shutdown-grace-period=25s + - -v=0 + ports: + - name: health + containerPort: 8081 + protocol: TCP + - name: metrics + containerPort: 9090 + protocol: TCP + livenessProbe: + httpGet: + path: /healthz + port: health + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /readyz + port: health + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 200m + memory: 256Mi + securityContext: + runAsNonRoot: true + runAsUser: 65534 + runAsGroup: 65534 + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + volumeMounts: + - name: device-api-socket + mountPath: /var/run/device-api + + # NVML Provider Sidecar - CGO binary, requires RuntimeClass nvidia + - name: nvml-provider + image: ghcr.io/nvidia/device-api-server:nvml-provider-v0.0.0 # Replace with specific version for production + imagePullPolicy: IfNotPresent + args: + - --server-address=unix:///var/run/device-api/device.sock + - --provider-id=nvml-provider + - --driver-root=/run/nvidia/driver + - --health-port=8082 + - --health-check=true + - -v=0 + ports: + - name: provider-health + containerPort: 8082 + protocol: TCP + livenessProbe: + httpGet: + 
path: /healthz
+            port: provider-health
+          initialDelaySeconds: 5
+          periodSeconds: 10
+          timeoutSeconds: 5
+          failureThreshold: 3
+        readinessProbe:
+          httpGet:
+            path: /readyz
+            port: provider-health
+          initialDelaySeconds: 5
+          periodSeconds: 10
+          timeoutSeconds: 5
+          failureThreshold: 3
+        resources:
+          requests:
+            cpu: 50m
+            memory: 64Mi
+          limits:
+            cpu: 200m
+            memory: 128Mi
+        securityContext:
+          runAsNonRoot: true
+          runAsUser: 65534
+          runAsGroup: 65534
+          readOnlyRootFilesystem: true
+          allowPrivilegeEscalation: false
+          capabilities:
+            drop:
+            - ALL
+        volumeMounts:
+        # Shared socket dir: required so the sidecar can reach the server at
+        # unix:///var/run/device-api/device.sock (see --server-address above).
+        - name: device-api-socket
+          mountPath: /var/run/device-api
+        # Host driver installation, matching --driver-root. NOTE(review):
+        # assumes the NVIDIA driver root is at /run/nvidia/driver on GPU
+        # nodes (GPU Operator default) -- confirm for driverless/manual setups.
+        - name: driver-root
+          mountPath: /run/nvidia/driver
+          readOnly: true
+      volumes:
+      - name: device-api-socket
+        emptyDir: {}
+      - name: driver-root
+        hostPath:
+          path: /run/nvidia/driver
+          type: DirectoryOrCreate
diff --git a/docs/api/device-api-server.md b/docs/api/device-api-server.md
new file mode 100644
index 000000000..22b0c6ee9
--- /dev/null
+++ b/docs/api/device-api-server.md
@@ -0,0 +1,425 @@
+# Device API Server - API Reference
+
+This document provides the complete API reference for the Device API Server gRPC services.
+
+## Overview
+
+The Device API Server exposes a unified `GpuService` that provides both read and write operations following Kubernetes API conventions:
+
+| Operation Type | Methods | Clients |
+|----------------|---------|---------|
+| Read | `GetGpu`, `ListGpus`, `WatchGpus` | Consumers (device plugins, DRA drivers) |
+| Write | `CreateGpu`, `UpdateGpu`, `UpdateGpuStatus`, `DeleteGpu` | Providers (health monitors, NVML) |
+
+**Package**: `nvidia.device.v1alpha1`
+
+**Connection Endpoints**:
+- Unix Socket: `unix:///var/run/device-api/device.sock` (recommended)
+- TCP: `localhost:50051`
+
+## GpuService
+
+The `GpuService` provides a unified API for GPU resource management:
+
+- **Read operations** (`GetGpu`, `ListGpus`, `WatchGpus`) for consumers
+- **Write operations** (`CreateGpu`, `UpdateGpu`, `UpdateGpuStatus`, `DeleteGpu`) for providers
+
+> **Important**: Write operations acquire exclusive locks, blocking all consumer reads until completion. This prevents consumers from reading stale "healthy" states during GPU health transitions.
+ +### Read Operations + +### GetGpu + +Retrieves a single GPU resource by its unique name. + +```protobuf +rpc GetGpu(GetGpuRequest) returns (GetGpuResponse); +``` + +**Request**: + +| Field | Type | Description | +|-------|------|-------------| +| `name` | string | The unique resource name of the GPU | + +**Response**: + +| Field | Type | Description | +|-------|------|-------------| +| `gpu` | Gpu | The requested GPU resource | + +**Errors**: +- `NOT_FOUND`: GPU with the specified name does not exist + +**Example**: + +```bash +grpcurl -plaintext localhost:50051 \ + -d '{"name": "gpu-abc123"}' \ + nvidia.device.v1alpha1.GpuService/GetGpu +``` + +### ListGpus + +Retrieves a list of all GPU resources. + +```protobuf +rpc ListGpus(ListGpusRequest) returns (ListGpusResponse); +``` + +**Request**: Empty (reserved for future filtering/pagination) + +**Response**: + +| Field | Type | Description | +|-------|------|-------------| +| `gpu_list` | GpuList | List of all GPU resources | + +**Example**: + +```bash +grpcurl -plaintext localhost:50051 \ + nvidia.device.v1alpha1.GpuService/ListGpus +``` + +**Response Example**: + +```json +{ + "gpuList": { + "items": [ + { + "name": "gpu-abc123", + "spec": { + "uuid": "GPU-a1b2c3d4-e5f6-a7b8-c9d0-e1f2a3b4c5d6" + }, + "status": { + "conditions": [ + { + "type": "Ready", + "status": "True", + "lastTransitionTime": "2026-01-21T10:00:00Z", + "reason": "GPUHealthy", + "message": "GPU is healthy and available" + } + ] + }, + "resourceVersion": "42" + } + ] + } +} +``` + +### WatchGpus + +Streams lifecycle events for GPU resources. The stream remains open until the client disconnects or an error occurs. 
+ +```protobuf +rpc WatchGpus(WatchGpusRequest) returns (stream WatchGpusResponse); +``` + +**Request**: Empty (reserved for future filtering/resumption) + +**Response Stream**: + +| Field | Type | Description | +|-------|------|-------------| +| `type` | string | Event type: `ADDED`, `MODIFIED`, `DELETED`, `ERROR` | +| `object` | Gpu | The GPU resource (last known state for DELETED) | + +**Event Types**: + +| Type | Description | +|------|-------------| +| `ADDED` | GPU was registered or first observed | +| `MODIFIED` | GPU status was updated | +| `DELETED` | GPU was unregistered | +| `ERROR` | An error occurred in the watch stream | + +**Example**: + +```bash +grpcurl -plaintext localhost:50051 \ + nvidia.device.v1alpha1.GpuService/WatchGpus +``` + +**Behavior**: +- On connection, receives `ADDED` events for all existing GPUs +- Subsequent events reflect real-time changes +- Stream is per-client; multiple clients can watch simultaneously + +### Write Operations + +#### CreateGpu + +Creates a new GPU resource. This is the standard way for providers to register GPUs. 
+ +```protobuf +rpc CreateGpu(CreateGpuRequest) returns (CreateGpuResponse); +``` + +**Request**: + +| Field | Type | Description | +|-------|------|-------------| +| `gpu` | Gpu | The GPU to create (metadata.name and spec.uuid required) | + +**Response**: + +| Field | Type | Description | +|-------|------|-------------| +| `gpu` | Gpu | The created GPU with server-assigned fields | +| `created` | bool | True if new GPU was created, false if already existed | + +**Errors**: +- `INVALID_ARGUMENT`: Required fields missing + +**Behavior**: +- If GPU already exists, returns existing GPU (idempotent) +- Triggers `ADDED` event for active watch streams + +**Example**: + +```bash +grpcurl -plaintext localhost:50051 \ + -d '{ + "gpu": { + "metadata": {"name": "gpu-abc123"}, + "spec": {"uuid": "GPU-a1b2c3d4-e5f6-a7b8-c9d0-e1f2a3b4c5d6"} + } + }' \ + nvidia.device.v1alpha1.GpuService/CreateGpu +``` + +#### UpdateGpu + +Replaces an entire GPU resource (spec and status). + +```protobuf +rpc UpdateGpu(UpdateGpuRequest) returns (Gpu); +``` + +**Request**: + +| Field | Type | Description | +|-------|------|-------------| +| `gpu` | Gpu | The GPU to update (metadata.name required) | + +**Response**: The updated GPU resource. + +**Errors**: +- `NOT_FOUND`: GPU does not exist +- `ABORTED`: Resource version conflict (optimistic concurrency) + +**Behavior**: +- Uses optimistic concurrency via `resource_version` +- Triggers `MODIFIED` event for active watch streams + +#### UpdateGpuStatus + +Updates only the status of an existing GPU (follows Kubernetes subresource pattern). + +```protobuf +rpc UpdateGpuStatus(UpdateGpuStatusRequest) returns (Gpu); +``` + +**Request**: + +| Field | Type | Description | +|-------|------|-------------| +| `name` | string | The GPU name to update | +| `status` | GpuStatus | New status (completely replaces existing) | +| `resource_version` | int64 | Optional: expected version for conflict detection | + +**Response**: The updated GPU resource. 
+ +**Errors**: +- `NOT_FOUND`: GPU does not exist +- `ABORTED`: Resource version conflict (optimistic concurrency) + +**Locking**: Acquires exclusive write lock, blocking all reads. + +**Example** (mark GPU unhealthy due to XID error): + +```bash +grpcurl -plaintext localhost:50051 \ + -d '{ + "name": "gpu-abc123", + "status": { + "conditions": [{ + "type": "Ready", + "status": "False", + "reason": "XidError", + "message": "Critical XID error 79 detected" + }] + } + }' \ + nvidia.device.v1alpha1.GpuService/UpdateGpuStatus +``` + +#### DeleteGpu + +Removes a GPU from the server. + +```protobuf +rpc DeleteGpu(DeleteGpuRequest) returns (google.protobuf.Empty); +``` + +**Request**: + +| Field | Type | Description | +|-------|------|-------------| +| `name` | string | Unique identifier of GPU to remove | + +**Response**: Empty on success. + +**Errors**: +- `NOT_FOUND`: GPU does not exist + +**Behavior**: +- GPU will no longer appear in ListGpus/GetGpu responses +- Triggers `DELETED` event for active watch streams + +**Example**: + +```bash +grpcurl -plaintext localhost:50051 \ + -d '{"name": "gpu-abc123"}' \ + nvidia.device.v1alpha1.GpuService/DeleteGpu +``` + +--- + +## Resource Types + +### Gpu + +The main GPU resource following the Kubernetes Resource Model pattern. + +| Field | Type | Description | +|-------|------|-------------| +| `name` | string | Unique logical identifier | +| `spec` | GpuSpec | Identity and desired attributes | +| `status` | GpuStatus | Most recently observed state | +| `resource_version` | int64 | Monotonically increasing version | + +### GpuSpec + +Defines the identity of a GPU. + +| Field | Type | Description | +|-------|------|-------------| +| `uuid` | string | Physical hardware UUID (e.g., `GPU-a1b2c3d4-...`) | + +### GpuStatus + +Contains the observed state of a GPU. 
+ +| Field | Type | Description | +|-------|------|-------------| +| `conditions` | Condition[] | Current state observations | +| `recommended_action` | string | Suggested resolution for negative states | + +### Condition + +Describes one aspect of the GPU's current state. + +| Field | Type | Description | +|-------|------|-------------| +| `type` | string | Category (e.g., `Ready`, `MemoryHealthy`) | +| `status` | string | `True`, `False`, or `Unknown` | +| `last_transition_time` | Timestamp | When status last changed | +| `reason` | string | Machine-readable reason (UpperCamelCase) | +| `message` | string | Human-readable details | + +**Standard Condition Types**: + +| Type | Description | +|------|-------------| +| `Ready` | Overall GPU health and availability | +| `MemoryHealthy` | GPU memory is functioning correctly | +| `ThermalHealthy` | GPU temperature is within safe limits | + +--- + +## Go Client Example + +```go +package main + +import ( + "context" + "log" + + v1alpha1 "github.com/nvidia/nvsentinel/api/gen/go/device/v1alpha1" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" +) + +func main() { + // Connect via Unix socket (recommended) + conn, err := grpc.NewClient( + "unix:///var/run/device-api/device.sock", + grpc.WithTransportCredentials(insecure.NewCredentials()), + ) + if err != nil { + log.Fatalf("failed to connect: %v", err) + } + defer conn.Close() + + client := v1alpha1.NewGpuServiceClient(conn) + + // Consumer: List GPUs + resp, err := client.ListGpus(context.Background(), &v1alpha1.ListGpusRequest{}) + if err != nil { + log.Fatalf("failed to list GPUs: %v", err) + } + + for _, gpu := range resp.GpuList.Items { + log.Printf("GPU: %s, Version: %d", gpu.Metadata.Name, gpu.Metadata.ResourceVersion) + for _, cond := range gpu.Status.Conditions { + log.Printf(" Condition: %s=%s (%s)", cond.Type, cond.Status, cond.Reason) + } + } + + // Provider: Update GPU status + _, err = client.UpdateGpuStatus(context.Background(), + 
&v1alpha1.UpdateGpuStatusRequest{
+			Name: "gpu-abc123",
+			Status: &v1alpha1.GpuStatus{
+				Conditions: []*v1alpha1.Condition{{
+					Type:    "Ready",
+					Status:  "False",
+					Reason:  "XidError",
+					Message: "Critical XID 79 detected",
+				}},
+			},
+		})
+	if err != nil {
+		log.Fatalf("failed to update status: %v", err)
+	}
+}
+```
+
+---
+
+## Error Codes
+
+| Code | Meaning |
+|------|---------|
+| `NOT_FOUND` | GPU with specified name does not exist |
+| `INVALID_ARGUMENT` | Request contains invalid parameters |
+| `ABORTED` | Resource version conflict (optimistic concurrency) |
+| `INTERNAL` | Server-side error occurred |
+| `UNAVAILABLE` | Server is temporarily unavailable |
+
+---
+
+## See Also
+
+- [Operations Guide](../operations/device-api-server.md)
+- [Design Document](../design/device-api-server.md)
+- [NVML Fallback Provider](../design/nvml-fallback-provider.md)
diff --git a/docs/design/device-api-server.md b/docs/design/device-api-server.md
new file mode 100644
index 000000000..89f159241
--- /dev/null
+++ b/docs/design/device-api-server.md
@@ -0,0 +1,695 @@
+# Device API Server - Design & Implementation Plan
+
+> **Status**: Draft
+> **Author**: NVSentinel Team
+> **Created**: 2026-01-21
+
+## Table of Contents
+
+- [Executive Summary](#executive-summary)
+- [Architecture Overview](#architecture-overview)
+- [Design Decisions](#design-decisions)
+- [Implementation Phases](#implementation-phases)
+- [Directory Structure](#directory-structure)
+- [API Design](#api-design)
+- [Observability](#observability)
+- [Deployment](#deployment)
+
+## Related Documents
+
+- [Implementation Tasks](./device-api-server-tasks.md) - Detailed task breakdown
+- [NVML Fallback Provider](./nvml-fallback-provider.md) - Built-in NVML health provider design
+
+---
+
+## Executive Summary
+
+The Device API Server is a **node-local gRPC cache server** deployed as a Kubernetes DaemonSet.
It acts as an intermediary between: + +- **Providers** (e.g., NVSentinel health monitors) that update GPU device states +- **Consumers** (e.g., Device Plugins, DRA Drivers) that read device states for scheduling decisions + +### Key Requirements + +| Requirement | Description | +|-------------|-------------| +| Node-local | DaemonSet running on each GPU node | +| Read-blocking semantics | MUST block reads during provider updates to prevent stale data | +| Multiple providers | Support multiple health monitors updating different conditions | +| Multiple consumers | Support multiple readers (device-plugin, DRA driver, etc.) | +| Kubernetes patterns | klog/v2, structured logging, health probes | +| Helm-only deployment | No kustomize, pure Helm chart | +| Observability | Prometheus metrics, alerting rules | + +--- + +## Architecture Overview + +``` +┌─────────────────────────────────────────────────────────────────────────────────┐ +│ Kubernetes Node │ +├─────────────────────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌──────────────────────┐ ┌──────────────────────────────┐ │ +│ │ NVSentinel │ │ Device Plugin / DRA │ │ +│ │ (Health Monitor) │ │ Driver │ │ +│ │ [Provider] │ │ [Consumer] │ │ +│ └──────────┬───────────┘ └──────────────┬───────────────┘ │ +│ │ │ │ +│ │ UpdateGpuStatus() │ GetGpu() │ +│ │ (gRPC) │ ListGpus() │ +│ │ │ WatchGpus() │ +│ ▼ ▼ │ +│ ┌──────────────────────────────────────────────────────────────────────────┐ │ +│ │ Device API Server (DaemonSet) │ │ +│ │ ┌────────────────────────────────────────────────────────────────────┐ │ │ +│ │ │ gRPC Server │ │ │ +│ │ │ ┌────────────────────────────────────────────────────────────┐ │ │ │ +│ │ │ │ GpuService (Unified) │ │ │ │ +│ │ │ │ Write: CreateGpu, UpdateGpu, UpdateGpuStatus, DeleteGpu │ │ │ │ +│ │ │ │ Read: GetGpu, ListGpus, WatchGpus │ │ │ │ +│ │ │ └────────────────────────────────┬───────────────────────────┘ │ │ │ +│ │ │ │ │ │ │ +│ │ │ ▼ │ │ │ +│ │ │ 
┌─────────────────────────────────────────────────────────────┐ │ │ │ +│ │ │ │ Cache Layer │ │ │ │ +│ │ │ │ ┌───────────────────────────────────────────────────────┐ │ │ │ │ +│ │ │ │ │ sync.RWMutex (Writer-Preference) │ │ │ │ │ +│ │ │ │ │ │ │ │ │ │ +│ │ │ │ │ Write Lock() ──────────► Blocks ALL new RLock() │ │ │ │ │ +│ │ │ │ │ until write completes │ │ │ │ │ +│ │ │ │ │ │ │ │ │ │ +│ │ │ │ │ This ensures consumers NEVER read stale data when │ │ │ │ │ +│ │ │ │ │ a provider is updating (healthy → unhealthy) │ │ │ │ │ +│ │ │ │ └───────────────────────────────────────────────────────┘ │ │ │ │ +│ │ │ │ │ │ │ │ +│ │ │ │ ┌───────────────────────────────────────────────────────┐ │ │ │ │ +│ │ │ │ │ map[string]*Gpu (In-Memory Store) │ │ │ │ │ +│ │ │ │ └───────────────────────────────────────────────────────┘ │ │ │ │ +│ │ │ └─────────────────────────────────────────────────────────────┘ │ │ │ +│ │ │ │ │ │ +│ │ │ ┌─────────────────────────────────────────────────────────────┐ │ │ │ +│ │ │ │ Watch Broadcaster │ │ │ │ +│ │ │ │ Notifies all WatchGpus() streams on state changes │ │ │ │ +│ │ │ └─────────────────────────────────────────────────────────────┘ │ │ │ +│ │ └────────────────────────────────────────────────────────────────────┘ │ │ +│ │ │ │ +│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────────────────┐ │ │ +│ │ │ Health │ │ Metrics │ │ Unix Socket │ │ │ +│ │ │ :8081 │ │ :9090 │ │ /var/run/device-api/device.sock │ │ │ +│ │ │ /healthz │ │ /metrics │ │ (node-local gRPC) │ │ │ +│ │ │ /readyz │ │ │ │ │ │ │ +│ │ └─────────────┘ └─────────────┘ └─────────────────────────────────┘ │ │ +│ └──────────────────────────────────────────────────────────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────────────────┘ +``` + +### Data Flow: Read-Blocking Semantics + +``` +Timeline ──────────────────────────────────────────────────────────────────────────► + +Provider (NVSentinel) Cache (RWMutex) Consumer (Device Plugin) + │ │ │ + │ 
│◄──── RLock() ────────────────┤ GetGpu() + │ │ (allowed) │ + │ │──────────────────────────────►│ Returns data + │ │ RUnlock() │ + │ │ │ + │──── UpdateGpuStatus() ──────►│ │ + │ Lock() requested │ │ + │ │ │ + │ │◄──── RLock() ────────────────┤ GetGpu() + │ │ BLOCKED ⛔ │ (waits) + │ │ │ + │◄──── Lock() acquired ────────│ │ + │ (write in progress) │ │ + │ │ │ + │──── Update complete ────────►│ │ + │ Unlock() │ │ + │ │ │ + │ │──── RLock() allowed ─────────►│ + │ │ (fresh data) │ + │ │ │ + +⚠️ CRITICAL: Consumer NEVER reads stale "healthy" state when provider + is updating to "unhealthy". The RWMutex writer-preference ensures + new readers block once a write is pending. +``` + +--- + +## Design Decisions + +### D1: Read-Blocking vs Eventually Consistent + +| Option | Pros | Cons | Decision | +|--------|------|------|----------| +| **sync.RWMutex (writer-preference)** | Prevents stale reads; simple; Go-native | Readers blocked during writes | ✅ **Selected** | +| atomic.Value + copy-on-write | Never blocks readers | Readers may see stale data during update | ❌ Rejected | +| sync.Map | Good for read-heavy | No blocking semantics; may read stale | ❌ Rejected | + +**Rationale**: The requirement explicitly states "MUST block reads, preventing false positives when a node 'was' healthy, and the next state is unhealthy." This mandates write-blocking reads. + +### D2: Transport Protocol + +| Option | Pros | Cons | Decision | +|--------|------|------|----------| +| **Unix Socket** | Node-local only; no network exposure; fast | Pod must mount socket path | ✅ **Primary** | +| TCP localhost | Easy client setup | Requires port allocation | ✅ **Secondary** | +| hostNetwork + TCP | Accessible from host | Security risk | ❌ Rejected | + +**Rationale**: Unix socket provides security isolation and performance for node-local communication. TCP fallback for flexibility. 
+ +### D3: Provider Registration Model + +| Option | Pros | Cons | Decision | +|--------|------|------|----------| +| **Implicit (any caller can update)** | Simple; stateless server | No provider identity tracking | ✅ **Phase 1** | +| Explicit registration | Track providers; detect failures | More complexity | 🔮 **Phase 2** | + +### D4: Logging Framework + +| Option | Pros | Cons | Decision | +|--------|------|------|----------| +| **klog/v2** | Kubernetes native; contextual logging; JSON format | Slightly verbose API | ✅ **Selected** | +| zap | Fast; popular | Not Kubernetes native | ❌ Rejected | +| logr | Interface-based | Needs backend anyway | Used via klog | + +--- + +## Implementation Phases + +### Phase 1: Core Server Foundation + +**Goal**: Minimal viable gRPC server with cache and blocking semantics. + +| Task ID | Task | Description | Estimate | +|---------|------|-------------|----------| +| P1.1 | Project scaffolding | Create `cmd/device-api-server/`, `internal/` structure | S | +| P1.2 | Proto extensions | Add provider-side RPCs (UpdateGpuStatus, RegisterGpu, UnregisterGpu) | M | +| P1.3 | Cache implementation | Thread-safe cache with RWMutex, writer-preference blocking | M | +| P1.4 | Consumer gRPC service | Implement GetGpu, ListGpus, WatchGpus (read path) | M | +| P1.5 | Provider gRPC service | Implement UpdateGpuStatus, RegisterGpu, UnregisterGpu (write path) | M | +| P1.6 | Watch broadcaster | Fan-out changes to all active WatchGpus streams | M | +| P1.7 | Graceful shutdown | SIGTERM handling, drain connections, health status | S | +| P1.8 | Unit tests | Cache tests, service tests, blocking behavior tests | L | + +**Deliverables**: +- Working gRPC server binary +- Consumer and Provider services +- Basic health endpoint + +--- + +### Phase 2: Kubernetes Integration + +**Goal**: Production-ready DaemonSet with proper k8s integration. 
+ +| Task ID | Task | Description | Estimate | +|---------|------|-------------|----------| +| P2.1 | klog/v2 integration | Structured logging, contextual loggers, log levels | M | +| P2.2 | Health probes | gRPC health protocol, HTTP /healthz /readyz endpoints | M | +| P2.3 | Configuration | Flags, environment variables, config validation | S | +| P2.4 | Unix socket support | Listen on configurable socket path | S | +| P2.5 | Signal handling | Proper SIGTERM/SIGINT handling per k8s lifecycle | S | +| P2.6 | Integration tests | Test with mock providers/consumers | L | + +**Deliverables**: +- Kubernetes-ready binary +- Health endpoints +- Configurable via flags/env + +--- + +### Phase 3: Observability + +**Goal**: Full observability stack with metrics and alerts. + +| Task ID | Task | Description | Estimate | +|---------|------|-------------|----------| +| P3.1 | Prometheus metrics | Request counts, latencies, cache stats, connection counts | M | +| P3.2 | gRPC interceptors | grpc-prometheus interceptors for all RPCs | M | +| P3.3 | Custom metrics | `device_api_server_gpus_total`, `_unhealthy`, `_cache_*` | M | +| P3.4 | Metrics endpoint | HTTP /metrics on separate port | S | +| P3.5 | Alerting rules | PrometheusRule CRD for critical alerts | M | +| P3.6 | Grafana dashboard | JSON dashboard for visualization | M | + +**Metrics to implement**: + +``` +# Server metrics +device_api_server_info{version="...", go_version="..."} +device_api_server_up + +# Cache metrics +device_api_server_cache_gpus_total +device_api_server_cache_gpus_healthy +device_api_server_cache_gpus_unhealthy +device_api_server_cache_updates_total{provider="..."} +device_api_server_cache_lock_wait_seconds_bucket + +# gRPC metrics (via interceptor) +grpc_server_started_total{grpc_service, grpc_method} +grpc_server_handled_total{grpc_service, grpc_method, grpc_code} +grpc_server_handling_seconds_bucket{grpc_service, grpc_method} + +# Watch metrics +device_api_server_watch_streams_active 
+device_api_server_watch_events_total{type="ADDED|MODIFIED|DELETED"} +``` + +**Alerts**: + +```yaml +- alert: DeviceAPIServerDown + expr: up{job="device-api-server"} == 0 + for: 5m + +- alert: DeviceAPIServerHighLatency + expr: histogram_quantile(0.99, grpc_server_handling_seconds_bucket) > 0.5 + for: 5m + +- alert: DeviceAPIServerUnhealthyGPUs + expr: device_api_server_cache_gpus_unhealthy > 0 + for: 1m +``` + +--- + +### Phase 4: Helm Chart + +**Goal**: Production-ready Helm chart with all configurations. + +| Task ID | Task | Description | Estimate | +|---------|------|-------------|----------| +| P4.1 | Chart scaffolding | `charts/device-api-server/` structure | S | +| P4.2 | DaemonSet template | Node selector, tolerations, resource limits | M | +| P4.3 | RBAC templates | ServiceAccount, Role, RoleBinding | M | +| P4.4 | ConfigMap/Secret | Server configuration, TLS certs | M | +| P4.5 | Service templates | Headless service, metrics service | S | +| P4.6 | PrometheusRule | Alerting rules as k8s resource | M | +| P4.7 | ServiceMonitor | Prometheus scrape configuration | S | +| P4.8 | Values schema | JSON schema for values validation | M | +| P4.9 | Chart tests | Helm test hooks | M | +| P4.10 | Documentation | README, NOTES.txt, examples | M | + +**Chart Structure**: + +``` +charts/device-api-server/ +├── Chart.yaml +├── values.yaml +├── values.schema.json +├── README.md +├── templates/ +│ ├── _helpers.tpl +│ ├── daemonset.yaml +│ ├── serviceaccount.yaml +│ ├── role.yaml +│ ├── rolebinding.yaml +│ ├── configmap.yaml +│ ├── service.yaml +│ ├── service-metrics.yaml +│ ├── servicemonitor.yaml +│ ├── prometheusrule.yaml +│ ├── poddisruptionbudget.yaml +│ └── NOTES.txt +└── tests/ + └── test-connection.yaml +``` + +--- + +### Phase 5: Documentation & Polish + +**Goal**: Comprehensive documentation and production hardening. 
+ +| Task ID | Task | Description | Estimate | +|---------|------|-------------|----------| +| P5.1 | Architecture docs | Design document, diagrams | M | +| P5.2 | API reference | Proto documentation, examples | M | +| P5.3 | Operations guide | Deployment, troubleshooting, runbooks | L | +| P5.4 | Developer guide | Contributing, local development | M | +| P5.5 | Security hardening | TLS, authentication review | M | +| P5.6 | Performance testing | Benchmark under load | L | +| P5.7 | CI/CD pipeline | GitHub Actions for build, test, release | M | + +--- + +## Directory Structure + +Following the [kubernetes-sigs/node-feature-discovery](https://github.com/kubernetes-sigs/node-feature-discovery) pattern +where the `api/` is a standalone module and `pkg/` contains public library code: + +``` +NVSentinel/ +├── api/ # STANDALONE API MODULE (own go.mod) +│ ├── gen/go/device/v1alpha1/ # Generated Go code +│ │ ├── gpu.pb.go +│ │ └── gpu_grpc.pb.go +│ ├── proto/device/v1alpha1/ # Proto definitions +│ │ └── gpu.proto # Unified GpuService (CRUD operations) +│ ├── go.mod # module github.com/nvidia/nvsentinel/api +│ ├── go.sum +│ └── Makefile +├── cmd/ # Command entry points (thin) +│ └── device-api-server/ +│ └── main.go # Server entrypoint only +├── pkg/ # PUBLIC LIBRARY CODE (importable) +│ ├── deviceapiserver/ # Device API Server implementation +│ │ ├── cache/ # Thread-safe GPU cache +│ │ │ ├── cache.go +│ │ │ ├── cache_test.go +│ │ │ └── broadcaster.go +│ │ ├── service/ # gRPC service implementation +│ │ │ └── gpu_service.go # GpuService (unified read/write) +│ │ ├── nvml/ # NVML provider (uses gRPC client) +│ │ │ ├── provider.go +│ │ │ ├── enumerator.go +│ │ │ └── health_monitor.go +│ │ ├── metrics/ # Prometheus metrics +│ │ └── health/ # Health check handlers +│ ├── version/ # Version information +│ │ └── version.go +│ └── signals/ # Signal handling utilities +├── charts/ # Helm charts +│ └── device-api-server/ +│ ├── Chart.yaml +│ ├── values.yaml +│ └── templates/ +├── 
docs/ +│ ├── design/ +│ ├── api/ +│ └── operations/ +├── hack/ # Build/development scripts +├── test/ # E2E tests +├── go.mod # Root module with replace directive +├── go.sum +└── Makefile +``` + +**Key Layout Decisions:** + +| Directory | Purpose | Importable | +|-----------|---------|------------| +| `api/` | Standalone API module for versioning | Yes (own module) | +| `pkg/` | Public library code | Yes | +| `cmd/` | Thin entry points | No | +| `charts/` | Helm deployment | N/A | + +Root `go.mod` uses: `replace github.com/nvidia/nvsentinel/api => ./api` + +--- + +## API Design + +### Unified GpuService + +Following Kubernetes API conventions, the API is consolidated into a single `GpuService` with standard CRUD methods: + +```protobuf +// GpuService provides a unified API for managing GPU resources. +// +// Read operations (Get, List, Watch) are intended for consumers. +// Write operations (Create, Update, UpdateStatus, Delete) are intended for providers. +service GpuService { + // Read Operations + rpc GetGpu(GetGpuRequest) returns (Gpu); + rpc ListGpus(ListGpusRequest) returns (ListGpusResponse); + rpc WatchGpus(WatchGpusRequest) returns (stream WatchGpusResponse); + + // Write Operations + rpc CreateGpu(CreateGpuRequest) returns (CreateGpuResponse); + rpc UpdateGpu(UpdateGpuRequest) returns (Gpu); + rpc UpdateGpuStatus(UpdateGpuStatusRequest) returns (Gpu); + rpc DeleteGpu(DeleteGpuRequest) returns (google.protobuf.Empty); +} + +message CreateGpuRequest { + Gpu gpu = 1; // metadata.name and spec.uuid required +} + +message CreateGpuResponse { + Gpu gpu = 1; + bool created = 2; // true if new, false if already existed +} + +message UpdateGpuRequest { + Gpu gpu = 1; // includes resource_version for optimistic concurrency +} + +message UpdateGpuStatusRequest { + string name = 1; + GpuStatus status = 2; + int64 resource_version = 3; // optional, for conflict detection +} + +message DeleteGpuRequest { + string name = 1; +} +``` + +**Design Rationale**: +- Single 
service simplifies API surface and tooling compatibility +- Standard CRUD verbs enable better integration with Kubernetes patterns +- `UpdateGpuStatus` follows the Kubernetes subresource pattern +- Optimistic concurrency via `resource_version` prevents lost updates + +--- + +## Observability + +### Metrics Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Device API Server │ +│ │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ gRPC Interceptors │ │ +│ │ grpc_server_started_total │ │ +│ │ grpc_server_handled_total │ │ +│ │ grpc_server_handling_seconds_bucket │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ Custom Metrics │ │ +│ │ device_api_server_cache_gpus_total │ │ +│ │ device_api_server_cache_lock_contention_total │ │ +│ │ device_api_server_watch_streams_active │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ Go Runtime Metrics │ │ +│ │ go_goroutines │ │ +│ │ go_memstats_alloc_bytes │ │ +│ │ process_cpu_seconds_total │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ :9090/metrics │ +│ │ │ +└──────────────────────────────┼───────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ Prometheus │ +│ │ +│ ServiceMonitor ──► scrape_configs │ +│ │ +│ PrometheusRule ──► alerting_rules │ +│ │ +└─────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ Grafana │ +│ │ +│ Dashboard: Device API Server Overview │ +│ - Request rate / error rate │ +│ - P50/P99 latency │ +│ - GPU health summary │ +│ - Cache statistics │ +│ - Active watch streams │ +│ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Deployment + 
+### Helm Values (Key Configuration) + +```yaml +# values.yaml +replicaCount: 1 # DaemonSet ignores this, but kept for consistency + +image: + repository: ghcr.io/nvidia/device-api-server + tag: "" # Defaults to Chart appVersion + pullPolicy: IfNotPresent + +# Server configuration +server: + # gRPC listen address (TCP) - localhost only by default for security + # Set to ":50051" to bind to all interfaces (WARNING: unauthenticated API) + grpcAddress: "127.0.0.1:50051" + # Unix socket path (primary for node-local) + unixSocket: /var/run/device-api/device.sock + # Health probe port + healthPort: 8081 + # Metrics port + metricsPort: 9090 + +# Logging +logging: + # Log level (0=info, higher=more verbose) + verbosity: 0 + # Output format: text, json + format: json + +# Node selection +nodeSelector: + nvidia.com/gpu.present: "true" + +tolerations: + - key: nvidia.com/gpu + operator: Exists + effect: NoSchedule + +resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 200m + memory: 256Mi + +# Security +securityContext: + runAsNonRoot: true + runAsUser: 65534 + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + +# RBAC +serviceAccount: + create: true + name: "" + automountServiceAccountToken: false + +rbac: + create: true + +# Observability +metrics: + enabled: true + serviceMonitor: + enabled: true + interval: 30s + scrapeTimeout: 10s + prometheusRule: + enabled: true + +# Health probes +probes: + liveness: + initialDelaySeconds: 5 + periodSeconds: 10 + readiness: + initialDelaySeconds: 5 + periodSeconds: 10 +``` + +### DaemonSet Topology + +``` +┌─────────────────────────────────────────────────────────────────────────────────┐ +│ Kubernetes Cluster │ +├─────────────────────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌───────────────────────┐ ┌───────────────────────┐ ┌───────────────────────┐│ +│ │ GPU Node 1 │ │ GPU Node 2 │ │ GPU Node 3 ││ +│ │ │ │ │ │ ││ +│ │ ┌─────────────────┐ │ │ ┌─────────────────┐ │ │ 
┌─────────────────┐ ││ +│ │ │ device-api- │ │ │ │ device-api- │ │ │ │ device-api- │ ││ +│ │ │ server pod │ │ │ │ server pod │ │ │ │ server pod │ ││ +│ │ │ │ │ │ │ │ │ │ │ │ ││ +│ │ │ GPU-0: Healthy │ │ │ │ GPU-0: Healthy │ │ │ │ GPU-0: Unhealthy│ ││ +│ │ │ GPU-1: Healthy │ │ │ │ GPU-1: Healthy │ │ │ │ GPU-1: Healthy │ ││ +│ │ │ GPU-2: Healthy │ │ │ │ │ │ │ │ GPU-2: Healthy │ ││ +│ │ │ GPU-3: Healthy │ │ │ │ │ │ │ │ GPU-3: Healthy │ ││ +│ │ └─────────────────┘ │ │ └─────────────────┘ │ │ └─────────────────┘ ││ +│ │ │ │ │ │ ││ +│ │ /var/run/device-api/ │ │ /var/run/device-api/ │ │ /var/run/device-api/ ││ +│ │ device.sock │ │ device.sock │ │ device.sock ││ +│ │ │ │ │ │ ││ +│ └───────────────────────┘ └───────────────────────┘ └───────────────────────┘│ +│ │ +│ ┌───────────────────────┐ │ +│ │ Non-GPU Node │ (DaemonSet does NOT schedule here due to │ +│ │ (No GPU) │ nodeSelector: nvidia.com/gpu.present=true) │ +│ └───────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Risk Assessment + +| Risk | Impact | Likelihood | Mitigation | +|------|--------|------------|------------| +| Cache corruption on concurrent writes | High | Low | RWMutex provides exclusivity | +| Watch stream memory leak | Medium | Medium | Bounded channels, timeouts | +| Provider not updating (stale data) | High | Medium | Health checks, provider heartbeat (Phase 2) | +| Socket permission issues | Medium | Medium | Init container for socket dir | +| High lock contention | Medium | Low | Metrics to detect, sharding if needed | + +--- + +## Success Criteria + +### Phase 1 +- [ ] Server starts and accepts gRPC connections +- [ ] Provider can register/update/unregister GPUs +- [ ] Consumer can Get/List/Watch GPUs +- [ ] Read-blocking verified under concurrent load + +### Phase 2 +- [ ] Structured logs with klog/v2 +- [ ] Health probes pass in Kubernetes +- [ ] Unix socket communication works + +### Phase 3 +- [ ] Prometheus 
metrics exposed +- [ ] Grafana dashboard visualizes key metrics +- [ ] Alerts fire correctly in test scenarios + +### Phase 4 +- [ ] `helm install` works out of box +- [ ] DaemonSet schedules on GPU nodes only +- [ ] RBAC properly scoped + +### Phase 5 +- [ ] Documentation complete +- [ ] CI/CD pipeline green +- [ ] Performance benchmarks pass + +--- + +## Appendix: Research References + +1. **Kubernetes DaemonSet gRPC Best Practices** - Health probes, graceful shutdown, load balancing +2. **Go sync.RWMutex** - Writer-preference semantics, blocking behavior +3. **klog/v2** - Structured logging, contextual logging, JSON format +4. **Helm Chart Best Practices** - RBAC, ServiceAccount, DaemonSet templates +5. **grpc-prometheus** - Metrics interceptors, histogram configuration + +--- + +*Document version: 1.0* +*Last updated: 2026-01-21* diff --git a/docs/operations/device-api-server.md b/docs/operations/device-api-server.md new file mode 100644 index 000000000..96df4804a --- /dev/null +++ b/docs/operations/device-api-server.md @@ -0,0 +1,358 @@ +# Device API Server - Operations Guide + +This guide covers deployment, configuration, monitoring, and troubleshooting of the Device API Server. + +## Architecture Overview + +The Device API Server is a pure Go gRPC server with no hardware dependencies. +GPU enumeration and health monitoring is provided by external providers (sidecars). 
+ +``` +┌─────────────────────────────────────────────────────────────┐ +│ GPU Node │ +│ ┌─────────────────────────────────────────────────────────┐│ +│ │ Device API Server (DaemonSet) ││ +│ │ ┌─────────────────────────────────────────────────┐ ││ +│ │ │ GpuService (unified) │ ││ +│ │ │ Read: GetGpu, ListGpus, WatchGpus │ ││ +│ │ │ Write: CreateGpu, UpdateGpuStatus, DeleteGpu │ ││ +│ │ └────────────────────┬────────────────────────────┘ ││ +│ │ │ ││ +│ │ ▼ ││ +│ │ ┌─────────────────────────────────────────────────────┐││ +│ │ │ GPU Cache (RWMutex) │││ +│ │ │ - Read-blocking during writes │││ +│ │ │ - Watch event broadcasting │││ +│ │ └─────────────────────────────────────────────────────┘││ +│ └─────────────────────────────────────────────────────────┘│ +│ │ +│ Providers (gRPC clients): │ +│ - nvml-provider sidecar (GPU enumeration, XID monitoring) │ +│ - Custom providers (CreateGpu, UpdateGpuStatus) │ +│ │ +│ Consumers (gRPC clients): │ +│ - Device plugins (GetGpu, ListGpus, WatchGpus) │ +│ - DRA drivers (GetGpu, ListGpus, WatchGpus) │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Deployment + +### Prerequisites + +- Kubernetes 1.25+ +- Helm 3.0+ +- GPU nodes with label `nvidia.com/gpu.present=true` +- (Optional) Prometheus Operator for monitoring + +### Installation + +**Basic Installation**: + +```bash +helm install device-api-server ./deployments/helm/device-api-server \ + --namespace device-api --create-namespace +``` + +**With Prometheus Monitoring**: + +```bash +helm install device-api-server ./deployments/helm/device-api-server \ + --namespace device-api --create-namespace \ + --set metrics.serviceMonitor.enabled=true \ + --set metrics.prometheusRule.enabled=true +``` + +### Verify Installation + +```bash +# Check DaemonSet status +kubectl get daemonset -n device-api + +# Check pods are running on GPU nodes +kubectl get pods -n device-api -o wide + +# Check logs +kubectl logs -n device-api -l 
app.kubernetes.io/name=device-api-server +``` + +--- + +## Configuration + +### Command-Line Flags + +| Flag | Default | Description | +|------|---------|-------------| +| `--bind-address` | `unix:///var/run/nvidia-device-api/device-api.sock` | Unix socket URI for the gRPC device API | +| `--health-probe-bind-address` | `:50051` | TCP address for gRPC health and reflection | +| `--metrics-bind-address` | `:9090` | TCP address for HTTP Prometheus metrics | +| `--shutdown-grace-period` | `25s` | Maximum time to wait for graceful shutdown | +| `--hostname-override` | (auto-detected) | Override the node hostname (must be a valid DNS subdomain) | +| `-v` | `0` | Log verbosity level (klog) | + +### Helm Values + +See [values.yaml](../../deployments/helm/device-api-server/values.yaml) for the complete reference. + +Key configuration sections: + +```yaml +# Server configuration +server: + unixSocket: /var/run/device-api/device.sock + healthPort: 8081 + metricsPort: 9090 + shutdownGracePeriod: 25 + shutdownDelay: 5 + +# Node scheduling +nodeSelector: + nvidia.com/gpu.present: "true" + +# Resources +resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 200m + memory: 256Mi +``` + +--- + +## GPU Providers + +The Device API Server is a pure Go gRPC server with no hardware dependencies. +GPU enumeration and health monitoring is provided by external providers that connect +as gRPC clients: + +- **nvml-provider sidecar** - Recommended NVML-based provider for GPU enumeration and XID monitoring +- **Custom providers** - Any gRPC client can register GPUs via `CreateGpu` and update health via `UpdateGpuStatus` + +See the [nvml-provider demo](../../demos/nvml-sidecar-demo.sh) for an example sidecar deployment. 
+ +--- + +## Monitoring + +### Health Endpoints + +| Endpoint | Port | Description | +|----------|------|-------------| +| `/healthz` | 8081 | Liveness probe - server is running | +| `/readyz` | 8081 | Readiness probe - server is accepting traffic | +| `/metrics` | 9090 | Prometheus metrics | + +### Prometheus Metrics + +**Server Metrics**: + +| Metric | Type | Description | +|--------|------|-------------| +| `device_api_server_info` | Gauge | Server information (version, go_version) | + +**Cache Metrics**: + +| Metric | Type | Description | +|--------|------|-------------| +| `device_api_server_cache_gpus_total` | Gauge | Total GPUs in cache | +| `device_api_server_cache_gpus_healthy` | Gauge | Healthy GPUs | +| `device_api_server_cache_gpus_unhealthy` | Gauge | Unhealthy GPUs | +| `device_api_server_cache_gpus_unknown` | Gauge | GPUs with unknown status | +| `device_api_server_cache_updates_total` | Counter | Cache update operations | +| `device_api_server_cache_resource_version` | Gauge | Current cache version | + +**Watch Metrics**: + +| Metric | Type | Description | +|--------|------|-------------| +| `device_api_server_watch_streams_active` | Gauge | Active watch streams | +| `device_api_server_watch_events_total` | Counter | Watch events sent | + +### Alerting Rules + +When `metrics.prometheusRule.enabled=true`, the following alerts are created: + +| Alert | Severity | Condition | +|-------|----------|-----------| +| `DeviceAPIServerDown` | Critical | Server unreachable for 5m | +| `DeviceAPIServerHighLatency` | Warning | P99 latency > 500ms | +| `DeviceAPIServerHighErrorRate` | Warning | Error rate > 10% | +| `DeviceAPIServerUnhealthyGPUs` | Warning | Unhealthy GPUs > 0 | +| `DeviceAPIServerNoGPUs` | Warning | No GPUs for 10m | +| `DeviceAPIServerHighMemory` | Warning | Memory > 512MB | + +### Grafana Dashboard + +Example PromQL queries for dashboards: + +```promql +# GPU health overview +device_api_server_cache_gpus_healthy / 
device_api_server_cache_gpus_total * 100 + +# Watch stream activity +rate(device_api_server_watch_events_total[5m]) + +# Cache update rate +rate(device_api_server_cache_updates_total[5m]) +``` + +--- + +## Troubleshooting + +### Pod Not Scheduling + +**Symptom**: DaemonSet shows 0/N pods ready + +**Check**: + +```bash +# Verify node labels +kubectl get nodes --show-labels | grep gpu + +# Check DaemonSet events +kubectl describe daemonset -n device-api device-api-server +``` + +**Solution**: Ensure nodes have `nvidia.com/gpu.present=true` label or override `nodeSelector`. + +### Permission Denied on Unix Socket + +**Symptom**: Clients cannot connect to Unix socket + +**Check**: + +```bash +# Check socket permissions on node +ls -la /var/run/device-api/ +``` + +**Solution**: Verify `securityContext` allows socket creation, or adjust `runAsUser`. + +### GPUs Not Appearing + +**Symptom**: `ListGpus` returns empty + +**Check**: + +```bash +# Check for GPU enumeration errors +kubectl logs -n device-api | grep -i error + +# Check if provider sidecar is running +kubectl get pods -n device-api -o wide +``` + +**Solutions**: +1. Deploy the nvml-provider sidecar: see [nvml-provider demo](../../demos/nvml-sidecar-demo.sh) +2. Deploy an external health provider +3. Verify the provider can connect to the Device API Server + +### High Memory Usage + +**Symptom**: Pod OOMKilled or memory alerts firing + +**Check**: + +```bash +# Check current memory usage +kubectl top pods -n device-api + +# Check watch stream count +curl -s http://:9090/metrics | grep watch_streams +``` + +**Solutions**: +1. Increase memory limits +2. Investigate clients creating excessive watch streams +3. 
Check for memory leaks in logs + +### Watch Stream Disconnections + +**Symptom**: Consumers report frequent reconnections + +**Check**: + +```bash +# Check network policy +kubectl get networkpolicy -n device-api + +# Check for errors in logs +kubectl logs -n device-api | grep -i "stream\|watch" +``` + +**Solutions**: +1. Ensure network policies allow intra-node traffic +2. Check client timeout settings +3. Verify server is not overloaded + +--- + +## Graceful Shutdown + +The server implements graceful shutdown: + +1. **PreStop Hook**: Sleeps for `shutdownDelay` seconds +2. **Signal Handling**: Catches SIGTERM/SIGINT +3. **Drain Period**: Stops accepting new connections +4. **In-Flight Completion**: Waits for active requests (up to `shutdownTimeout`) +5. **Resource Cleanup**: Closes connections + +**Timeline**: + +``` +SIGTERM → [shutdownDelay] → Stop listeners → [shutdownGracePeriod] → Force close +``` + +Configure in Helm: + +```yaml +server: + shutdownGracePeriod: 25 # Max wait for in-flight requests (seconds) + shutdownDelay: 5 # Pre-shutdown delay for endpoint propagation (seconds) +``` + +--- + +## Security Considerations + +### Pod Security + +Default security context (non-root, restricted): + +```yaml +securityContext: + runAsNonRoot: true + runAsUser: 65534 + runAsGroup: 65534 + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL +``` + +### Network Security + +> **Warning**: The gRPC API is unauthenticated. + +- The gRPC device API binds to a **Unix domain socket** by default (`--bind-address=unix:///var/run/nvidia-device-api/device-api.sock`). This limits access to processes on the same node. +- The health probe endpoint (`--health-probe-bind-address`) binds to a TCP port for kubelet probes but only serves gRPC health and reflection, not the device API. +- In multi-tenant or partially untrusted clusters, use a Kubernetes `NetworkPolicy` to restrict access to the health and metrics TCP ports. 
+ +### Service Account + +- `automountServiceAccountToken: false` by default +- No Kubernetes API access required + +--- + +## See Also + +- [API Reference](../api/device-api-server.md) +- [Design Document](../design/device-api-server.md) +- [Helm Chart README](../../deployments/helm/device-api-server/README.md) +- [NVML Sidecar Demo](../../demos/nvml-sidecar-demo.sh) diff --git a/examples/fake-client/main_test.go b/examples/fake-client/main_test.go index c552f566a..bc80953fe 100644 --- a/examples/fake-client/main_test.go +++ b/examples/fake-client/main_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ package main_test import ( "context" + "sync" "testing" "time" @@ -31,6 +32,82 @@ import ( "k8s.io/client-go/tools/cache" ) +// bookmarkWatch wraps a watch.Interface to inject a bookmark event after +// creation. This is needed because k8s.io/client-go v0.35+ requires bookmark +// events for the reflector to consider initial sync complete, but the fake +// client's ObjectTracker doesn't send them automatically. +type bookmarkWatch struct { + watch.Interface + bookmarkCh chan watch.Event + resultCh chan watch.Event + stopCh chan struct{} + stopOnce sync.Once +} + +func newBookmarkWatch(w watch.Interface) *bookmarkWatch { + bw := &bookmarkWatch{ + Interface: w, + bookmarkCh: make(chan watch.Event, 1), + resultCh: make(chan watch.Event), + stopCh: make(chan struct{}), + } + + // Send initial bookmark to signal list completion. + // The bookmark object must be the same type as the expected resource (GPU). 
+ bw.bookmarkCh <- watch.Event{ + Type: watch.Bookmark, + Object: &devicev1alpha1.GPU{ + ObjectMeta: metav1.ObjectMeta{ + ResourceVersion: "0", + Annotations: map[string]string{ + metav1.InitialEventsAnnotationKey: "true", + }, + }, + }, + } + + // Multiplex bookmark and underlying watch events + go func() { + defer close(bw.resultCh) + for { + select { + case <-bw.stopCh: + return + case ev, ok := <-bw.bookmarkCh: + if ok { + select { + case bw.resultCh <- ev: + case <-bw.stopCh: + return + } + } + case ev, ok := <-w.ResultChan(): + if !ok { + return + } + select { + case bw.resultCh <- ev: + case <-bw.stopCh: + return + } + } + } + }() + + return bw +} + +func (bw *bookmarkWatch) ResultChan() <-chan watch.Event { + return bw.resultCh +} + +func (bw *bookmarkWatch) Stop() { + bw.stopOnce.Do(func() { + close(bw.stopCh) + }) + bw.Interface.Stop() +} + func TestGPUInformerWithFakeClient(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -47,6 +124,10 @@ func TestGPUInformerWithFakeClient(t *testing.T) { // signal the test when the informer has successfully established its // stream, preventing race conditions where events are injected before // the watcher is ready. + // + // The reactor also wraps the watch to inject a bookmark event, which is + // required by k8s.io/client-go v0.35+ for the reflector to consider the + // initial sync complete. client.PrependWatchReactor("*", func(action clienttesting.Action) (handled bool, ret watch.Interface, err error) { watchAction, ok := action.(clienttesting.WatchActionImpl) if !ok { @@ -58,15 +139,18 @@ func TestGPUInformerWithFakeClient(t *testing.T) { ns := action.GetNamespace() // Manually invoke the tracker to create the watch stream. 
- watch, err := client.Tracker().Watch(gvr, ns, opts) + w, err := client.Tracker().Watch(gvr, ns, opts) if err != nil { return false, nil, err } + // Wrap watch to inject initial bookmark event for reflector sync + wrappedWatch := newBookmarkWatch(w) + // Close the channel to notify the test that the Informer is now // listening for events. close(watcherStarted) - return true, watch, nil + return true, wrappedWatch, nil }) // Create a factory for the informers. diff --git a/go.mod b/go.mod index d1f0ae9d1..23a936b27 100644 --- a/go.mod +++ b/go.mod @@ -1,20 +1,20 @@ module github.com/nvidia/nvsentinel -go 1.25.5 +go 1.25.0 require ( + github.com/NVIDIA/go-nvml v0.12.9-0 github.com/go-logr/logr v1.4.3 github.com/go-logr/stdr v1.2.2 github.com/google/go-cmp v0.7.0 github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1 github.com/k3s-io/kine v1.14.2 github.com/prometheus/client_golang v1.23.2 - github.com/spf13/cobra v1.10.2 - github.com/spf13/pflag v1.0.9 + github.com/spf13/pflag v1.0.10 go.uber.org/goleak v1.3.0 golang.org/x/sync v0.18.0 - google.golang.org/grpc v1.78.0 - google.golang.org/protobuf v1.36.11 + google.golang.org/grpc v1.77.0 + google.golang.org/protobuf v1.36.10 k8s.io/apimachinery v0.35.0 k8s.io/apiserver v0.35.0 k8s.io/client-go v0.35.0 @@ -26,7 +26,6 @@ require ( require ( cel.dev/expr v0.24.0 // indirect filippo.io/edwards25519 v1.1.0 // indirect - github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect github.com/NYTimes/gziphandler v1.1.1 // indirect github.com/Rican7/retry v0.3.1 // indirect github.com/antlr4-go/antlr/v4 v4.13.0 // indirect @@ -75,7 +74,6 @@ require ( github.com/mailru/easyjson v0.7.7 // indirect github.com/mattn/go-sqlite3 v1.14.32 // indirect github.com/minio/highwayhash v1.0.3 // indirect - github.com/moby/term v0.5.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect 
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect @@ -93,6 +91,7 @@ require ( github.com/shengdoushi/base58 v1.0.0 // indirect github.com/sirupsen/logrus v1.9.3 // indirect github.com/soheilhy/cmux v0.1.5 // indirect + github.com/spf13/cobra v1.10.0 // indirect github.com/stoewer/go-strcase v1.3.0 // indirect github.com/tidwall/btree v1.8.1 // indirect github.com/tmc/grpc-websocket-proxy v0.0.0-20220101234140-673ab2c3ae75 // indirect @@ -128,7 +127,7 @@ require ( golang.org/x/text v0.31.0 // indirect golang.org/x/time v0.12.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20251029180050-ab9386a59fda // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20251022142026-3a174f9686a8 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20251124214823-79d6a2a48846 // indirect gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect diff --git a/go.sum b/go.sum index 7c99213db..cb1e0c3fa 100644 --- a/go.sum +++ b/go.sum @@ -2,10 +2,10 @@ cel.dev/expr v0.24.0 h1:56OvJKSH3hDGL0ml5uSxZmz3/3Pq4tJ+fb1unVLAFcY= cel.dev/expr v0.24.0/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw= filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= -github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= -github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= +github.com/NVIDIA/go-nvml v0.12.9-0 h1:e344UK8ZkeMeeLkdQtRhmXRxNf+u532LDZPGMtkdus0= +github.com/NVIDIA/go-nvml v0.12.9-0/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4= 
github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I= github.com/NYTimes/gziphandler v1.1.1/go.mod h1:n/CVRwUEOgIxrgPvAQhUUr9oeUtvrhMomdKFjzJNB0c= github.com/Rican7/retry v0.3.1 h1:scY4IbO8swckzoA/11HgBwaZRJEyY9vaNJshcdhp1Mc= @@ -32,8 +32,6 @@ github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8 github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= -github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= -github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -151,8 +149,6 @@ github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuE github.com/mattn/go-sqlite3 v1.14.32/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= github.com/minio/highwayhash v1.0.3 h1:kbnuUMoHYyVl7szWjSxJnxw11k2U709jqFPPmIUyD6Q= github.com/minio/highwayhash v1.0.3/go.mod h1:GGYsuwP/fPD6Y9hMiXuapVvlIUEhFhMTh0rxU3ik1LQ= -github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0= -github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -201,10 +197,11 @@ github.com/sirupsen/logrus 
v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js= github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0= -github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU= -github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4= -github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY= -github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/cobra v1.10.0 h1:a5/WeUlSDCvV5a45ljW2ZFtV0bTDpkfSAj3uqB6Sc+0= +github.com/spf13/cobra v1.10.0/go.mod h1:9dhySC7dnTtEiqzmqfkLj47BslqLCUPMXjG2lj/NgoE= +github.com/spf13/pflag v1.0.8/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= +github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs= github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -309,7 +306,6 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.21.0/go.mod 
h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= @@ -338,14 +334,14 @@ gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= -google.golang.org/genproto/googleapis/api v0.0.0-20251029180050-ab9386a59fda h1:+2XxjfsAu6vqFxwGBRcHiMaDCuZiqXGDUDVWVtrFAnE= -google.golang.org/genproto/googleapis/api v0.0.0-20251029180050-ab9386a59fda/go.mod h1:fDMmzKV90WSg1NbozdqrE64fkuTv6mlq2zxo9ad+3yo= +google.golang.org/genproto/googleapis/api v0.0.0-20251022142026-3a174f9686a8 h1:mepRgnBZa07I4TRuomDE4sTIYieg/osKmzIf4USdWS4= +google.golang.org/genproto/googleapis/api v0.0.0-20251022142026-3a174f9686a8/go.mod h1:fDMmzKV90WSg1NbozdqrE64fkuTv6mlq2zxo9ad+3yo= google.golang.org/genproto/googleapis/rpc v0.0.0-20251124214823-79d6a2a48846 h1:Wgl1rcDNThT+Zn47YyCXOXyX/COgMTIdhJ717F0l4xk= google.golang.org/genproto/googleapis/rpc v0.0.0-20251124214823-79d6a2a48846/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk= -google.golang.org/grpc v1.78.0 h1:K1XZG/yGDJnzMdd/uZHAkVqJE+xIDOcmdSFZkBUicNc= -google.golang.org/grpc v1.78.0/go.mod h1:I47qjTo4OKbMkjA/aOOwxDIiPSBofUtQUI5EfpWvW7U= -google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= -google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +google.golang.org/grpc v1.77.0 h1:wVVY6/8cGA6vvffn+wWK5ToddbgdU3d8MNENr4evgXM= +google.golang.org/grpc v1.77.0/go.mod h1:z0BY1iVj0q8E1uSQCjL9cppRj+gnZjzDnzV0dHhrNig= +google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE= +google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod 
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/hack/boilerplate.go.txt b/hack/boilerplate.go.txt index e1732e8d5..6307eef7b 100644 --- a/hack/boilerplate.go.txt +++ b/hack/boilerplate.go.txt @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/internal/generated/device/v1alpha1/gpu.pb.go b/internal/generated/device/v1alpha1/gpu.pb.go index 17419e268..d184d3eb9 100644 --- a/internal/generated/device/v1alpha1/gpu.pb.go +++ b/internal/generated/device/v1alpha1/gpu.pb.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.36.10 -// protoc v6.33.0 +// protoc v6.33.4 // source: device/v1alpha1/gpu.proto package v1alpha1 @@ -1173,6 +1173,64 @@ func (x *UpdateGpuRequest) GetOpts() *UpdateOptions { return nil } +// UpdateGpuStatusRequest specifies the GPU whose status should be updated. +// Only metadata (name, namespace, resource_version) and status fields are used. +type UpdateGpuStatusRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + // gpu is the GPU resource with updated status. + // The server reads metadata.name, metadata.namespace, metadata.resource_version + // and status from this object. All other fields are ignored. 
+ Gpu *Gpu `protobuf:"bytes,1,opt,name=gpu,proto3" json:"gpu,omitempty"` + // opts contains the options for the update. + Opts *UpdateOptions `protobuf:"bytes,2,opt,name=opts,proto3" json:"opts,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *UpdateGpuStatusRequest) Reset() { + *x = UpdateGpuStatusRequest{} + mi := &file_device_v1alpha1_gpu_proto_msgTypes[20] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *UpdateGpuStatusRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*UpdateGpuStatusRequest) ProtoMessage() {} + +func (x *UpdateGpuStatusRequest) ProtoReflect() protoreflect.Message { + mi := &file_device_v1alpha1_gpu_proto_msgTypes[20] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use UpdateGpuStatusRequest.ProtoReflect.Descriptor instead. +func (*UpdateGpuStatusRequest) Descriptor() ([]byte, []int) { + return file_device_v1alpha1_gpu_proto_rawDescGZIP(), []int{20} +} + +func (x *UpdateGpuStatusRequest) GetGpu() *Gpu { + if x != nil { + return x.Gpu + } + return nil +} + +func (x *UpdateGpuStatusRequest) GetOpts() *UpdateOptions { + if x != nil { + return x.Opts + } + return nil +} + type DeleteGpuRequest struct { state protoimpl.MessageState `protogen:"open.v1"` // The unique resource name of the GPU to delete. 
@@ -1190,7 +1248,7 @@ type DeleteGpuRequest struct { func (x *DeleteGpuRequest) Reset() { *x = DeleteGpuRequest{} - mi := &file_device_v1alpha1_gpu_proto_msgTypes[20] + mi := &file_device_v1alpha1_gpu_proto_msgTypes[21] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -1202,7 +1260,7 @@ func (x *DeleteGpuRequest) String() string { func (*DeleteGpuRequest) ProtoMessage() {} func (x *DeleteGpuRequest) ProtoReflect() protoreflect.Message { - mi := &file_device_v1alpha1_gpu_proto_msgTypes[20] + mi := &file_device_v1alpha1_gpu_proto_msgTypes[21] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -1215,7 +1273,7 @@ func (x *DeleteGpuRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use DeleteGpuRequest.ProtoReflect.Descriptor instead. func (*DeleteGpuRequest) Descriptor() ([]byte, []int) { - return file_device_v1alpha1_gpu_proto_rawDescGZIP(), []int{20} + return file_device_v1alpha1_gpu_proto_rawDescGZIP(), []int{21} } func (x *DeleteGpuRequest) GetName() string { @@ -1306,18 +1364,22 @@ const file_device_v1alpha1_gpu_proto_rawDesc = "" + "\x04opts\x18\x02 \x01(\v2).nvidia.nvsentinel.v1alpha1.CreateOptionsR\x04opts\"\x84\x01\n" + "\x10UpdateGpuRequest\x121\n" + "\x03gpu\x18\x01 \x01(\v2\x1f.nvidia.nvsentinel.v1alpha1.GpuR\x03gpu\x12=\n" + + "\x04opts\x18\x02 \x01(\v2).nvidia.nvsentinel.v1alpha1.UpdateOptionsR\x04opts\"\x8a\x01\n" + + "\x16UpdateGpuStatusRequest\x121\n" + + "\x03gpu\x18\x01 \x01(\v2\x1f.nvidia.nvsentinel.v1alpha1.GpuR\x03gpu\x12=\n" + "\x04opts\x18\x02 \x01(\v2).nvidia.nvsentinel.v1alpha1.UpdateOptionsR\x04opts\"\x83\x01\n" + "\x10DeleteGpuRequest\x12\x12\n" + "\x04name\x18\x01 \x01(\tR\x04name\x12\x1c\n" + "\tnamespace\x18\x02 \x01(\tR\tnamespace\x12=\n" + - "\x04opts\x18\x03 \x01(\v2).nvidia.nvsentinel.v1alpha1.DeleteOptionsR\x04opts2\xcb\x04\n" + + "\x04opts\x18\x03 \x01(\v2).nvidia.nvsentinel.v1alpha1.DeleteOptionsR\x04opts2\xb3\x05\n" + "\n" + 
"GpuService\x12_\n" + "\x06GetGpu\x12).nvidia.nvsentinel.v1alpha1.GetGpuRequest\x1a*.nvidia.nvsentinel.v1alpha1.GetGpuResponse\x12e\n" + "\bListGpus\x12+.nvidia.nvsentinel.v1alpha1.ListGpusRequest\x1a,.nvidia.nvsentinel.v1alpha1.ListGpusResponse\x12j\n" + "\tWatchGpus\x12,.nvidia.nvsentinel.v1alpha1.WatchGpusRequest\x1a-.nvidia.nvsentinel.v1alpha1.WatchGpusResponse0\x01\x12Z\n" + "\tCreateGpu\x12,.nvidia.nvsentinel.v1alpha1.CreateGpuRequest\x1a\x1f.nvidia.nvsentinel.v1alpha1.Gpu\x12Z\n" + - "\tUpdateGpu\x12,.nvidia.nvsentinel.v1alpha1.UpdateGpuRequest\x1a\x1f.nvidia.nvsentinel.v1alpha1.Gpu\x12Q\n" + + "\tUpdateGpu\x12,.nvidia.nvsentinel.v1alpha1.UpdateGpuRequest\x1a\x1f.nvidia.nvsentinel.v1alpha1.Gpu\x12f\n" + + "\x0fUpdateGpuStatus\x122.nvidia.nvsentinel.v1alpha1.UpdateGpuStatusRequest\x1a\x1f.nvidia.nvsentinel.v1alpha1.Gpu\x12Q\n" + "\tDeleteGpu\x12,.nvidia.nvsentinel.v1alpha1.DeleteGpuRequest\x1a\x16.google.protobuf.EmptyBJZHgithub.com/nvidia/nvsentinel/internal/generated/device/v1alpha1;v1alpha1b\x06proto3" var ( @@ -1332,41 +1394,42 @@ func file_device_v1alpha1_gpu_proto_rawDescGZIP() []byte { return file_device_v1alpha1_gpu_proto_rawDescData } -var file_device_v1alpha1_gpu_proto_msgTypes = make([]protoimpl.MessageInfo, 21) +var file_device_v1alpha1_gpu_proto_msgTypes = make([]protoimpl.MessageInfo, 22) var file_device_v1alpha1_gpu_proto_goTypes = []any{ - (*ObjectMeta)(nil), // 0: nvidia.nvsentinel.v1alpha1.ObjectMeta - (*ListMeta)(nil), // 1: nvidia.nvsentinel.v1alpha1.ListMeta - (*GetOptions)(nil), // 2: nvidia.nvsentinel.v1alpha1.GetOptions - (*ListOptions)(nil), // 3: nvidia.nvsentinel.v1alpha1.ListOptions - (*CreateOptions)(nil), // 4: nvidia.nvsentinel.v1alpha1.CreateOptions - (*UpdateOptions)(nil), // 5: nvidia.nvsentinel.v1alpha1.UpdateOptions - (*DeleteOptions)(nil), // 6: nvidia.nvsentinel.v1alpha1.DeleteOptions - (*Gpu)(nil), // 7: nvidia.nvsentinel.v1alpha1.Gpu - (*GpuList)(nil), // 8: nvidia.nvsentinel.v1alpha1.GpuList - (*GpuSpec)(nil), // 9: 
nvidia.nvsentinel.v1alpha1.GpuSpec - (*GpuStatus)(nil), // 10: nvidia.nvsentinel.v1alpha1.GpuStatus - (*Condition)(nil), // 11: nvidia.nvsentinel.v1alpha1.Condition - (*GetGpuRequest)(nil), // 12: nvidia.nvsentinel.v1alpha1.GetGpuRequest - (*GetGpuResponse)(nil), // 13: nvidia.nvsentinel.v1alpha1.GetGpuResponse - (*ListGpusRequest)(nil), // 14: nvidia.nvsentinel.v1alpha1.ListGpusRequest - (*ListGpusResponse)(nil), // 15: nvidia.nvsentinel.v1alpha1.ListGpusResponse - (*WatchGpusRequest)(nil), // 16: nvidia.nvsentinel.v1alpha1.WatchGpusRequest - (*WatchGpusResponse)(nil), // 17: nvidia.nvsentinel.v1alpha1.WatchGpusResponse - (*CreateGpuRequest)(nil), // 18: nvidia.nvsentinel.v1alpha1.CreateGpuRequest - (*UpdateGpuRequest)(nil), // 19: nvidia.nvsentinel.v1alpha1.UpdateGpuRequest - (*DeleteGpuRequest)(nil), // 20: nvidia.nvsentinel.v1alpha1.DeleteGpuRequest - (*timestamppb.Timestamp)(nil), // 21: google.protobuf.Timestamp - (*emptypb.Empty)(nil), // 22: google.protobuf.Empty + (*ObjectMeta)(nil), // 0: nvidia.nvsentinel.v1alpha1.ObjectMeta + (*ListMeta)(nil), // 1: nvidia.nvsentinel.v1alpha1.ListMeta + (*GetOptions)(nil), // 2: nvidia.nvsentinel.v1alpha1.GetOptions + (*ListOptions)(nil), // 3: nvidia.nvsentinel.v1alpha1.ListOptions + (*CreateOptions)(nil), // 4: nvidia.nvsentinel.v1alpha1.CreateOptions + (*UpdateOptions)(nil), // 5: nvidia.nvsentinel.v1alpha1.UpdateOptions + (*DeleteOptions)(nil), // 6: nvidia.nvsentinel.v1alpha1.DeleteOptions + (*Gpu)(nil), // 7: nvidia.nvsentinel.v1alpha1.Gpu + (*GpuList)(nil), // 8: nvidia.nvsentinel.v1alpha1.GpuList + (*GpuSpec)(nil), // 9: nvidia.nvsentinel.v1alpha1.GpuSpec + (*GpuStatus)(nil), // 10: nvidia.nvsentinel.v1alpha1.GpuStatus + (*Condition)(nil), // 11: nvidia.nvsentinel.v1alpha1.Condition + (*GetGpuRequest)(nil), // 12: nvidia.nvsentinel.v1alpha1.GetGpuRequest + (*GetGpuResponse)(nil), // 13: nvidia.nvsentinel.v1alpha1.GetGpuResponse + (*ListGpusRequest)(nil), // 14: nvidia.nvsentinel.v1alpha1.ListGpusRequest + 
(*ListGpusResponse)(nil), // 15: nvidia.nvsentinel.v1alpha1.ListGpusResponse + (*WatchGpusRequest)(nil), // 16: nvidia.nvsentinel.v1alpha1.WatchGpusRequest + (*WatchGpusResponse)(nil), // 17: nvidia.nvsentinel.v1alpha1.WatchGpusResponse + (*CreateGpuRequest)(nil), // 18: nvidia.nvsentinel.v1alpha1.CreateGpuRequest + (*UpdateGpuRequest)(nil), // 19: nvidia.nvsentinel.v1alpha1.UpdateGpuRequest + (*UpdateGpuStatusRequest)(nil), // 20: nvidia.nvsentinel.v1alpha1.UpdateGpuStatusRequest + (*DeleteGpuRequest)(nil), // 21: nvidia.nvsentinel.v1alpha1.DeleteGpuRequest + (*timestamppb.Timestamp)(nil), // 22: google.protobuf.Timestamp + (*emptypb.Empty)(nil), // 23: google.protobuf.Empty } var file_device_v1alpha1_gpu_proto_depIdxs = []int32{ - 21, // 0: nvidia.nvsentinel.v1alpha1.ObjectMeta.creation_timestamp:type_name -> google.protobuf.Timestamp + 22, // 0: nvidia.nvsentinel.v1alpha1.ObjectMeta.creation_timestamp:type_name -> google.protobuf.Timestamp 0, // 1: nvidia.nvsentinel.v1alpha1.Gpu.metadata:type_name -> nvidia.nvsentinel.v1alpha1.ObjectMeta 9, // 2: nvidia.nvsentinel.v1alpha1.Gpu.spec:type_name -> nvidia.nvsentinel.v1alpha1.GpuSpec 10, // 3: nvidia.nvsentinel.v1alpha1.Gpu.status:type_name -> nvidia.nvsentinel.v1alpha1.GpuStatus 1, // 4: nvidia.nvsentinel.v1alpha1.GpuList.metadata:type_name -> nvidia.nvsentinel.v1alpha1.ListMeta 7, // 5: nvidia.nvsentinel.v1alpha1.GpuList.items:type_name -> nvidia.nvsentinel.v1alpha1.Gpu 11, // 6: nvidia.nvsentinel.v1alpha1.GpuStatus.conditions:type_name -> nvidia.nvsentinel.v1alpha1.Condition - 21, // 7: nvidia.nvsentinel.v1alpha1.Condition.last_transition_time:type_name -> google.protobuf.Timestamp + 22, // 7: nvidia.nvsentinel.v1alpha1.Condition.last_transition_time:type_name -> google.protobuf.Timestamp 2, // 8: nvidia.nvsentinel.v1alpha1.GetGpuRequest.opts:type_name -> nvidia.nvsentinel.v1alpha1.GetOptions 7, // 9: nvidia.nvsentinel.v1alpha1.GetGpuResponse.gpu:type_name -> nvidia.nvsentinel.v1alpha1.Gpu 3, // 10: 
nvidia.nvsentinel.v1alpha1.ListGpusRequest.opts:type_name -> nvidia.nvsentinel.v1alpha1.ListOptions @@ -1377,24 +1440,28 @@ var file_device_v1alpha1_gpu_proto_depIdxs = []int32{ 4, // 15: nvidia.nvsentinel.v1alpha1.CreateGpuRequest.opts:type_name -> nvidia.nvsentinel.v1alpha1.CreateOptions 7, // 16: nvidia.nvsentinel.v1alpha1.UpdateGpuRequest.gpu:type_name -> nvidia.nvsentinel.v1alpha1.Gpu 5, // 17: nvidia.nvsentinel.v1alpha1.UpdateGpuRequest.opts:type_name -> nvidia.nvsentinel.v1alpha1.UpdateOptions - 6, // 18: nvidia.nvsentinel.v1alpha1.DeleteGpuRequest.opts:type_name -> nvidia.nvsentinel.v1alpha1.DeleteOptions - 12, // 19: nvidia.nvsentinel.v1alpha1.GpuService.GetGpu:input_type -> nvidia.nvsentinel.v1alpha1.GetGpuRequest - 14, // 20: nvidia.nvsentinel.v1alpha1.GpuService.ListGpus:input_type -> nvidia.nvsentinel.v1alpha1.ListGpusRequest - 16, // 21: nvidia.nvsentinel.v1alpha1.GpuService.WatchGpus:input_type -> nvidia.nvsentinel.v1alpha1.WatchGpusRequest - 18, // 22: nvidia.nvsentinel.v1alpha1.GpuService.CreateGpu:input_type -> nvidia.nvsentinel.v1alpha1.CreateGpuRequest - 19, // 23: nvidia.nvsentinel.v1alpha1.GpuService.UpdateGpu:input_type -> nvidia.nvsentinel.v1alpha1.UpdateGpuRequest - 20, // 24: nvidia.nvsentinel.v1alpha1.GpuService.DeleteGpu:input_type -> nvidia.nvsentinel.v1alpha1.DeleteGpuRequest - 13, // 25: nvidia.nvsentinel.v1alpha1.GpuService.GetGpu:output_type -> nvidia.nvsentinel.v1alpha1.GetGpuResponse - 15, // 26: nvidia.nvsentinel.v1alpha1.GpuService.ListGpus:output_type -> nvidia.nvsentinel.v1alpha1.ListGpusResponse - 17, // 27: nvidia.nvsentinel.v1alpha1.GpuService.WatchGpus:output_type -> nvidia.nvsentinel.v1alpha1.WatchGpusResponse - 7, // 28: nvidia.nvsentinel.v1alpha1.GpuService.CreateGpu:output_type -> nvidia.nvsentinel.v1alpha1.Gpu - 7, // 29: nvidia.nvsentinel.v1alpha1.GpuService.UpdateGpu:output_type -> nvidia.nvsentinel.v1alpha1.Gpu - 22, // 30: nvidia.nvsentinel.v1alpha1.GpuService.DeleteGpu:output_type -> google.protobuf.Empty - 25, 
// [25:31] is the sub-list for method output_type - 19, // [19:25] is the sub-list for method input_type - 19, // [19:19] is the sub-list for extension type_name - 19, // [19:19] is the sub-list for extension extendee - 0, // [0:19] is the sub-list for field type_name + 7, // 18: nvidia.nvsentinel.v1alpha1.UpdateGpuStatusRequest.gpu:type_name -> nvidia.nvsentinel.v1alpha1.Gpu + 5, // 19: nvidia.nvsentinel.v1alpha1.UpdateGpuStatusRequest.opts:type_name -> nvidia.nvsentinel.v1alpha1.UpdateOptions + 6, // 20: nvidia.nvsentinel.v1alpha1.DeleteGpuRequest.opts:type_name -> nvidia.nvsentinel.v1alpha1.DeleteOptions + 12, // 21: nvidia.nvsentinel.v1alpha1.GpuService.GetGpu:input_type -> nvidia.nvsentinel.v1alpha1.GetGpuRequest + 14, // 22: nvidia.nvsentinel.v1alpha1.GpuService.ListGpus:input_type -> nvidia.nvsentinel.v1alpha1.ListGpusRequest + 16, // 23: nvidia.nvsentinel.v1alpha1.GpuService.WatchGpus:input_type -> nvidia.nvsentinel.v1alpha1.WatchGpusRequest + 18, // 24: nvidia.nvsentinel.v1alpha1.GpuService.CreateGpu:input_type -> nvidia.nvsentinel.v1alpha1.CreateGpuRequest + 19, // 25: nvidia.nvsentinel.v1alpha1.GpuService.UpdateGpu:input_type -> nvidia.nvsentinel.v1alpha1.UpdateGpuRequest + 20, // 26: nvidia.nvsentinel.v1alpha1.GpuService.UpdateGpuStatus:input_type -> nvidia.nvsentinel.v1alpha1.UpdateGpuStatusRequest + 21, // 27: nvidia.nvsentinel.v1alpha1.GpuService.DeleteGpu:input_type -> nvidia.nvsentinel.v1alpha1.DeleteGpuRequest + 13, // 28: nvidia.nvsentinel.v1alpha1.GpuService.GetGpu:output_type -> nvidia.nvsentinel.v1alpha1.GetGpuResponse + 15, // 29: nvidia.nvsentinel.v1alpha1.GpuService.ListGpus:output_type -> nvidia.nvsentinel.v1alpha1.ListGpusResponse + 17, // 30: nvidia.nvsentinel.v1alpha1.GpuService.WatchGpus:output_type -> nvidia.nvsentinel.v1alpha1.WatchGpusResponse + 7, // 31: nvidia.nvsentinel.v1alpha1.GpuService.CreateGpu:output_type -> nvidia.nvsentinel.v1alpha1.Gpu + 7, // 32: nvidia.nvsentinel.v1alpha1.GpuService.UpdateGpu:output_type -> 
nvidia.nvsentinel.v1alpha1.Gpu + 7, // 33: nvidia.nvsentinel.v1alpha1.GpuService.UpdateGpuStatus:output_type -> nvidia.nvsentinel.v1alpha1.Gpu + 23, // 34: nvidia.nvsentinel.v1alpha1.GpuService.DeleteGpu:output_type -> google.protobuf.Empty + 28, // [28:35] is the sub-list for method output_type + 21, // [21:28] is the sub-list for method input_type + 21, // [21:21] is the sub-list for extension type_name + 21, // [21:21] is the sub-list for extension extendee + 0, // [0:21] is the sub-list for field type_name } func init() { file_device_v1alpha1_gpu_proto_init() } @@ -1408,7 +1475,7 @@ func file_device_v1alpha1_gpu_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_device_v1alpha1_gpu_proto_rawDesc), len(file_device_v1alpha1_gpu_proto_rawDesc)), NumEnums: 0, - NumMessages: 21, + NumMessages: 22, NumExtensions: 0, NumServices: 1, }, diff --git a/internal/generated/device/v1alpha1/gpu_grpc.pb.go b/internal/generated/device/v1alpha1/gpu_grpc.pb.go index c31f32a56..2590d7ca7 100644 --- a/internal/generated/device/v1alpha1/gpu_grpc.pb.go +++ b/internal/generated/device/v1alpha1/gpu_grpc.pb.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. 
// versions: // - protoc-gen-go-grpc v1.5.1 -// - protoc v6.33.0 +// - protoc v6.33.4 // source: device/v1alpha1/gpu.proto package v1alpha1 @@ -34,12 +34,13 @@ import ( const _ = grpc.SupportPackageIsVersion9 const ( - GpuService_GetGpu_FullMethodName = "/nvidia.nvsentinel.v1alpha1.GpuService/GetGpu" - GpuService_ListGpus_FullMethodName = "/nvidia.nvsentinel.v1alpha1.GpuService/ListGpus" - GpuService_WatchGpus_FullMethodName = "/nvidia.nvsentinel.v1alpha1.GpuService/WatchGpus" - GpuService_CreateGpu_FullMethodName = "/nvidia.nvsentinel.v1alpha1.GpuService/CreateGpu" - GpuService_UpdateGpu_FullMethodName = "/nvidia.nvsentinel.v1alpha1.GpuService/UpdateGpu" - GpuService_DeleteGpu_FullMethodName = "/nvidia.nvsentinel.v1alpha1.GpuService/DeleteGpu" + GpuService_GetGpu_FullMethodName = "/nvidia.nvsentinel.v1alpha1.GpuService/GetGpu" + GpuService_ListGpus_FullMethodName = "/nvidia.nvsentinel.v1alpha1.GpuService/ListGpus" + GpuService_WatchGpus_FullMethodName = "/nvidia.nvsentinel.v1alpha1.GpuService/WatchGpus" + GpuService_CreateGpu_FullMethodName = "/nvidia.nvsentinel.v1alpha1.GpuService/CreateGpu" + GpuService_UpdateGpu_FullMethodName = "/nvidia.nvsentinel.v1alpha1.GpuService/UpdateGpu" + GpuService_UpdateGpuStatus_FullMethodName = "/nvidia.nvsentinel.v1alpha1.GpuService/UpdateGpuStatus" + GpuService_DeleteGpu_FullMethodName = "/nvidia.nvsentinel.v1alpha1.GpuService/DeleteGpu" ) // GpuServiceClient is the client API for GpuService service. @@ -58,6 +59,8 @@ type GpuServiceClient interface { CreateGpu(ctx context.Context, in *CreateGpuRequest, opts ...grpc.CallOption) (*Gpu, error) // UpdateGpu updates a single GPU resource. UpdateGpu(ctx context.Context, in *UpdateGpuRequest, opts ...grpc.CallOption) (*Gpu, error) + // UpdateGpuStatus updates only the status subresource of a GPU. + UpdateGpuStatus(ctx context.Context, in *UpdateGpuStatusRequest, opts ...grpc.CallOption) (*Gpu, error) // DeleteGpu deletes a single GPU resource. 
DeleteGpu(ctx context.Context, in *DeleteGpuRequest, opts ...grpc.CallOption) (*emptypb.Empty, error) } @@ -129,6 +132,16 @@ func (c *gpuServiceClient) UpdateGpu(ctx context.Context, in *UpdateGpuRequest, return out, nil } +func (c *gpuServiceClient) UpdateGpuStatus(ctx context.Context, in *UpdateGpuStatusRequest, opts ...grpc.CallOption) (*Gpu, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(Gpu) + err := c.cc.Invoke(ctx, GpuService_UpdateGpuStatus_FullMethodName, in, out, cOpts...) + if err != nil { + return nil, err + } + return out, nil +} + func (c *gpuServiceClient) DeleteGpu(ctx context.Context, in *DeleteGpuRequest, opts ...grpc.CallOption) (*emptypb.Empty, error) { cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) out := new(emptypb.Empty) @@ -155,6 +168,8 @@ type GpuServiceServer interface { CreateGpu(context.Context, *CreateGpuRequest) (*Gpu, error) // UpdateGpu updates a single GPU resource. UpdateGpu(context.Context, *UpdateGpuRequest) (*Gpu, error) + // UpdateGpuStatus updates only the status subresource of a GPU. + UpdateGpuStatus(context.Context, *UpdateGpuStatusRequest) (*Gpu, error) // DeleteGpu deletes a single GPU resource. 
DeleteGpu(context.Context, *DeleteGpuRequest) (*emptypb.Empty, error) mustEmbedUnimplementedGpuServiceServer() @@ -182,6 +197,9 @@ func (UnimplementedGpuServiceServer) CreateGpu(context.Context, *CreateGpuReques func (UnimplementedGpuServiceServer) UpdateGpu(context.Context, *UpdateGpuRequest) (*Gpu, error) { return nil, status.Errorf(codes.Unimplemented, "method UpdateGpu not implemented") } +func (UnimplementedGpuServiceServer) UpdateGpuStatus(context.Context, *UpdateGpuStatusRequest) (*Gpu, error) { + return nil, status.Errorf(codes.Unimplemented, "method UpdateGpuStatus not implemented") +} func (UnimplementedGpuServiceServer) DeleteGpu(context.Context, *DeleteGpuRequest) (*emptypb.Empty, error) { return nil, status.Errorf(codes.Unimplemented, "method DeleteGpu not implemented") } @@ -289,6 +307,24 @@ func _GpuService_UpdateGpu_Handler(srv interface{}, ctx context.Context, dec fun return interceptor(ctx, in, info, handler) } +func _GpuService_UpdateGpuStatus_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(UpdateGpuStatusRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(GpuServiceServer).UpdateGpuStatus(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: GpuService_UpdateGpuStatus_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(GpuServiceServer).UpdateGpuStatus(ctx, req.(*UpdateGpuStatusRequest)) + } + return interceptor(ctx, in, info, handler) +} + func _GpuService_DeleteGpu_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { in := new(DeleteGpuRequest) if err := dec(in); err != nil { @@ -330,6 +366,10 @@ var GpuService_ServiceDesc = grpc.ServiceDesc{ MethodName: "UpdateGpu", Handler: _GpuService_UpdateGpu_Handler, }, + { + 
MethodName: "UpdateGpuStatus", + Handler: _GpuService_UpdateGpuStatus_Handler, + }, { MethodName: "DeleteGpu", Handler: _GpuService_DeleteGpu_Handler, diff --git a/pkg/client-go/client/versioned/clientset.go b/pkg/client-go/client/versioned/clientset.go index 0779de3d7..6a7505817 100644 --- a/pkg/client-go/client/versioned/clientset.go +++ b/pkg/client-go/client/versioned/clientset.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pkg/client-go/client/versioned/fake/clientset_generated.go b/pkg/client-go/client/versioned/fake/clientset_generated.go index e0118c0f8..71cea6c1b 100644 --- a/pkg/client-go/client/versioned/fake/clientset_generated.go +++ b/pkg/client-go/client/versioned/fake/clientset_generated.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pkg/client-go/client/versioned/fake/doc.go b/pkg/client-go/client/versioned/fake/doc.go index 44b048c89..f6c7d06f0 100644 --- a/pkg/client-go/client/versioned/fake/doc.go +++ b/pkg/client-go/client/versioned/fake/doc.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/pkg/client-go/client/versioned/fake/register.go b/pkg/client-go/client/versioned/fake/register.go index 1573cb4f7..a2d9f7802 100644 --- a/pkg/client-go/client/versioned/fake/register.go +++ b/pkg/client-go/client/versioned/fake/register.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pkg/client-go/client/versioned/scheme/doc.go b/pkg/client-go/client/versioned/scheme/doc.go index 55f52dc51..a3f9c58bc 100644 --- a/pkg/client-go/client/versioned/scheme/doc.go +++ b/pkg/client-go/client/versioned/scheme/doc.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pkg/client-go/client/versioned/scheme/register.go b/pkg/client-go/client/versioned/scheme/register.go index 97cf5a8ff..46045b406 100644 --- a/pkg/client-go/client/versioned/scheme/register.go +++ b/pkg/client-go/client/versioned/scheme/register.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/pkg/client-go/client/versioned/typed/device/v1alpha1/device_client.go b/pkg/client-go/client/versioned/typed/device/v1alpha1/device_client.go index 01b63b877..d2ca86aa3 100644 --- a/pkg/client-go/client/versioned/typed/device/v1alpha1/device_client.go +++ b/pkg/client-go/client/versioned/typed/device/v1alpha1/device_client.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pkg/client-go/client/versioned/typed/device/v1alpha1/doc.go b/pkg/client-go/client/versioned/typed/device/v1alpha1/doc.go index 7749c1800..c689ab840 100644 --- a/pkg/client-go/client/versioned/typed/device/v1alpha1/doc.go +++ b/pkg/client-go/client/versioned/typed/device/v1alpha1/doc.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pkg/client-go/client/versioned/typed/device/v1alpha1/fake/doc.go b/pkg/client-go/client/versioned/typed/device/v1alpha1/fake/doc.go index 2702a5453..942a10f72 100644 --- a/pkg/client-go/client/versioned/typed/device/v1alpha1/fake/doc.go +++ b/pkg/client-go/client/versioned/typed/device/v1alpha1/fake/doc.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/pkg/client-go/client/versioned/typed/device/v1alpha1/fake/fake_device_client.go b/pkg/client-go/client/versioned/typed/device/v1alpha1/fake/fake_device_client.go index 32c7c5401..5bd437c2c 100644 --- a/pkg/client-go/client/versioned/typed/device/v1alpha1/fake/fake_device_client.go +++ b/pkg/client-go/client/versioned/typed/device/v1alpha1/fake/fake_device_client.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pkg/client-go/client/versioned/typed/device/v1alpha1/fake/fake_gpu.go b/pkg/client-go/client/versioned/typed/device/v1alpha1/fake/fake_gpu.go index 192da6fa1..e68564670 100644 --- a/pkg/client-go/client/versioned/typed/device/v1alpha1/fake/fake_gpu.go +++ b/pkg/client-go/client/versioned/typed/device/v1alpha1/fake/fake_gpu.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -110,6 +110,15 @@ func (c *fakeGPUs) Update(ctx context.Context, gPU *devicev1alpha1.GPU, opts v1. return obj.(*devicev1alpha1.GPU), err } +func (c *fakeGPUs) UpdateStatus(ctx context.Context, gPU *devicev1alpha1.GPU, opts v1.UpdateOptions) (*devicev1alpha1.GPU, error) { + obj, err := c.Fake. + Invokes(testing.NewRootUpdateSubresourceActionWithOptions(c.Resource(), "status", gPU, opts), &devicev1alpha1.GPU{}) + if obj == nil { + return nil, err + } + return obj.(*devicev1alpha1.GPU), err +} + // Delete takes name of the gPU and deletes it. Returns an error if one occurs. func (c *fakeGPUs) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error { _, err := c.Fake. 
diff --git a/pkg/client-go/client/versioned/typed/device/v1alpha1/generated_expansion.go b/pkg/client-go/client/versioned/typed/device/v1alpha1/generated_expansion.go index c99bbb48c..97d724146 100644 --- a/pkg/client-go/client/versioned/typed/device/v1alpha1/generated_expansion.go +++ b/pkg/client-go/client/versioned/typed/device/v1alpha1/generated_expansion.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pkg/client-go/client/versioned/typed/device/v1alpha1/gpu.go b/pkg/client-go/client/versioned/typed/device/v1alpha1/gpu.go index 4328d58a5..734754200 100644 --- a/pkg/client-go/client/versioned/typed/device/v1alpha1/gpu.go +++ b/pkg/client-go/client/versioned/typed/device/v1alpha1/gpu.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -38,6 +38,7 @@ type GPUsGetter interface { type GPUInterface interface { Create(ctx context.Context, gPU *devicev1alpha1.GPU, opts v1.CreateOptions) (*devicev1alpha1.GPU, error) Update(ctx context.Context, gPU *devicev1alpha1.GPU, opts v1.UpdateOptions) (*devicev1alpha1.GPU, error) + UpdateStatus(ctx context.Context, gPU *devicev1alpha1.GPU, opts v1.UpdateOptions) (*devicev1alpha1.GPU, error) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error Get(ctx context.Context, name string, opts v1.GetOptions) (*devicev1alpha1.GPU, error) List(ctx context.Context, opts v1.ListOptions) (*devicev1alpha1.GPUList, error) @@ -191,6 +192,26 @@ func (c *gpus) Update(ctx context.Context, gpu *devicev1alpha1.GPU, opts v1.Upda return obj, nil } +// UpdateStatus updates only the status subresource of a GPU. +func (c *gpus) UpdateStatus(ctx context.Context, gpu *devicev1alpha1.GPU, opts v1.UpdateOptions) (*devicev1alpha1.GPU, error) { + resp, err := c.client.UpdateGpuStatus(ctx, &pb.UpdateGpuStatusRequest{ + Gpu: devicev1alpha1.ToProto(gpu), + Opts: &pb.UpdateOptions{}, + }) + if err != nil { + return nil, err + } + + obj := devicev1alpha1.FromProto(resp) + c.logger.V(2).Info("Updated GPU status", + "name", obj.GetName(), + "namespace", c.getNamespace(), + "resource-version", obj.GetResourceVersion(), + ) + + return obj, nil +} + // TODO: Implement DeleteOptions support. func (c *gpus) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error { _, err := c.client.DeleteGpu(ctx, &pb.DeleteGpuRequest{ diff --git a/pkg/client-go/informers/externalversions/device/interface.go b/pkg/client-go/informers/externalversions/device/interface.go index 871a7d07f..702c09212 100644 --- a/pkg/client-go/informers/externalversions/device/interface.go +++ b/pkg/client-go/informers/externalversions/device/interface.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pkg/client-go/informers/externalversions/device/v1alpha1/gpu.go b/pkg/client-go/informers/externalversions/device/v1alpha1/gpu.go index db5da81ac..b5f6f419f 100644 --- a/pkg/client-go/informers/externalversions/device/v1alpha1/gpu.go +++ b/pkg/client-go/informers/externalversions/device/v1alpha1/gpu.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pkg/client-go/informers/externalversions/device/v1alpha1/interface.go b/pkg/client-go/informers/externalversions/device/v1alpha1/interface.go index f3921c8e3..68303b6eb 100644 --- a/pkg/client-go/informers/externalversions/device/v1alpha1/interface.go +++ b/pkg/client-go/informers/externalversions/device/v1alpha1/interface.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pkg/client-go/informers/externalversions/factory.go b/pkg/client-go/informers/externalversions/factory.go index 296c50425..cbf2ef267 100644 --- a/pkg/client-go/informers/externalversions/factory.go +++ b/pkg/client-go/informers/externalversions/factory.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/pkg/client-go/informers/externalversions/generic.go b/pkg/client-go/informers/externalversions/generic.go index f8ccccacc..0382aab5b 100644 --- a/pkg/client-go/informers/externalversions/generic.go +++ b/pkg/client-go/informers/externalversions/generic.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pkg/client-go/informers/externalversions/internalinterfaces/factory_interfaces.go b/pkg/client-go/informers/externalversions/internalinterfaces/factory_interfaces.go index 35543b30e..f63107c96 100644 --- a/pkg/client-go/informers/externalversions/internalinterfaces/factory_interfaces.go +++ b/pkg/client-go/informers/externalversions/internalinterfaces/factory_interfaces.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pkg/client-go/listers/device/v1alpha1/expansion_generated.go b/pkg/client-go/listers/device/v1alpha1/expansion_generated.go index 1aa65cee4..011529aa5 100644 --- a/pkg/client-go/listers/device/v1alpha1/expansion_generated.go +++ b/pkg/client-go/listers/device/v1alpha1/expansion_generated.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/pkg/client-go/listers/device/v1alpha1/gpu.go b/pkg/client-go/listers/device/v1alpha1/gpu.go index 709bd429f..2ea778590 100644 --- a/pkg/client-go/listers/device/v1alpha1/gpu.go +++ b/pkg/client-go/listers/device/v1alpha1/gpu.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/pkg/controlplane/apiserver/config.go b/pkg/controlplane/apiserver/config.go index bb3d8bff7..0fa090d3b 100644 --- a/pkg/controlplane/apiserver/config.go +++ b/pkg/controlplane/apiserver/config.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -30,7 +30,7 @@ import ( "github.com/nvidia/nvsentinel/pkg/controlplane/apiserver/metrics" "github.com/nvidia/nvsentinel/pkg/controlplane/apiserver/options" "github.com/nvidia/nvsentinel/pkg/controlplane/apiserver/registry" - "github.com/nvidia/nvsentinel/pkg/util/version" + "github.com/nvidia/nvsentinel/pkg/version" ) type Config struct { diff --git a/pkg/controlplane/apiserver/metrics/metrics.go b/pkg/controlplane/apiserver/metrics/metrics.go index 98056ec81..2618ebebc 100644 --- a/pkg/controlplane/apiserver/metrics/metrics.go +++ b/pkg/controlplane/apiserver/metrics/metrics.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -18,7 +18,7 @@ import ( "sync" grpcprom "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus" - "github.com/nvidia/nvsentinel/pkg/util/version" + "github.com/nvidia/nvsentinel/pkg/version" "github.com/prometheus/client_golang/prometheus" "google.golang.org/grpc" "k8s.io/klog/v2" @@ -31,14 +31,18 @@ type ServerMetrics struct { Registry *prometheus.Registry Collectors *grpcprom.ServerMetrics ServiceHealthStatus *prometheus.GaugeVec + mu sync.Mutex buildInfoLabels prometheus.Labels registerOnce sync.Once } // WithBuildInfo populates the metadata labels used by the build_info metric. +// Must be called before Register() and only from a single goroutine (typically during init). func (m *ServerMetrics) WithBuildInfo(info version.Info) *ServerMetrics { + m.mu.Lock() + defer m.mu.Unlock() m.buildInfoLabels = prometheus.Labels{ - "version": info.GitVersion, + "version": info.Version, "revision": info.GitCommit, "build_date": info.BuildDate, "goversion": info.GoVersion, @@ -79,11 +83,15 @@ func (m *ServerMetrics) Register() { klog.ErrorS(err, "Failed to register service health metrics") } - if m.buildInfoLabels != nil { + m.mu.Lock() + labels := m.buildInfoLabels + m.mu.Unlock() + + if labels != nil { version := prometheus.NewGauge(prometheus.GaugeOpts{ Name: "device_apiserver_build_info", Help: "Build information about the device-apiserver binary.", - ConstLabels: m.buildInfoLabels, + ConstLabels: labels, }) version.Set(1) diff --git a/pkg/controlplane/apiserver/options/grpc/options.go b/pkg/controlplane/apiserver/options/grpc/options.go index 238700c8b..ff46b4728 100644 --- a/pkg/controlplane/apiserver/options/grpc/options.go +++ b/pkg/controlplane/apiserver/options/grpc/options.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -125,8 +125,6 @@ func (o *Options) Complete() (CompletedOptions, error) { o.MinPingInterval = 5 * time.Second } - o.PermitWithoutStream = true - completed := completedOptions{ Options: *o, } @@ -197,12 +195,6 @@ func (o *Options) Validate() []error { o.MinPingInterval)) } - if !o.PermitWithoutStream { - allErrors = append(allErrors, - fmt.Errorf("permit-without-stream: %v must be true to allow keepalive pings without active streams", - o.PermitWithoutStream)) - } - return allErrors } diff --git a/pkg/controlplane/apiserver/options/grpc/options_test.go b/pkg/controlplane/apiserver/options/grpc/options_test.go index eb725f423..f39e52f4c 100644 --- a/pkg/controlplane/apiserver/options/grpc/options_test.go +++ b/pkg/controlplane/apiserver/options/grpc/options_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -75,9 +75,8 @@ func TestComplete(t *testing.T) { if completed.MaxRecvMsgSize != 4194304 { t.Errorf("expected default recv size 4MiB, got %d", completed.MaxRecvMsgSize) } - if !completed.PermitWithoutStream { - t.Error("PermitWithoutStream should be forced to true") - } + // PermitWithoutStream defaults to true via NewOptions(), not forced by Complete(). + // A zero-value Options{} will have PermitWithoutStream=false since there is no flag for it. 
}) t.Run("Preserve user overrides", func(t *testing.T) { diff --git a/pkg/controlplane/apiserver/options/options.go b/pkg/controlplane/apiserver/options/options.go index 113523ef5..c6b5b9470 100644 --- a/pkg/controlplane/apiserver/options/options.go +++ b/pkg/controlplane/apiserver/options/options.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ package options import ( "context" "fmt" + "net" "os" "strings" "time" @@ -116,6 +117,8 @@ func (o *Options) Complete(ctx context.Context) (CompletedOptions, error) { o.NodeName = strings.ToLower(strings.TrimSpace(o.NodeName)) //nolint:wsl if o.HealthAddress == "" { + // Default binds to all interfaces for Kubernetes kubelet health probes. + // Use NetworkPolicy to restrict access in production. o.HealthAddress = ":50051" } @@ -124,6 +127,8 @@ func (o *Options) Complete(ctx context.Context) (CompletedOptions, error) { } if o.MetricsAddress == "" { + // Default binds to all interfaces for Prometheus scraping. + // Use NetworkPolicy to restrict access in production. 
o.MetricsAddress = ":9090" } @@ -203,10 +208,15 @@ func (o *CompletedOptions) Validate() []error { } } - if o.HealthAddress != "" && o.HealthAddress == o.MetricsAddress { - allErrors = append(allErrors, - fmt.Errorf("health-probe-bind-address and metrics-bind-address: must not be the same (%s)", - o.HealthAddress)) + if o.HealthAddress != "" && o.MetricsAddress != "" { + _, healthPort, _ := net.SplitHostPort(o.HealthAddress) + _, metricsPort, _ := net.SplitHostPort(o.MetricsAddress) + + if healthPort != "" && healthPort == metricsPort { + allErrors = append(allErrors, + fmt.Errorf("health-probe-bind-address and metrics-bind-address: must not use the same port (%s)", + healthPort)) + } } if o.ShutdownGracePeriod < 0 { diff --git a/pkg/controlplane/apiserver/options/options_test.go b/pkg/controlplane/apiserver/options/options_test.go index 0f905a6bc..1108e9f7e 100644 --- a/pkg/controlplane/apiserver/options/options_test.go +++ b/pkg/controlplane/apiserver/options/options_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -158,7 +158,7 @@ func TestValidate(t *testing.T) { o.MetricsAddress = ":8080" }, wantErr: true, - errContains: "must not be the same (:8080)", + errContains: "must not use the same port (8080)", }, { name: "Negative service monitor period", diff --git a/pkg/controlplane/apiserver/server.go b/pkg/controlplane/apiserver/server.go index a7a616aa9..41ecdb00a 100644 --- a/pkg/controlplane/apiserver/server.go +++ b/pkg/controlplane/apiserver/server.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -28,7 +28,7 @@ import ( "github.com/nvidia/nvsentinel/pkg/controlplane/apiserver/metrics" "github.com/nvidia/nvsentinel/pkg/storage/storagebackend" netutils "github.com/nvidia/nvsentinel/pkg/util/net" - "github.com/nvidia/nvsentinel/pkg/util/version" + "github.com/nvidia/nvsentinel/pkg/version" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "google.golang.org/grpc" @@ -36,6 +36,7 @@ import ( "google.golang.org/grpc/health" healthpb "google.golang.org/grpc/health/grpc_health_v1" "google.golang.org/grpc/reflection" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/klog/v2" ) @@ -51,7 +52,6 @@ type DeviceAPIServer struct { AdminServer *grpc.Server AdminCleanup func() Metrics *metrics.ServerMetrics - MetricsRegistry *prometheus.Registry Storage *storagebackend.Storage ServiceProviders []api.ServiceProvider mu sync.RWMutex @@ -92,9 +92,16 @@ func (s *DeviceAPIServer) PrepareRun(ctx context.Context) (preparedDeviceAPIServ if s.HealthAddress != "" { s.HealthServer = health.NewServer() healthpb.RegisterHealthServer(s.AdminServer, s.HealthServer) + // Also register on DeviceServer so sidecar providers connecting via + // unix socket can perform health checks without a separate connection. + healthpb.RegisterHealthServer(s.DeviceServer, s.HealthServer) s.HealthServer.SetServingStatus("", healthpb.HealthCheckResponse_NOT_SERVING) } + // Enable gRPC reflection on both servers. This is intentional: + // - DeviceServer: allows grpcurl/grpc_cli debugging + // - AdminServer: required for channelz and admin tooling + // To restrict in production, use NetworkPolicy on the admin port. 
reflection.Register(s.DeviceServer) reflection.Register(s.AdminServer) @@ -139,13 +146,27 @@ func (s *DeviceAPIServer) run(ctx context.Context) error { go func() { defer s.wg.Done() + defer func() { + if r := recover(); r != nil { + klog.ErrorS(nil, "Health monitor panicked, setting NOT_SERVING", "panic", r) + + if s.HealthServer != nil { + s.HealthServer.SetServingStatus("", healthpb.HealthCheckResponse_NOT_SERVING) + } + } + }() + s.monitorServiceHealth(ctx) }() } if s.MetricsAddress != "" { - // TODO: put in wg?? - go s.serveMetrics(ctx) + s.wg.Add(1) + go func() { + defer s.wg.Done() + + s.serveMetrics(ctx) + }() } if err := s.waitForStorage(ctx); err != nil { @@ -174,7 +195,18 @@ func (s *DeviceAPIServer) run(ctx context.Context) error { s.DeviceServer.GracefulStop() if s.AdminServer != nil { - s.AdminServer.GracefulStop() + adminDone := make(chan struct{}) + go func() { + s.AdminServer.GracefulStop() + close(adminDone) + }() + + select { + case <-adminDone: + case <-time.After(s.ShutdownGracePeriod): + logger.V(2).Info("AdminServer graceful stop timed out, forcing stop") + s.AdminServer.Stop() + } } if s.AdminCleanup != nil { @@ -214,14 +246,17 @@ func (s *DeviceAPIServer) serveHealth(ctx context.Context) { // to unblock Serve and reject new conns. 
go func() { <-ctx.Done() - lis.Close() + + if err := lis.Close(); err != nil { + logger.Error(err, "Failed to close health listener", "address", s.HealthAddress) + } }() logger.V(2).Info("Starting health server", "address", s.HealthAddress) serveErr := s.AdminServer.Serve(lis) if serveErr != nil && !errors.Is(serveErr, grpc.ErrServerStopped) && !errors.Is(serveErr, net.ErrClosed) { - logger.Error(err, "Health server stopped unexpectedly") + logger.Error(serveErr, "Health server stopped unexpectedly") } } @@ -268,7 +303,7 @@ func (s *DeviceAPIServer) serveMetrics(ctx context.Context) { serveErr := metricsSrv.Serve(lis) if serveErr != nil && !errors.Is(serveErr, http.ErrServerClosed) && !errors.Is(serveErr, net.ErrClosed) { - logger.Error(err, "Metrics server stopped unexpectedly", "address", s.MetricsAddress) + logger.Error(serveErr, "Metrics server stopped unexpectedly", "address", s.MetricsAddress) } } @@ -277,48 +312,40 @@ func (s *DeviceAPIServer) waitForStorage(ctx context.Context) error { return fmt.Errorf("storage backend is not initialized") } - logger := klog.FromContext(ctx) - startTime := time.Now() - if s.Storage.IsReady() { return nil } - pollTicker := time.NewTicker(200 * time.Millisecond) - defer pollTicker.Stop() - - heartbeat := time.NewTicker(5 * time.Second) - defer heartbeat.Stop() - + logger := klog.FromContext(ctx) logger.Info("Waiting for storage backend to become ready") + startTime := time.Now() - for { - select { - case <-ctx.Done(): - return ctx.Err() - - case <-pollTicker.C: + err := wait.PollUntilContextTimeout(ctx, 200*time.Millisecond, 60*time.Second, true, + func(ctx context.Context) (bool, error) { if s.Storage.IsReady() { logger.V(2).Info("Storage backend is ready", "duration", time.Since(startTime).Round(time.Second)) - return nil + return true, nil } - case <-heartbeat.C: - logger.V(2).Info("Still waiting for storage backend", - "elapsed", time.Since(startTime).Round(time.Second)) - } + return false, nil + }, + ) + if err != nil { 
+ return fmt.Errorf("timed out waiting for storage backend readiness: %w", err) } + + return nil } func (s *DeviceAPIServer) installAPIServices(ctx context.Context) error { logger := klog.FromContext(ctx) var services []api.Service - for _, sp := range s.ServiceProviders { + for i, sp := range s.ServiceProviders { service, err := sp.Install(s.DeviceServer, s.Storage.StorageConfig) if err != nil { - return fmt.Errorf("failed to install API service: %w", err) + return fmt.Errorf("failed to install API service (index %d): %w", i, err) } services = append(services, service) diff --git a/pkg/grpc/client/client_conn.go b/pkg/grpc/client/client_conn.go index 1563e6d6a..5a19b3810 100644 --- a/pkg/grpc/client/client_conn.go +++ b/pkg/grpc/client/client_conn.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ package client import ( "fmt" + "strings" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" @@ -39,6 +40,15 @@ func ClientConnFor(config *Config, opts ...DialOption) (*grpc.ClientConn, error) return nil, err } + // Insecure credentials are only safe over Unix domain sockets. + // TLS is required for non-UDS targets (dns:, passthrough:). + if !strings.HasPrefix(cfg.Target, "unix://") && !strings.HasPrefix(cfg.Target, "unix:") { + return nil, fmt.Errorf( + "insecure credentials require unix:// target, got %q; TLS is required for non-UDS targets", + cfg.Target, + ) + } + logger := cfg.GetLogger() grpcOpts := []grpc.DialOption{ diff --git a/pkg/grpc/client/client_conn_test.go b/pkg/grpc/client/client_conn_test.go index 18f9d1864..b8589fad4 100644 --- a/pkg/grpc/client/client_conn_test.go +++ b/pkg/grpc/client/client_conn_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. 
All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ package client import ( + "strings" "testing" "github.com/go-logr/logr" @@ -54,4 +55,18 @@ func TestClientConnFor(t *testing.T) { } conn.Close() }) + + t.Run("Rejects non-unix target with insecure credentials", func(t *testing.T) { + cfg := &Config{ + Target: "dns:///localhost:8080", + UserAgent: "test/1.0", + } + _, err := ClientConnFor(cfg) + if err == nil { + t.Fatal("expected error for non-unix target with insecure credentials") + } + if !strings.Contains(err.Error(), "insecure credentials require unix://") { + t.Errorf("unexpected error message: %v", err) + } + }) } diff --git a/pkg/grpc/client/config.go b/pkg/grpc/client/config.go index 308e72bef..1697845df 100644 --- a/pkg/grpc/client/config.go +++ b/pkg/grpc/client/config.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -17,10 +17,11 @@ package client import ( "fmt" "os" + "strings" "time" "github.com/go-logr/logr" - "github.com/nvidia/nvsentinel/pkg/util/version" + "github.com/nvidia/nvsentinel/pkg/version" ) const ( @@ -73,6 +74,12 @@ func (c *Config) Validate() error { return fmt.Errorf("gRPC target address is required; verify %s is not empty", NvidiaDeviceAPITargetEnvVar) } + // Validate target scheme + if !strings.HasPrefix(c.Target, "unix://") && !strings.HasPrefix(c.Target, "unix:") && + !strings.HasPrefix(c.Target, "dns:") && !strings.HasPrefix(c.Target, "passthrough:") { + return fmt.Errorf("gRPC target %q must use unix://, dns:, or passthrough: scheme", c.Target) + } + if c.UserAgent == "" { return fmt.Errorf("user-agent cannot be empty") } diff --git a/pkg/grpc/client/config_test.go b/pkg/grpc/client/config_test.go index 8cb550ed3..048b54e13 100644 --- a/pkg/grpc/client/config_test.go +++ b/pkg/grpc/client/config_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -86,13 +86,53 @@ func TestConfig_Validate(t *testing.T) { wantErr bool }{ { - name: "Valid config", + name: "Valid unix:/// config", cfg: Config{ Target: "unix:///var/run/test.sock", UserAgent: "test/1.0", }, wantErr: false, }, + { + name: "Valid unix: config", + cfg: Config{ + Target: "unix:/var/run/test.sock", + UserAgent: "test/1.0", + }, + wantErr: false, + }, + { + name: "Valid dns: config", + cfg: Config{ + Target: "dns:///localhost:8080", + UserAgent: "test/1.0", + }, + wantErr: false, + }, + { + name: "Valid passthrough: config", + cfg: Config{ + Target: "passthrough:///localhost:8080", + UserAgent: "test/1.0", + }, + wantErr: false, + }, + { + name: "Rejects http scheme", + cfg: Config{ + Target: "http://evil.com", + UserAgent: "test/1.0", + }, + wantErr: true, + }, + { + name: "Rejects bare hostname", + cfg: Config{ + Target: "somehost:1234", + UserAgent: "test/1.0", + }, + wantErr: true, + }, { name: "Missing target", cfg: Config{ diff --git a/pkg/grpc/client/interceptors.go b/pkg/grpc/client/interceptors.go index 796a34e50..c8e9e391c 100644 --- a/pkg/grpc/client/interceptors.go +++ b/pkg/grpc/client/interceptors.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -46,7 +46,8 @@ func NewLatencyUnaryInterceptor(logger logr.Logger) grpc.UnaryClientInterceptor return err } - logger.Error(err, "RPC failed", kv...) + logger.V(4).Info("RPC error details", "error", err) + logger.Error(nil, "RPC failed", kv...) return err } @@ -81,7 +82,8 @@ func NewLatencyStreamInterceptor(logger logr.Logger) grpc.StreamClientIntercepto return stream, err } - logger.Error(err, "Stream establishment failed", kv...) + logger.V(4).Info("Stream error details", "error", err) + logger.Error(nil, "Stream establishment failed", kv...) 
return stream, err } diff --git a/pkg/grpc/client/watcher.go b/pkg/grpc/client/watcher.go index 5972ef536..f688d550e 100644 --- a/pkg/grpc/client/watcher.go +++ b/pkg/grpc/client/watcher.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,8 +17,10 @@ package client import ( "context" "errors" + "fmt" "io" "sync" + "time" "github.com/go-logr/logr" "google.golang.org/grpc/codes" @@ -64,17 +66,16 @@ func NewWatcher( return w } -// Stop cancels the context and closes the event source. +// Stop signals the receive loop to exit, cancels the context, and closes the event source. func (w *Watcher) Stop() { w.stopOnce.Do(func() { w.logger.V(4).Info("Stopping watcher") - w.cancel() + close(w.done) // Signal receive loop to exit first + w.cancel() // Cancel the context if err := w.source.Close(); err != nil { w.logger.V(4).Info("Error closing source during stop", "err", err) } - - close(w.done) }) } @@ -125,7 +126,7 @@ func (w *Watcher) receive() { return default: - w.logger.V(2).Info("Skipping unknown event type from server", "rawType", typeStr) + w.logger.V(1).Info("Skipping unknown event type from server", "rawType", typeStr) continue } @@ -141,17 +142,26 @@ func (w *Watcher) receive() { "resourceVersion", meta.GetResourceVersion(), ) } + case <-time.After(30 * time.Second): + w.logger.Error(nil, "Event send timed out; consumer not reading, stopping watcher") + return } } } func (w *Watcher) sendError(err error) { st := status.Convert(err) - code := st.Code() + + // Log full error details at debug level only + w.logger.V(4).Info("Watch stream error", + "code", code, + "serverMessage", st.Message(), + ) + statusErr := &metav1.Status{ Status: metav1.StatusFailure, - Message: st.Message(), + Message: fmt.Sprintf("watch stream error: %s", 
code.String()), Code: int32(code), // #nosec G115 } @@ -181,5 +191,7 @@ func (w *Watcher) sendError(err error) { case <-w.done: w.logger.V(4).Info("Watcher already done, dropping error event") case w.result <- watch.Event{Type: watch.Error, Object: statusErr}: + case <-time.After(5 * time.Second): + w.logger.V(2).Info("Error event send timed out, dropping") } } diff --git a/pkg/providers/nvml/enumerator.go b/pkg/providers/nvml/enumerator.go new file mode 100644 index 000000000..f1ac61b38 --- /dev/null +++ b/pkg/providers/nvml/enumerator.go @@ -0,0 +1,199 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build nvml + +package nvml + +import ( + "fmt" + "time" + + "github.com/NVIDIA/go-nvml/pkg/nvml" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + devicev1alpha1 "github.com/nvidia/nvsentinel/api/device/v1alpha1" +) + +// enumerateDevices discovers all GPUs via NVML and registers them via gRPC. +// +// For each GPU found, it extracts device information and creates a GPU entry +// via the GpuService API with an initial "NVMLReady" condition set to True. +// +// Returns the number of GPUs discovered. 
+func (p *Provider) enumerateDevices() (int, error) { + count, ret := p.nvmllib.DeviceGetCount() + if ret != nvml.SUCCESS { + return 0, fmt.Errorf("failed to get device count: %v", nvml.ErrorString(ret)) + } + + if count == 0 { + p.logger.Info("No GPUs found on this node") + return 0, nil + } + + p.logger.V(1).Info("Enumerating GPUs", "count", count) + + successCount := 0 + uuids := make([]string, 0, count) + + for i := 0; i < count; i++ { + device, ret := p.nvmllib.DeviceGetHandleByIndex(i) + if ret != nvml.SUCCESS { + p.logger.Error(nil, "Failed to get device handle", "index", i, "error", nvml.ErrorString(ret)) + + continue + } + + gpu, productName, memoryBytes, err := p.deviceToGpu(i, device) + if err != nil { + p.logger.Error(err, "Failed to get GPU info", "index", i) + + continue + } + + // Register GPU via typed client (Create is idempotent -- returns existing GPU if already registered) + _, err = p.client.Create(p.ctx, gpu, metav1.CreateOptions{}) + if err != nil { + p.logger.Error(err, "Failed to create GPU via gRPC", "uuid", gpu.Name) + + continue + } + + // Track UUID for health monitoring + uuids = append(uuids, gpu.Name) + + p.logger.Info("GPU registered", + "uuid", gpu.Name, + "productName", productName, + "memory", FormatBytes(memoryBytes), + ) + + successCount++ + } + + // Assign tracked UUIDs atomically (caller holds p.mu) + p.gpuUUIDs = uuids + + return successCount, nil +} + +// deviceToGpu extracts GPU information from an NVML device handle. +// Returns the GPU object, product name, and memory bytes (for logging). 
+func (p *Provider) deviceToGpu(index int, device Device) (*devicev1alpha1.GPU, string, uint64, error) { + // Get UUID (required) + uuid, ret := device.GetUUID() + if ret != nvml.SUCCESS { + return nil, "", 0, fmt.Errorf("failed to get UUID: %v", nvml.ErrorString(ret)) + } + + // Get memory info (for logging) + var memoryBytes uint64 + + memInfo, ret := device.GetMemoryInfo() + if ret == nvml.SUCCESS { + memoryBytes = memInfo.Total + } + + // Get product name (for logging) + productName, ret := device.GetName() + if ret != nvml.SUCCESS { + productName = "Unknown" + } + + // Build GPU object using K8s-native types + now := metav1.Now() + gpu := &devicev1alpha1.GPU{ + ObjectMeta: metav1.ObjectMeta{ + Name: uuid, + }, + Spec: devicev1alpha1.GPUSpec{ + UUID: uuid, + }, + Status: devicev1alpha1.GPUStatus{ + Conditions: []metav1.Condition{ + { + Type: ConditionTypeNVMLReady, + Status: metav1.ConditionStatus(ConditionStatusTrue), + Reason: "Initialized", + Message: fmt.Sprintf("GPU enumerated via NVML: %s (%s)", productName, FormatBytes(memoryBytes)), + LastTransitionTime: now, + }, + }, + }, + } + + return gpu, productName, memoryBytes, nil +} + +// UpdateCondition updates a single condition on a GPU via the typed client. +// +// This method: +// 1. Gets the current GPU state +// 2. Updates/adds the condition in the status +// 3. Sends the updated status via UpdateStatus (status subresource) +// +// The condition's LastTransitionTime is set to the current time. 
+func (p *Provider) UpdateCondition( + uuid string, + conditionType string, + conditionStatus string, + reason, message string, +) error { + // Get current GPU state + gpu, err := p.client.Get(p.ctx, uuid, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get GPU %s: %w", uuid, err) + } + + if gpu == nil { + return fmt.Errorf("Get returned nil for %s", uuid) + } + + // Build the new condition + condition := metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionStatus(conditionStatus), + Reason: reason, + Message: message, + LastTransitionTime: metav1.NewTime(time.Now()), + } + + // Find and replace existing condition, or append + found := false + for i, existing := range gpu.Status.Conditions { + if existing.Type == conditionType { + gpu.Status.Conditions[i] = condition + found = true + break + } + } + if !found { + gpu.Status.Conditions = append(gpu.Status.Conditions, condition) + } + + // Cap conditions to prevent unbounded growth + const maxConditions = 100 + if len(gpu.Status.Conditions) > maxConditions { + gpu.Status.Conditions = gpu.Status.Conditions[len(gpu.Status.Conditions)-maxConditions:] + } + + // Update the GPU status via the status subresource + _, err = p.client.UpdateStatus(p.ctx, gpu, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to update GPU status %s: %w", uuid, err) + } + + return nil +} diff --git a/pkg/providers/nvml/health_monitor.go b/pkg/providers/nvml/health_monitor.go new file mode 100644 index 000000000..5169b3d79 --- /dev/null +++ b/pkg/providers/nvml/health_monitor.go @@ -0,0 +1,282 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build nvml + +package nvml + +import ( + "fmt" + "time" + + "github.com/NVIDIA/go-nvml/pkg/nvml" +) + +// HealthMonitor monitors GPU health via NVML events. +type HealthMonitor struct { + provider *Provider +} + +// EventTimeout is the timeout for NVML event wait (in milliseconds). +const EventTimeout = 5000 + +// unknownUUID is used when UUID cannot be retrieved. +const unknownUUID = "unknown" + +// startHealthMonitoring initializes and starts XID event monitoring. +func (p *Provider) startHealthMonitoring() error { + // Create event set + eventSet, ret := p.nvmllib.EventSetCreate() + if ret != nvml.SUCCESS { + return fmt.Errorf("failed to create event set: %v", nvml.ErrorString(ret)) + } + + p.eventSet = eventSet + + // Register for health events on all GPUs + eventMask := uint64( + nvml.EventTypeXidCriticalError | + nvml.EventTypeDoubleBitEccError | + nvml.EventTypeSingleBitEccError, + ) + + count, ret := p.nvmllib.DeviceGetCount() + if ret != nvml.SUCCESS { + _ = p.eventSet.Free() + p.eventSet = nil + return fmt.Errorf("failed to get device count for health monitoring: %v", nvml.ErrorString(ret)) + } + + registeredCount := 0 + + for i := 0; i < count; i++ { + device, ret := p.nvmllib.DeviceGetHandleByIndex(i) + if ret != nvml.SUCCESS { + continue + } + + uuid, ret := device.GetUUID() + if ret != nvml.SUCCESS { + p.logger.V(1).Info("Failed to get device UUID for health monitoring, skipping", + "index", i, + "error", nvml.ErrorString(ret), + ) + continue + } + + // Get supported events for this device + supportedEvents, ret := 
device.GetSupportedEventTypes() + if ret != nvml.SUCCESS { + p.logger.V(1).Info("Device does not support event queries", + "index", i, + "uuid", uuid, + "error", nvml.ErrorString(ret), + ) + + continue + } + + // Register only supported events + eventsToRegister := eventMask & supportedEvents + if eventsToRegister == 0 { + p.logger.V(1).Info("Device does not support any health events", + "index", i, + "uuid", uuid, + ) + + continue + } + + ret = device.RegisterEvents(eventsToRegister, p.eventSet.Raw()) + if ret == nvml.ERROR_NOT_SUPPORTED { + p.logger.V(1).Info("Device too old for health monitoring", + "index", i, + "uuid", uuid, + ) + + continue + } + + if ret != nvml.SUCCESS { + p.logger.Error(nil, "Failed to register events", + "index", i, + "uuid", uuid, + "error", nvml.ErrorString(ret), + ) + + continue + } + + registeredCount++ + + p.logger.V(2).Info("Registered health events", + "index", i, + "uuid", uuid, + "events", eventsToRegister, + ) + } + + if registeredCount == 0 { + _ = p.eventSet.Free() + p.eventSet = nil + + return fmt.Errorf("no devices support health event monitoring") + } + + p.logger.Info("Starting health monitoring", "devices", registeredCount) + + // Create health monitor + p.healthMonitor = &HealthMonitor{provider: p} + + // Start monitoring goroutine + p.wg.Add(1) + + go p.runHealthMonitor() + + p.monitorRunning = true + + return nil +} + +// runHealthMonitor is the main health monitoring loop. +// +// The loop checks for context cancellation before each iteration to ensure +// prompt shutdown when requested. The processEvents() call blocks for up to +// EventTimeout milliseconds waiting for NVML events. +func (p *Provider) runHealthMonitor() { + defer p.wg.Done() + + p.logger.V(1).Info("Health monitor started") + + for { + // Check for shutdown before processing events. + // This ensures we respond promptly to cancellation rather than + // waiting for the next event timeout cycle. 
+		select {
+		case <-p.ctx.Done():
+			p.logger.V(1).Info("Health monitor stopping")
+			return
+		default:
+		}
+
+		p.processEvents()
+	}
+}
+
+// processEvents performs one wait-and-dispatch cycle on the NVML event set.
+//
+// It blocks for at most EventTimeout waiting for an event, then either
+// returns (timeout / error) or forwards the event to handleEvent. It is
+// called in a loop by the health-monitor goroutine, which checks p.ctx
+// between iterations.
+func (p *Provider) processEvents() {
+	event, ret := p.eventSet.Wait(EventTimeout)
+
+	if ret == nvml.ERROR_TIMEOUT {
+		// Normal timeout, continue
+		return
+	}
+
+	if ret != nvml.SUCCESS {
+		if ret == nvml.ERROR_GPU_IS_LOST {
+			p.logger.Error(nil, "GPU lost detected, marking all GPUs unhealthy")
+			p.markAllUnhealthy("GPULost", "GPU is lost error detected")
+
+			return
+		}
+
+		p.logger.V(2).Info("Error waiting for event",
+			"error", nvml.ErrorString(ret),
+		)
+
+		// Brief backoff to avoid a tight loop on persistent errors.
+		// Use a ctx-aware wait so shutdown is not delayed by the backoff.
+		select {
+		case <-p.ctx.Done():
+		case <-time.After(100 * time.Millisecond):
+		}
+
+		return
+	}
+
+	// Process the event
+	p.handleEvent(event)
+}
+
+// handleEvent processes a single NVML event.
+//
+// Only XID critical-error events change health state; all other event
+// types are logged at verbosity 2 and dropped. XIDs on the ignore list
+// (defaults plus config.AdditionalIgnoredXids) are also dropped.
+func (p *Provider) handleEvent(event nvml.EventData) {
+	eventType := event.EventType
+	xid := event.EventData
+	gpuInstanceID := event.GpuInstanceId
+	computeInstanceID := event.ComputeInstanceId
+
+	// Get UUID for logging; fall back to unknownUUID when the event
+	// carries no device handle or the UUID query fails.
+	uuid := unknownUUID
+
+	if event.Device != nil {
+		if u, ret := event.Device.GetUUID(); ret == nvml.SUCCESS {
+			uuid = u
+		}
+	}
+
+	// Only process XID critical errors for health changes
+	if eventType != nvml.EventTypeXidCriticalError {
+		p.logger.V(2).Info("Non-critical event received",
+			"uuid", uuid,
+			"eventType", eventType,
+			"xid", xid,
+		)
+
+		return
+	}
+
+	// Check if this XID should be ignored
+	if isIgnoredXid(xid, p.additionalIgnoredXids) {
+		p.logger.V(2).Info("Ignoring non-critical XID",
+			"uuid", uuid,
+			"xid", xid,
+			"gpuInstanceId", gpuInstanceID,
+			"computeInstanceId", computeInstanceID,
+		)
+
+		return
+	}
+
+	// Critical XID - mark GPU unhealthy
+	p.logger.Info("Critical XID error detected",
+		"uuid", uuid,
+		"xid", xid,
+		"xidName", xidToString(xid),
+		"gpuInstanceId", gpuInstanceID,
+		"computeInstanceId", computeInstanceID,
+	)
+
+	message := fmt.Sprintf("Critical XID error %d (%s) detected", xid, xidToString(xid))
+	if err := p.UpdateCondition(uuid, ConditionTypeNVMLReady, ConditionStatusFalse, "XidError", message); err != nil {
+		p.logger.Error(err, "Failed to update GPU condition", "uuid", uuid)
+	}
+}
+
+// markAllUnhealthy marks all tracked GPUs as unhealthy.
+//
+// It snapshots p.gpuUUIDs under the read lock, then updates conditions
+// without holding the lock so UpdateCondition can acquire it itself.
+func (p *Provider) markAllUnhealthy(reason, message string) {
+	p.mu.RLock()
+	uuids := make([]string, len(p.gpuUUIDs))
+	copy(uuids, p.gpuUUIDs)
+	p.mu.RUnlock()
+
+	for _, uuid := range uuids {
+		err := p.UpdateCondition(uuid, ConditionTypeNVMLReady, ConditionStatusFalse, reason, message)
+		if err != nil {
+			p.logger.Error(err, "Failed to mark GPU unhealthy", "uuid", uuid)
+		}
+	}
+}
+
+// MarkHealthy marks a specific GPU as healthy.
+//
+// This can be called to restore a GPU's health status after recovery.
+func (p *Provider) MarkHealthy(uuid string) error {
+	return p.UpdateCondition(uuid, ConditionTypeNVMLReady, ConditionStatusTrue, "Healthy", "GPU is healthy")
+}
diff --git a/pkg/providers/nvml/interface.go b/pkg/providers/nvml/interface.go
new file mode 100644
index 000000000..5b534b154
--- /dev/null
+++ b/pkg/providers/nvml/interface.go
@@ -0,0 +1,143 @@
+// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build nvml
+
+package nvml
+
+import (
+	"github.com/NVIDIA/go-nvml/pkg/nvml"
+)
+
+// Library is the interface for NVML library operations.
+// This interface contains only the methods used by the Provider, +// making it easier to mock for testing. +type Library interface { + Init() nvml.Return + Shutdown() nvml.Return + SystemGetDriverVersion() (string, nvml.Return) + DeviceGetCount() (int, nvml.Return) + DeviceGetHandleByIndex(index int) (Device, nvml.Return) + EventSetCreate() (EventSet, nvml.Return) +} + +// Device is the interface for NVML device operations. +type Device interface { + GetUUID() (string, nvml.Return) + GetName() (string, nvml.Return) + GetMemoryInfo() (nvml.Memory, nvml.Return) + GetRetiredPagesPendingStatus() (nvml.EnableState, nvml.Return) + GetSupportedEventTypes() (uint64, nvml.Return) + RegisterEvents(eventTypes uint64, set nvml.EventSet) nvml.Return +} + +// EventSet is the interface for NVML event set operations. +type EventSet interface { + Wait(timeout uint32) (nvml.EventData, nvml.Return) + Free() nvml.Return + // Raw returns the underlying nvml.EventSet for use with RegisterEvents. + Raw() nvml.EventSet +} + +// nvmlLibraryWrapper wraps the real nvml.Interface to implement Library. +type nvmlLibraryWrapper struct { + lib nvml.Interface +} + +// NewLibraryWrapper creates a Library wrapper around an nvml.Interface. 
+func NewLibraryWrapper(lib nvml.Interface) Library { + return &nvmlLibraryWrapper{lib: lib} +} + +func (w *nvmlLibraryWrapper) Init() nvml.Return { + return w.lib.Init() +} + +func (w *nvmlLibraryWrapper) Shutdown() nvml.Return { + return w.lib.Shutdown() +} + +func (w *nvmlLibraryWrapper) SystemGetDriverVersion() (string, nvml.Return) { + return w.lib.SystemGetDriverVersion() +} + +func (w *nvmlLibraryWrapper) DeviceGetCount() (int, nvml.Return) { + return w.lib.DeviceGetCount() +} + +func (w *nvmlLibraryWrapper) DeviceGetHandleByIndex(index int) (Device, nvml.Return) { + device, ret := w.lib.DeviceGetHandleByIndex(index) + if ret != nvml.SUCCESS { + return nil, ret + } + + return &nvmlDeviceWrapper{device: device}, ret +} + +func (w *nvmlLibraryWrapper) EventSetCreate() (EventSet, nvml.Return) { + es, ret := w.lib.EventSetCreate() + if ret != nvml.SUCCESS { + return nil, ret + } + + return &nvmlEventSetWrapper{es: es}, ret +} + +// nvmlDeviceWrapper wraps nvml.Device to implement Device. +type nvmlDeviceWrapper struct { + device nvml.Device +} + +func (w *nvmlDeviceWrapper) GetUUID() (string, nvml.Return) { + return w.device.GetUUID() +} + +func (w *nvmlDeviceWrapper) GetName() (string, nvml.Return) { + return w.device.GetName() +} + +func (w *nvmlDeviceWrapper) GetMemoryInfo() (nvml.Memory, nvml.Return) { + return w.device.GetMemoryInfo() +} + +func (w *nvmlDeviceWrapper) GetRetiredPagesPendingStatus() (nvml.EnableState, nvml.Return) { + return w.device.GetRetiredPagesPendingStatus() +} + +func (w *nvmlDeviceWrapper) GetSupportedEventTypes() (uint64, nvml.Return) { + return w.device.GetSupportedEventTypes() +} + +func (w *nvmlDeviceWrapper) RegisterEvents(eventTypes uint64, set nvml.EventSet) nvml.Return { + return w.device.RegisterEvents(eventTypes, set) +} + +// nvmlEventSetWrapper wraps nvml.EventSet to implement EventSet. 
+type nvmlEventSetWrapper struct {
+	es nvml.EventSet
+}
+
+// Wait delegates to the wrapped nvml.EventSet.
+func (w *nvmlEventSetWrapper) Wait(timeout uint32) (nvml.EventData, nvml.Return) {
+	return w.es.Wait(timeout)
+}
+
+// Free releases the wrapped event set's resources.
+func (w *nvmlEventSetWrapper) Free() nvml.Return {
+	return w.es.Free()
+}
+
+// Raw returns the underlying nvml.EventSet for use with device.RegisterEvents.
+// This is needed because RegisterEvents expects the concrete nvml.EventSet type.
+func (w *nvmlEventSetWrapper) Raw() nvml.EventSet {
+	return w.es
+}
diff --git a/pkg/providers/nvml/mock_test.go b/pkg/providers/nvml/mock_test.go
new file mode 100644
index 000000000..05785ae64
--- /dev/null
+++ b/pkg/providers/nvml/mock_test.go
@@ -0,0 +1,245 @@
+// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build nvml
+
+package nvml
+
+import (
+	"sync"
+
+	"github.com/NVIDIA/go-nvml/pkg/nvml"
+)
+
+// MockLibrary is a mock implementation of Library for testing.
+type MockLibrary struct { + // Init behavior + InitReturn nvml.Return + + // Shutdown behavior + ShutdownReturn nvml.Return + + // SystemGetDriverVersion behavior + DriverVersion string + DriverVersionReturn nvml.Return + + // DeviceGetCount behavior + DeviceCount int + DeviceCountReturn nvml.Return + + // Devices returns mock devices by index + Devices map[int]*MockDevice + + // EventSetCreate behavior + EventSet *MockEventSet + EventSetCreateReturn nvml.Return + + // Track calls for verification + mu sync.Mutex + InitCalled bool + ShutdownCalled bool +} + +// NewMockLibrary creates a new mock Library with defaults. +func NewMockLibrary() *MockLibrary { + return &MockLibrary{ + InitReturn: nvml.SUCCESS, + ShutdownReturn: nvml.SUCCESS, + DriverVersion: "535.104.05", + DriverVersionReturn: nvml.SUCCESS, + DeviceCount: 0, + DeviceCountReturn: nvml.SUCCESS, + Devices: make(map[int]*MockDevice), + EventSetCreateReturn: nvml.SUCCESS, + } +} + +// AddDevice adds a mock device at the specified index. +func (m *MockLibrary) AddDevice(index int, device *MockDevice) { + m.Devices[index] = device + m.DeviceCount = len(m.Devices) +} + +// Init implements Library. +func (m *MockLibrary) Init() nvml.Return { + m.mu.Lock() + defer m.mu.Unlock() + m.InitCalled = true + + return m.InitReturn +} + +// Shutdown implements Library. +func (m *MockLibrary) Shutdown() nvml.Return { + m.mu.Lock() + defer m.mu.Unlock() + m.ShutdownCalled = true + + return m.ShutdownReturn +} + +// SystemGetDriverVersion implements Library. +func (m *MockLibrary) SystemGetDriverVersion() (string, nvml.Return) { + return m.DriverVersion, m.DriverVersionReturn +} + +// DeviceGetCount implements Library. +func (m *MockLibrary) DeviceGetCount() (int, nvml.Return) { + return m.DeviceCount, m.DeviceCountReturn +} + +// DeviceGetHandleByIndex implements Library. 
+func (m *MockLibrary) DeviceGetHandleByIndex(index int) (Device, nvml.Return) { + if device, ok := m.Devices[index]; ok { + return device, nvml.SUCCESS + } + + return nil, nvml.ERROR_NOT_FOUND +} + +// EventSetCreate implements Library. +func (m *MockLibrary) EventSetCreate() (EventSet, nvml.Return) { + if m.EventSet == nil { + m.EventSet = NewMockEventSet() + } + + return m.EventSet, m.EventSetCreateReturn +} + +// MockDevice is a mock implementation of Device. +type MockDevice struct { + UUID string + UUIDReturn nvml.Return + Name string + NameReturn nvml.Return + MemoryInfo nvml.Memory + MemoryInfoReturn nvml.Return + RetiredPagesPending nvml.EnableState + RetiredPagesPendingReturn nvml.Return + SupportedEvents uint64 + SupportedEventsReturn nvml.Return + RegisterEventsReturn nvml.Return +} + +// NewMockDevice creates a new mock device with sensible defaults. +func NewMockDevice(uuid, name string) *MockDevice { + return &MockDevice{ + UUID: uuid, + UUIDReturn: nvml.SUCCESS, + Name: name, + NameReturn: nvml.SUCCESS, + MemoryInfo: nvml.Memory{ + Total: 16 * 1024 * 1024 * 1024, // 16 GB + Free: 15 * 1024 * 1024 * 1024, + Used: 1 * 1024 * 1024 * 1024, + }, + MemoryInfoReturn: nvml.SUCCESS, + SupportedEvents: uint64(nvml.EventTypeXidCriticalError | nvml.EventTypeDoubleBitEccError), + SupportedEventsReturn: nvml.SUCCESS, + RegisterEventsReturn: nvml.SUCCESS, + } +} + +// GetUUID implements Device. +func (d *MockDevice) GetUUID() (string, nvml.Return) { + return d.UUID, d.UUIDReturn +} + +// GetName implements Device. +func (d *MockDevice) GetName() (string, nvml.Return) { + return d.Name, d.NameReturn +} + +// GetMemoryInfo implements Device. +func (d *MockDevice) GetMemoryInfo() (nvml.Memory, nvml.Return) { + return d.MemoryInfo, d.MemoryInfoReturn +} + +// GetRetiredPagesPendingStatus implements Device. 
+func (d *MockDevice) GetRetiredPagesPendingStatus() (nvml.EnableState, nvml.Return) { + return d.RetiredPagesPending, d.RetiredPagesPendingReturn +} + +// GetSupportedEventTypes implements Device. +func (d *MockDevice) GetSupportedEventTypes() (uint64, nvml.Return) { + return d.SupportedEvents, d.SupportedEventsReturn +} + +// RegisterEvents implements Device. +func (d *MockDevice) RegisterEvents(_ uint64, _ nvml.EventSet) nvml.Return { + return d.RegisterEventsReturn +} + +// MockEventSet is a mock implementation of EventSet. +type MockEventSet struct { + mu sync.Mutex + events []nvml.EventData + eventIdx int + WaitReturn nvml.Return + FreeReturn nvml.Return + Freed bool +} + +// NewMockEventSet creates a new mock event set. +func NewMockEventSet() *MockEventSet { + return &MockEventSet{ + events: make([]nvml.EventData, 0), + WaitReturn: nvml.ERROR_TIMEOUT, + FreeReturn: nvml.SUCCESS, + } +} + +// AddEvent adds an event to be returned by Wait. +func (e *MockEventSet) AddEvent(event nvml.EventData) { + e.mu.Lock() + defer e.mu.Unlock() + e.events = append(e.events, event) +} + +// Wait implements EventSet. +func (e *MockEventSet) Wait(_ uint32) (nvml.EventData, nvml.Return) { + e.mu.Lock() + defer e.mu.Unlock() + + if e.eventIdx < len(e.events) { + event := e.events[e.eventIdx] + e.eventIdx++ + + return event, nvml.SUCCESS + } + + return nvml.EventData{}, e.WaitReturn +} + +// Free implements EventSet. +func (e *MockEventSet) Free() nvml.Return { + e.mu.Lock() + defer e.mu.Unlock() + e.Freed = true + + return e.FreeReturn +} + +// Raw implements EventSet - returns nil for mocks since we don't need real event set. +func (e *MockEventSet) Raw() nvml.EventSet { + return nil +} + +// Compile-time interface checks. 
+var ( + _ Library = (*MockLibrary)(nil) + _ Device = (*MockDevice)(nil) + _ EventSet = (*MockEventSet)(nil) +) + diff --git a/pkg/providers/nvml/provider.go b/pkg/providers/nvml/provider.go new file mode 100644 index 000000000..77c26bcd7 --- /dev/null +++ b/pkg/providers/nvml/provider.go @@ -0,0 +1,275 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build nvml + +// Package nvml provides a built-in NVML-based health provider for the Device API Server. +// +// This provider uses NVML (NVIDIA Management Library) to: +// - Enumerate GPUs on the node at startup +// - Monitor GPU health via XID error events +// - Provide baseline device information when no external providers are connected +// +// The provider requires the NVIDIA driver to be installed and NVML libraries to be +// accessible. When running in Kubernetes, this is typically achieved by using the +// "nvidia" RuntimeClass which injects the driver libraries via the NVIDIA Container +// Toolkit, without consuming GPU resources. +package nvml + +import ( + "context" + "fmt" + "sync" + + "github.com/NVIDIA/go-nvml/pkg/nvml" + "k8s.io/klog/v2" + + gpuclient "github.com/nvidia/nvsentinel/pkg/client-go/client/versioned/typed/device/v1alpha1" +) + +// Provider is the built-in NVML-based health provider. +// +// It uses NVML to enumerate GPUs and monitor their health status. 
+// The provider is optional and gracefully degrades if NVML is unavailable. +// +// The provider communicates with the Device API Server via the gRPC client +// interface, making it a "dogfooding" client of its own API. This design: +// - Decouples the provider from server internals +// - Enables running the provider as a separate sidecar process +// - Validates the API from a provider's perspective +type Provider struct { + // Configuration + config Config + + // NVML library interface (uses our wrapper for testability) + nvmllib Library + + // Typed client to communicate with Device API Server + client gpuclient.GPUInterface + + // Logger + logger klog.Logger + + // Health monitoring + eventSet EventSet + healthMonitor *HealthMonitor + monitorRunning bool + + // Lifecycle management + mu sync.RWMutex + ctx context.Context + cancel context.CancelFunc + wg sync.WaitGroup + + // State + initialized bool + gpuCount int + + // Tracked GPU UUIDs for health monitoring + gpuUUIDs []string + + // Pre-computed map of additional ignored XIDs for O(1) lookup + additionalIgnoredXids map[uint64]bool +} + +// Config holds configuration for the NVML provider. +type Config struct { + // DriverRoot is the root path where NVIDIA driver libraries are located. + // Common values: + // - "/run/nvidia/driver" (container with CDI/RuntimeClass) + // - "/" (bare metal or host path mount) + DriverRoot string + + // AdditionalIgnoredXids is a list of additional XID error codes to ignore. + // These are added to the default list of ignored XIDs (application errors). + AdditionalIgnoredXids []uint64 + + // HealthCheckEnabled enables XID event monitoring for health checks. + // When disabled, only device enumeration is performed. + HealthCheckEnabled bool +} + +// DefaultConfig returns a Config with sensible defaults. 
+func DefaultConfig() Config { + return Config{ + DriverRoot: "/run/nvidia/driver", + AdditionalIgnoredXids: nil, + HealthCheckEnabled: true, + } +} + +// New creates a new NVML provider. +// +// The provider is not started until Start() is called. If NVML cannot be +// initialized (e.g., no driver installed), Start() will return an error +// but the server can continue without NVML support. +// +// The client parameter is a GPUInterface used to communicate with the +// Device API Server. This enables the provider to be either: +// - Co-located with the server (using a loopback connection) +// - Running as a separate sidecar process (using a network connection) +func New(cfg Config, client gpuclient.GPUInterface, logger klog.Logger) *Provider { + logger = logger.WithName("nvml-provider") + + // Find NVML library path + libraryPath := FindDriverLibrary(cfg.DriverRoot) + logger.V(2).Info("Using NVML library path", "path", libraryPath) + + // Create NVML interface with explicit library path + var rawLib nvml.Interface + if libraryPath != "" { + rawLib = nvml.New(nvml.WithLibraryPath(libraryPath)) + } else { + // Fall back to system default + rawLib = nvml.New() + } + + return &Provider{ + config: cfg, + nvmllib: NewLibraryWrapper(rawLib), + client: client, + logger: logger, + } +} + +// Start initializes NVML and enumerates GPUs. +// +// If health checking is enabled, it also starts the XID event monitoring +// goroutine. Returns an error if NVML cannot be initialized. 
+func (p *Provider) Start(ctx context.Context) error { + p.mu.Lock() + defer p.mu.Unlock() + + if p.initialized { + return fmt.Errorf("provider already started") + } + + p.logger.Info("Starting NVML provider") + + // Initialize NVML + ret := p.nvmllib.Init() + if ret != nvml.SUCCESS { + return fmt.Errorf("failed to initialize NVML: %v", nvml.ErrorString(ret)) + } + + // Get driver version for logging + driverVersion, ret := p.nvmllib.SystemGetDriverVersion() + if ret == nvml.SUCCESS { + p.logger.Info("NVML initialized", "driverVersion", driverVersion) + } + + // Build map of additional ignored XIDs for O(1) lookup + if len(p.config.AdditionalIgnoredXids) > 0 { + p.additionalIgnoredXids = make(map[uint64]bool, len(p.config.AdditionalIgnoredXids)) + for _, xid := range p.config.AdditionalIgnoredXids { + p.additionalIgnoredXids[xid] = true + } + } + + // Set up context for lifecycle management (must be before enumerateDevices, + // which uses p.ctx for gRPC calls) + p.ctx, p.cancel = context.WithCancel(ctx) + + // Enumerate devices + count, err := p.enumerateDevices() + if err != nil { + p.cancel() + p.ctx = nil + p.cancel = nil + _ = p.nvmllib.Shutdown() + + return fmt.Errorf("failed to enumerate devices: %w", err) + } + + p.gpuCount = count + + p.logger.Info("Enumerated GPUs", "count", count) + + p.initialized = true + + // Start health monitoring if enabled and we have GPUs + if p.config.HealthCheckEnabled && count > 0 { + if err := p.startHealthMonitoring(); err != nil { + p.logger.Error(err, "Failed to start health monitoring, continuing without it") + // Don't fail - health monitoring is optional + } + } + + return nil +} + +// Stop shuts down the NVML provider. +// +// It stops health monitoring (if running) and shuts down NVML. +// This method is safe to call multiple times. 
+func (p *Provider) Stop() { + p.mu.Lock() + defer p.mu.Unlock() + + if !p.initialized { + return + } + + p.logger.Info("Stopping NVML provider") + + // Cancel context to stop health monitoring + if p.cancel != nil { + p.cancel() + } + + // Wait for health monitor to stop + p.wg.Wait() + + // Clean up event set + if p.eventSet != nil { + if ret := p.eventSet.Free(); ret != nvml.SUCCESS { + p.logger.V(1).Info("Failed to free event set", "error", nvml.ErrorString(ret)) + } + + p.eventSet = nil + } + + // Shutdown NVML + if ret := p.nvmllib.Shutdown(); ret != nvml.SUCCESS { + p.logger.V(1).Info("Failed to shutdown NVML", "error", nvml.ErrorString(ret)) + } + + p.initialized = false + p.monitorRunning = false + p.logger.Info("NVML provider stopped") +} + +// IsInitialized returns true if the provider has been successfully started. +func (p *Provider) IsInitialized() bool { + p.mu.Lock() + defer p.mu.Unlock() + + return p.initialized +} + +// GPUCount returns the number of GPUs discovered. +func (p *Provider) GPUCount() int { + p.mu.Lock() + defer p.mu.Unlock() + + return p.gpuCount +} + +// IsHealthMonitorRunning returns true if health monitoring is active. +func (p *Provider) IsHealthMonitorRunning() bool { + p.mu.Lock() + defer p.mu.Unlock() + + return p.monitorRunning +} diff --git a/pkg/providers/nvml/provider_test.go b/pkg/providers/nvml/provider_test.go new file mode 100644 index 000000000..244fc106b --- /dev/null +++ b/pkg/providers/nvml/provider_test.go @@ -0,0 +1,606 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build nvml + +package nvml + +import ( + "context" + "testing" + "time" + + "github.com/NVIDIA/go-nvml/pkg/nvml" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/klog/v2" + + devicev1alpha1 "github.com/nvidia/nvsentinel/api/device/v1alpha1" + gpuclient "github.com/nvidia/nvsentinel/pkg/client-go/client/versioned/typed/device/v1alpha1" + "github.com/nvidia/nvsentinel/pkg/testutil" +) + +// testLogger returns a test logger. +func testLogger() klog.Logger { + return klog.NewKlogr().WithName("test") +} + +// TestProvider_Start_Success tests successful provider initialization. 
+func TestProvider_Start_Success(t *testing.T) { + mockLib := NewMockLibrary() + mockLib.AddDevice(0, NewMockDevice("GPU-uuid-0", "NVIDIA A100")) + mockLib.AddDevice(1, NewMockDevice("GPU-uuid-1", "NVIDIA A100")) + + client := testutil.NewTestGPUTypedClient(t) + + provider := &Provider{ + config: DefaultConfig(), + nvmllib: mockLib, + client: client, + logger: testLogger(), + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + err := provider.Start(ctx) + if err != nil { + t.Fatalf("Start() failed: %v", err) + } + defer provider.Stop() + + // Verify NVML was initialized + if !mockLib.InitCalled { + t.Error("Init() was not called") + } + + // Verify GPUs were registered + gpuList, err := client.List(context.Background(), metav1.ListOptions{}) + if err != nil { + t.Fatalf("List failed: %v", err) + } + if len(gpuList.Items) != 2 { + t.Errorf("Expected 2 GPUs, got %d", len(gpuList.Items)) + } + + // Verify provider state + if !provider.IsInitialized() { + t.Error("Provider should be initialized") + } + + if provider.GPUCount() != 2 { + t.Errorf("Expected GPUCount() = 2, got %d", provider.GPUCount()) + } +} + +// TestProvider_Start_NVMLInitFails tests graceful handling of NVML init failure. +func TestProvider_Start_NVMLInitFails(t *testing.T) { + mockLib := NewMockLibrary() + mockLib.InitReturn = nvml.ERROR_LIBRARY_NOT_FOUND + + client := testutil.NewTestGPUTypedClient(t) + + provider := &Provider{ + config: DefaultConfig(), + nvmllib: mockLib, + client: client, + logger: testLogger(), + } + + ctx := context.Background() + err := provider.Start(ctx) + + if err == nil { + t.Fatal("Expected Start() to fail when NVML init fails") + } + + if provider.IsInitialized() { + t.Error("Provider should not be initialized after failure") + } +} + +// TestProvider_Start_NoGPUs tests handling of nodes without GPUs. 
+func TestProvider_Start_NoGPUs(t *testing.T) { + mockLib := NewMockLibrary() + mockLib.DeviceCount = 0 + + client := testutil.NewTestGPUTypedClient(t) + + provider := &Provider{ + config: DefaultConfig(), + nvmllib: mockLib, + client: client, + logger: testLogger(), + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + err := provider.Start(ctx) + if err != nil { + t.Fatalf("Start() failed: %v", err) + } + defer provider.Stop() + + if provider.GPUCount() != 0 { + t.Errorf("Expected 0 GPUs, got %d", provider.GPUCount()) + } + + // Health monitor should not be running with 0 GPUs + if provider.IsHealthMonitorRunning() { + t.Error("Health monitor should not run with 0 GPUs") + } +} + +// TestProvider_Start_AlreadyStarted tests double-start prevention. +func TestProvider_Start_AlreadyStarted(t *testing.T) { + mockLib := NewMockLibrary() + client := testutil.NewTestGPUTypedClient(t) + + provider := &Provider{ + config: DefaultConfig(), + nvmllib: mockLib, + client: client, + logger: testLogger(), + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // First start + err := provider.Start(ctx) + if err != nil { + t.Fatalf("First Start() failed: %v", err) + } + defer provider.Stop() + + // Second start should fail + err = provider.Start(ctx) + if err == nil { + t.Error("Second Start() should fail") + } +} + +// TestProvider_Stop tests provider shutdown. 
+func TestProvider_Stop(t *testing.T) { + mockLib := NewMockLibrary() + mockLib.AddDevice(0, NewMockDevice("GPU-uuid-0", "NVIDIA A100")) + + client := testutil.NewTestGPUTypedClient(t) + + provider := &Provider{ + config: DefaultConfig(), + nvmllib: mockLib, + client: client, + logger: testLogger(), + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + err := provider.Start(ctx) + if err != nil { + t.Fatalf("Start() failed: %v", err) + } + + // Stop the provider + provider.Stop() + + // Verify state + if provider.IsInitialized() { + t.Error("Provider should not be initialized after Stop()") + } + + if !mockLib.ShutdownCalled { + t.Error("NVML Shutdown() was not called") + } + + // Double stop should be safe + provider.Stop() +} + +// TestProvider_Stop_NotStarted tests Stop() on unstarted provider. +func TestProvider_Stop_NotStarted(t *testing.T) { + mockLib := NewMockLibrary() + client := testutil.NewTestGPUTypedClient(t) + + provider := &Provider{ + config: DefaultConfig(), + nvmllib: mockLib, + client: client, + logger: testLogger(), + } + + // Stop should be safe even if not started + provider.Stop() + + if mockLib.ShutdownCalled { + t.Error("Shutdown() should not be called if provider was never started") + } +} + +// TestProvider_DeviceEnumeration tests that devices are properly enumerated. 
+func TestProvider_DeviceEnumeration(t *testing.T) { + mockLib := NewMockLibrary() + + // Add devices with varying configurations + device0 := NewMockDevice("GPU-11111111-1111-1111-1111-111111111111", "NVIDIA H100") + device0.MemoryInfo = nvml.Memory{Total: 80 * 1024 * 1024 * 1024} // 80 GB + + device1 := NewMockDevice("GPU-22222222-2222-2222-2222-222222222222", "NVIDIA A100") + device1.MemoryInfo = nvml.Memory{Total: 40 * 1024 * 1024 * 1024} // 40 GB + + mockLib.AddDevice(0, device0) + mockLib.AddDevice(1, device1) + + client := testutil.NewTestGPUTypedClient(t) + + provider := &Provider{ + config: DefaultConfig(), + nvmllib: mockLib, + client: client, + logger: testLogger(), + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + err := provider.Start(ctx) + if err != nil { + t.Fatalf("Start() failed: %v", err) + } + defer provider.Stop() + + // Verify both devices are registered + gpuList, err := client.List(context.Background(), metav1.ListOptions{}) + if err != nil { + t.Fatalf("List failed: %v", err) + } + gpus := gpuList.Items + if len(gpus) != 2 { + t.Fatalf("Expected 2 GPUs, got %d", len(gpus)) + } + + // Verify GPU details + uuids := make(map[string]bool) + for _, gpu := range gpus { + uuids[gpu.Name] = true + + // Check initial condition + if len(gpu.Status.Conditions) == 0 { + t.Errorf("GPU %s has no conditions", gpu.Name) + continue + } + + cond := gpu.Status.Conditions[0] + if cond.Type != ConditionTypeNVMLReady { + t.Errorf("Expected condition type %s, got %s", ConditionTypeNVMLReady, cond.Type) + } + + if cond.Status != metav1.ConditionStatus(ConditionStatusTrue) { + t.Errorf("Expected condition status True, got %s", cond.Status) + } + } + + if !uuids["GPU-11111111-1111-1111-1111-111111111111"] { + t.Error("GPU-11111111... not found in cache") + } + + if !uuids["GPU-22222222-2222-2222-2222-222222222222"] { + t.Error("GPU-22222222... 
not found in cache") + } +} + +// TestProvider_DeviceEnumeration_PartialFailure tests handling of partial device failures. +func TestProvider_DeviceEnumeration_PartialFailure(t *testing.T) { + mockLib := NewMockLibrary() + + // First device is fine + mockLib.AddDevice(0, NewMockDevice("GPU-good", "NVIDIA A100")) + + // Second device fails UUID retrieval + device1 := NewMockDevice("GPU-bad", "NVIDIA A100") + device1.UUIDReturn = nvml.ERROR_UNKNOWN + mockLib.AddDevice(1, device1) + + // Third device is fine + mockLib.AddDevice(2, NewMockDevice("GPU-good-2", "NVIDIA A100")) + + client := testutil.NewTestGPUTypedClient(t) + + provider := &Provider{ + config: DefaultConfig(), + nvmllib: mockLib, + client: client, + logger: testLogger(), + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + err := provider.Start(ctx) + if err != nil { + t.Fatalf("Start() failed: %v", err) + } + defer provider.Stop() + + // Only 2 GPUs should be registered (one failed) + gpuList, err := client.List(context.Background(), metav1.ListOptions{}) + if err != nil { + t.Fatalf("List failed: %v", err) + } + if len(gpuList.Items) != 2 { + t.Errorf("Expected 2 GPUs (1 failed), got %d", len(gpuList.Items)) + } +} + +// TestProvider_HealthCheckDisabled tests that health monitoring can be disabled. 
+func TestProvider_HealthCheckDisabled(t *testing.T) { + mockLib := NewMockLibrary() + mockLib.AddDevice(0, NewMockDevice("GPU-uuid-0", "NVIDIA A100")) + + client := testutil.NewTestGPUTypedClient(t) + + config := DefaultConfig() + config.HealthCheckEnabled = false + + provider := &Provider{ + config: config, + nvmllib: mockLib, + client: client, + logger: testLogger(), + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + err := provider.Start(ctx) + if err != nil { + t.Fatalf("Start() failed: %v", err) + } + defer provider.Stop() + + // Give a moment for any goroutines to start + time.Sleep(10 * time.Millisecond) + + if provider.IsHealthMonitorRunning() { + t.Error("Health monitor should not be running when disabled") + } +} + +// TestProvider_UpdateCondition tests condition updates. +func TestProvider_UpdateCondition(t *testing.T) { + mockLib := NewMockLibrary() + mockLib.AddDevice(0, NewMockDevice("GPU-uuid-0", "NVIDIA A100")) + + client := testutil.NewTestGPUTypedClient(t) + + config := DefaultConfig() + config.HealthCheckEnabled = false + + provider := &Provider{ + config: config, + nvmllib: mockLib, + client: client, + logger: testLogger(), + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + err := provider.Start(ctx) + if err != nil { + t.Fatalf("Start() failed: %v", err) + } + defer provider.Stop() + + // Update condition to unhealthy + err = provider.UpdateCondition("GPU-uuid-0", ConditionTypeNVMLReady, ConditionStatusFalse, "XidError", "Critical XID 48") + if err != nil { + t.Fatalf("UpdateCondition() failed: %v", err) + } + + // Verify condition was updated + gpu, err := client.Get(context.Background(), "GPU-uuid-0", metav1.GetOptions{}) + if err != nil { + t.Fatalf("Get failed: %v", err) + } + + var foundCondition bool + + for _, cond := range gpu.Status.Conditions { + if cond.Type == ConditionTypeNVMLReady { + foundCondition = true + + if string(cond.Status) != ConditionStatusFalse { + 
t.Errorf("Expected status False, got %s", cond.Status) + } + + if cond.Reason != "XidError" { + t.Errorf("Expected reason XidError, got %s", cond.Reason) + } + } + } + + if !foundCondition { + t.Error("NVMLReady condition not found") + } +} + +// TestProvider_UpdateCondition_GPUNotFound tests condition update for non-existent GPU. +func TestProvider_UpdateCondition_GPUNotFound(t *testing.T) { + mockLib := NewMockLibrary() + client := testutil.NewTestGPUTypedClient(t) + + config := DefaultConfig() + config.HealthCheckEnabled = false + + provider := &Provider{ + config: config, + nvmllib: mockLib, + client: client, + logger: testLogger(), + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + err := provider.Start(ctx) + if err != nil { + t.Fatalf("Start() failed: %v", err) + } + defer provider.Stop() + + // Try to update condition for non-existent GPU + err = provider.UpdateCondition("GPU-nonexistent", ConditionTypeNVMLReady, ConditionStatusFalse, "XidError", "Test") + if err == nil { + t.Error("Expected error for non-existent GPU") + } +} + +// TestProvider_MarkHealthy tests marking a GPU as healthy. 
+func TestProvider_MarkHealthy(t *testing.T) { + mockLib := NewMockLibrary() + mockLib.AddDevice(0, NewMockDevice("GPU-uuid-0", "NVIDIA A100")) + + client := testutil.NewTestGPUTypedClient(t) + + config := DefaultConfig() + config.HealthCheckEnabled = false + + provider := &Provider{ + config: config, + nvmllib: mockLib, + client: client, + logger: testLogger(), + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + err := provider.Start(ctx) + if err != nil { + t.Fatalf("Start() failed: %v", err) + } + defer provider.Stop() + + // First mark as unhealthy + err = provider.UpdateCondition("GPU-uuid-0", ConditionTypeNVMLReady, ConditionStatusFalse, "XidError", "Test") + if err != nil { + t.Fatalf("UpdateCondition() failed: %v", err) + } + + // Then mark as healthy + err = provider.MarkHealthy("GPU-uuid-0") + if err != nil { + t.Fatalf("MarkHealthy() failed: %v", err) + } + + // Verify it's healthy + gpu, err := client.Get(context.Background(), "GPU-uuid-0", metav1.GetOptions{}) + if err != nil { + t.Fatalf("Get failed: %v", err) + } + + for _, cond := range gpu.Status.Conditions { + if cond.Type == ConditionTypeNVMLReady { + if string(cond.Status) != ConditionStatusTrue { + t.Errorf("Expected status True after MarkHealthy, got %s", cond.Status) + } + + return + } + } + + t.Error("NVMLReady condition not found") +} + +// contextCapturingClient wraps a GPUInterface and captures the context +// passed to Create. This allows tests to verify that enumerateDevices +// receives a non-nil context. 
+type contextCapturingClient struct { + inner gpuclient.GPUInterface + capturedCtx context.Context +} + +func newContextCapturingClient(inner gpuclient.GPUInterface) *contextCapturingClient { + return &contextCapturingClient{inner: inner} +} + +func (c *contextCapturingClient) Create(ctx context.Context, gpu *devicev1alpha1.GPU, opts metav1.CreateOptions) (*devicev1alpha1.GPU, error) { + c.capturedCtx = ctx + return c.inner.Create(ctx, gpu, opts) +} + +func (c *contextCapturingClient) Get(ctx context.Context, name string, opts metav1.GetOptions) (*devicev1alpha1.GPU, error) { + return c.inner.Get(ctx, name, opts) +} + +func (c *contextCapturingClient) Update(ctx context.Context, gpu *devicev1alpha1.GPU, opts metav1.UpdateOptions) (*devicev1alpha1.GPU, error) { + return c.inner.Update(ctx, gpu, opts) +} + +func (c *contextCapturingClient) UpdateStatus(ctx context.Context, gpu *devicev1alpha1.GPU, opts metav1.UpdateOptions) (*devicev1alpha1.GPU, error) { + return c.inner.UpdateStatus(ctx, gpu, opts) +} + +func (c *contextCapturingClient) List(ctx context.Context, opts metav1.ListOptions) (*devicev1alpha1.GPUList, error) { + return c.inner.List(ctx, opts) +} + +func (c *contextCapturingClient) Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error { + return c.inner.Delete(ctx, name, opts) +} + +func (c *contextCapturingClient) Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) { + return c.inner.Watch(ctx, opts) +} + +// TestProvider_Start_ContextSetBeforeEnumerate verifies that enumerateDevices +// receives a non-nil context. Before the fix, p.ctx was nil when +// enumerateDevices was called, which would cause a gRPC panic on any real +// gRPC client. 
+func TestProvider_Start_ContextSetBeforeEnumerate(t *testing.T) { + mockLib := NewMockLibrary() + mockLib.AddDevice(0, NewMockDevice("GPU-ctx-test", "NVIDIA A100")) + + typedClient := testutil.NewTestGPUTypedClient(t) + capturingClient := newContextCapturingClient(typedClient) + + provider := &Provider{ + config: Config{HealthCheckEnabled: false}, + nvmllib: mockLib, + client: capturingClient, + logger: testLogger(), + } + + ctx := context.Background() + err := provider.Start(ctx) + if err != nil { + t.Fatalf("Start() failed: %v", err) + } + defer provider.Stop() + + // The capturing client recorded the context passed to Create during + // enumerateDevices. If the fix is missing, this will be nil because p.ctx + // was not set before enumerateDevices was called. + if capturingClient.capturedCtx == nil { + t.Fatal("Create was called with nil context; p.ctx must be set before enumerateDevices()") + } + + // Also verify p.ctx is set after Start returns. + if provider.ctx == nil { + t.Fatal("p.ctx should be set after Start()") + } +} diff --git a/pkg/providers/nvml/shared.go b/pkg/providers/nvml/shared.go new file mode 100644 index 000000000..d33c58619 --- /dev/null +++ b/pkg/providers/nvml/shared.go @@ -0,0 +1,85 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package nvml + +import ( + "fmt" + "os" + "path/filepath" +) + +// Condition constants for NVML provider. 
+const ( + // ConditionTypeNVMLReady is the condition type for NVML health status. + ConditionTypeNVMLReady = "NVMLReady" + + // ConditionSourceNVML is the source identifier for conditions set by NVML provider. + ConditionSourceNVML = "nvml-provider" + + // ConditionStatusTrue indicates the condition is met. + ConditionStatusTrue = "True" + + // ConditionStatusFalse indicates the condition is not met. + ConditionStatusFalse = "False" + + // ConditionStatusUnknown indicates the condition status is unknown. + ConditionStatusUnknown = "Unknown" +) + +// FormatBytes formats bytes to a human-readable string. +func FormatBytes(bytes uint64) string { + const ( + KB = 1024 + MB = KB * 1024 + GB = MB * 1024 + ) + + switch { + case bytes >= GB: + return fmt.Sprintf("%.1f GB", float64(bytes)/float64(GB)) + case bytes >= MB: + return fmt.Sprintf("%.1f MB", float64(bytes)/float64(MB)) + case bytes >= KB: + return fmt.Sprintf("%.1f KB", float64(bytes)/float64(KB)) + default: + return fmt.Sprintf("%d B", bytes) + } +} + +// FindDriverLibrary locates the NVML library in the driver root. +// +// It searches common paths where libnvidia-ml.so.1 might be located. +// Returns empty string if not found (will use system default). 
+func FindDriverLibrary(driverRoot string) string { + if driverRoot == "" { + return "" + } + + searchPaths := []string{ + filepath.Join(driverRoot, "usr/lib64/libnvidia-ml.so.1"), + filepath.Join(driverRoot, "usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1"), + filepath.Join(driverRoot, "usr/lib/libnvidia-ml.so.1"), + filepath.Join(driverRoot, "lib64/libnvidia-ml.so.1"), + filepath.Join(driverRoot, "lib/libnvidia-ml.so.1"), + } + + for _, path := range searchPaths { + if _, err := os.Stat(path); err == nil { + return path + } + } + + return "" +} diff --git a/pkg/providers/nvml/stub.go b/pkg/providers/nvml/stub.go new file mode 100644 index 000000000..c2b7baf7e --- /dev/null +++ b/pkg/providers/nvml/stub.go @@ -0,0 +1,80 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !nvml + +// Package nvml provides a built-in NVML-based health provider for the Device API Server. +// +// This stub file is used when NVML support is not compiled in (build without -tags=nvml). +package nvml + +import ( + "context" + "errors" + + "k8s.io/klog/v2" + + gpuclient "github.com/nvidia/nvsentinel/pkg/client-go/client/versioned/typed/device/v1alpha1" +) + +// ErrNVMLNotCompiled is returned when NVML support is not compiled into the binary. 
+var ErrNVMLNotCompiled = errors.New("NVML support not compiled in (build with -tags=nvml)") + +// Provider is the built-in NVML-based health provider (stub when not compiled). +type Provider struct{} + +// Config holds configuration for the NVML provider. +type Config struct { + DriverRoot string + AdditionalIgnoredXids []uint64 + HealthCheckEnabled bool +} + +// DefaultConfig returns a Config with sensible defaults. +func DefaultConfig() Config { + return Config{ + DriverRoot: "/run/nvidia/driver", + AdditionalIgnoredXids: nil, + HealthCheckEnabled: true, + } +} + +// New creates a new NVML provider (stub). +func New(cfg Config, client gpuclient.GPUInterface, logger klog.Logger) *Provider { + return &Provider{} +} + +// Start initializes NVML (stub - always returns error). +func (p *Provider) Start(ctx context.Context) error { + return ErrNVMLNotCompiled +} + +// Stop shuts down the NVML provider (stub - no-op). +func (p *Provider) Stop() {} + +// IsInitialized returns false (stub). +func (p *Provider) IsInitialized() bool { + return false +} + +// GPUCount returns 0 (stub). +func (p *Provider) GPUCount() int { + return 0 +} + +// IsHealthMonitorRunning returns false (stub). +func (p *Provider) IsHealthMonitorRunning() bool { + return false +} + diff --git a/pkg/providers/nvml/xid.go b/pkg/providers/nvml/xid.go new file mode 100644 index 000000000..718bb3814 --- /dev/null +++ b/pkg/providers/nvml/xid.go @@ -0,0 +1,213 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package nvml + +import ( + "strconv" + "strings" +) + +// XID errors documentation: +// https://docs.nvidia.com/deploy/xid-errors/index.html + +// defaultIgnoredXids contains XID error codes that are typically caused by +// application errors rather than hardware failures. These are ignored by +// default to avoid false positives in health monitoring. +// +// Reference: https://docs.nvidia.com/deploy/xid-errors/index.html#topic_4 +var defaultIgnoredXids = map[uint64]bool{ + // Application errors - GPU should still be healthy + 13: true, // Graphics Engine Exception + 31: true, // GPU memory page fault + 43: true, // GPU stopped processing + 45: true, // Preemptive cleanup, due to previous errors + 68: true, // Video processor exception + 109: true, // Context Switch Timeout Error +} + +// criticalXids contains XID error codes that indicate critical hardware +// failures requiring immediate attention. +var criticalXids = map[uint64]bool{ + // Memory errors + 48: true, // Double Bit ECC Error + 63: true, // Row remapping failure + 64: true, // Uncontained ECC error + 74: true, // NVLink error + 79: true, // GPU has fallen off the bus + + // Fatal errors + 94: true, // Contained ECC error (severe) + 95: true, // Uncontained ECC error + 119: true, // GSP (GPU System Processor) error + 120: true, // GSP firmware error +} + +// XidDescriptions provides human-readable descriptions for common XIDs. 
+var XidDescriptions = map[uint64]string{ + // Application errors (typically ignored) + 13: "Graphics Engine Exception", + 31: "GPU memory page fault", + 43: "GPU stopped processing", + 45: "Preemptive cleanup", + 68: "Video processor exception", + 109: "Context Switch Timeout", + + // Memory errors + 48: "Double Bit ECC Error", + 63: "Row remapping failure", + 64: "Uncontained ECC error", + 74: "NVLink error", + 79: "GPU has fallen off the bus", + 94: "Contained ECC error", + 95: "Uncontained ECC error", + + // Other notable XIDs + 8: "GPU not accessible", + 32: "Invalid or corrupted push buffer stream", + 38: "Driver firmware error", + 56: "Display engine error", + 57: "Error programming video memory interface", + 62: "Internal micro-controller halt (non-fatal)", + 69: "Graphics engine accessor error", + 119: "GSP error", + 120: "GSP firmware error", +} + +// IsDefaultIgnored returns true if the XID is in the default ignored set. +func IsDefaultIgnored(xid uint64) bool { + return defaultIgnoredXids[xid] +} + +// IsCritical returns true if the XID is in the critical set. +func IsCritical(xid uint64) bool { + return criticalXids[xid] +} + +// DefaultIgnoredXidsList returns a copy of the default ignored XID set. +func DefaultIgnoredXidsList() map[uint64]bool { + out := make(map[uint64]bool, len(defaultIgnoredXids)) + for k, v := range defaultIgnoredXids { + out[k] = v + } + return out +} + +// isIgnoredXid returns true if the XID should be ignored for health purposes. +// +// An XID is ignored if it's in the default ignored list OR in the additional +// ignored map provided by the user. The map is built once at provider startup +// from the config slice for O(1) lookup. +func isIgnoredXid(xid uint64, additionalIgnored map[uint64]bool) bool { + if defaultIgnoredXids[xid] { + return true + } + + return additionalIgnored[xid] +} + +// IsCriticalXid returns true if the XID indicates a critical hardware failure. 
+func IsCriticalXid(xid uint64) bool { + return criticalXids[xid] +} + +// xidToString returns a human-readable description for an XID. +func xidToString(xid uint64) string { + if desc, ok := XidDescriptions[xid]; ok { + return desc + } + + return "Unknown XID" +} + +// ParseIgnoredXids parses a comma-or-space-separated string of XID values. +// Non-numeric tokens are silently skipped. +func ParseIgnoredXids(input string) []uint64 { + if input == "" { + return nil + } + + var result []uint64 + + tokens := strings.FieldsFunc(input, func(r rune) bool { + return r == ',' || r == ' ' + }) + + for _, tok := range tokens { + v, err := strconv.ParseUint(tok, 10, 64) + if err != nil { + continue + } + + result = append(result, v) + } + + if len(result) == 0 { + return nil + } + + return result +} + +// XidSeverity represents the severity level of an XID error. +type XidSeverity int + +const ( + // XidSeverityUnknown indicates the XID severity is unknown. + XidSeverityUnknown XidSeverity = iota + // XidSeverityIgnored indicates the XID is typically caused by applications. + XidSeverityIgnored + // XidSeverityWarning indicates the XID may indicate a problem. + XidSeverityWarning + // XidSeverityCritical indicates the XID indicates a critical hardware failure. + XidSeverityCritical +) + +// Severity string constants. +const ( + severityUnknown = "unknown" + severityIgnored = "ignored" + severityWarning = "warning" + severityCritical = "critical" +) + +// GetXidSeverity returns the severity level for an XID. +func GetXidSeverity(xid uint64) XidSeverity { + if defaultIgnoredXids[xid] { + return XidSeverityIgnored + } + + if criticalXids[xid] { + return XidSeverityCritical + } + + // XIDs not in either list are treated as warnings + return XidSeverityWarning +} + +// String returns a string representation of XidSeverity. 
+func (s XidSeverity) String() string { + switch s { + case XidSeverityUnknown: + return severityUnknown + case XidSeverityIgnored: + return severityIgnored + case XidSeverityWarning: + return severityWarning + case XidSeverityCritical: + return severityCritical + default: + return severityUnknown + } +} diff --git a/pkg/providers/nvml/xid_test.go b/pkg/providers/nvml/xid_test.go new file mode 100644 index 000000000..f6d9eadaf --- /dev/null +++ b/pkg/providers/nvml/xid_test.go @@ -0,0 +1,279 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package nvml + +import ( + "math" + "testing" +) + +func TestIsIgnoredXid_DefaultIgnored(t *testing.T) { + // Test default ignored XIDs + defaultIgnored := []uint64{13, 31, 43, 45, 68, 109} + + for _, xid := range defaultIgnored { + if !isIgnoredXid(xid, nil) { + t.Errorf("XID %d should be ignored by default", xid) + } + } +} + +func TestIsIgnoredXid_CriticalNotIgnored(t *testing.T) { + // Test critical XIDs are not ignored by default + criticalXids := []uint64{48, 63, 64, 74, 79, 94, 95, 119, 120} + + for _, xid := range criticalXids { + if isIgnoredXid(xid, nil) { + t.Errorf("Critical XID %d should not be ignored by default", xid) + } + } +} + +func TestIsIgnoredXid_AdditionalIgnored(t *testing.T) { + // Test additional ignored XIDs + additionalIgnored := map[uint64]bool{48: true, 63: true} // Make critical XIDs ignored + + // Normally critical, but now ignored + if !isIgnoredXid(48, additionalIgnored) { + t.Error("XID 48 should be ignored when in additional list") + } + + if !isIgnoredXid(63, additionalIgnored) { + t.Error("XID 63 should be ignored when in additional list") + } + + // Still critical (not in additional list) + if isIgnoredXid(64, additionalIgnored) { + t.Error("XID 64 should not be ignored (not in additional list)") + } +} + +func TestIsIgnoredXid_UnknownXid(t *testing.T) { + // Unknown XIDs should not be ignored + unknownXids := []uint64{1, 2, 3, 999, 12345} + + for _, xid := range unknownXids { + if isIgnoredXid(xid, nil) { + t.Errorf("Unknown XID %d should not be ignored", xid) + } + } +} + +func TestIsIgnoredXid_BoundaryValues(t *testing.T) { + // Boundary values should not be ignored + if isIgnoredXid(0, nil) { + t.Error("XID 0 should not be ignored") + } + + if isIgnoredXid(math.MaxUint64, nil) { + t.Error("XID MaxUint64 should not be ignored") + } +} + +func TestIsCriticalXid(t *testing.T) { + tests := []struct { + xid uint64 + expected bool + }{ + // Critical XIDs + {48, true}, + {63, true}, + {64, true}, + {74, true}, + {79, true}, + 
{94, true}, + {95, true}, + {119, true}, + {120, true}, + + // Non-critical XIDs + {13, false}, + {31, false}, + {43, false}, + {1, false}, + {999, false}, + + // Boundary values + {0, false}, + {math.MaxUint64, false}, + } + + for _, tt := range tests { + result := IsCriticalXid(tt.xid) + if result != tt.expected { + t.Errorf("IsCriticalXid(%d) = %v, want %v", tt.xid, result, tt.expected) + } + } +} + +func TestXidToString(t *testing.T) { + tests := []struct { + xid uint64 + expected string + }{ + {13, "Graphics Engine Exception"}, + {31, "GPU memory page fault"}, + {48, "Double Bit ECC Error"}, + {79, "GPU has fallen off the bus"}, + {109, "Context Switch Timeout"}, + {999, "Unknown XID"}, + {0, "Unknown XID"}, + } + + for _, tt := range tests { + result := xidToString(tt.xid) + if result != tt.expected { + t.Errorf("xidToString(%d) = %q, want %q", tt.xid, result, tt.expected) + } + } +} + +func TestParseIgnoredXids(t *testing.T) { + tests := []struct { + name string + input string + expected []uint64 + }{ + { + name: "empty string", + input: "", + expected: nil, + }, + { + name: "single value", + input: "48", + expected: []uint64{48}, + }, + { + name: "multiple comma separated", + input: "48,63,64", + expected: []uint64{48, 63, 64}, + }, + { + name: "with spaces", + input: "48, 63, 64", + expected: []uint64{48, 63, 64}, + }, + { + name: "space separated", + input: "48 63 64", + expected: []uint64{48, 63, 64}, + }, + { + name: "mixed separators", + input: "48, 63 64,65", + expected: []uint64{48, 63, 64, 65}, + }, + { + name: "trailing comma", + input: "48,63,", + expected: []uint64{48, 63}, + }, + { + name: "leading comma", + input: ",48,63", + expected: []uint64{48, 63}, + }, + { + name: "non-numeric characters mixed in", + input: "4a8,63", + expected: []uint64{63}, + }, + { + name: "completely non-numeric", + input: "abc", + expected: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := ParseIgnoredXids(tt.input) + + if 
len(result) != len(tt.expected) { + t.Errorf("ParseIgnoredXids(%q) len = %d, want %d", tt.input, len(result), len(tt.expected)) + return + } + + for i, v := range result { + if v != tt.expected[i] { + t.Errorf("ParseIgnoredXids(%q)[%d] = %d, want %d", tt.input, i, v, tt.expected[i]) + } + } + }) + } +} + +func TestGetXidSeverity(t *testing.T) { + tests := []struct { + xid uint64 + expected XidSeverity + }{ + // Ignored (application errors) + {13, XidSeverityIgnored}, + {31, XidSeverityIgnored}, + {43, XidSeverityIgnored}, + {45, XidSeverityIgnored}, + {68, XidSeverityIgnored}, + {109, XidSeverityIgnored}, + + // Critical (hardware failures) + {48, XidSeverityCritical}, + {63, XidSeverityCritical}, + {64, XidSeverityCritical}, + {74, XidSeverityCritical}, + {79, XidSeverityCritical}, + {94, XidSeverityCritical}, + {95, XidSeverityCritical}, + {119, XidSeverityCritical}, + {120, XidSeverityCritical}, + + // Warning (unknown XIDs) + {1, XidSeverityWarning}, + {2, XidSeverityWarning}, + {999, XidSeverityWarning}, + + // Boundary values + {0, XidSeverityWarning}, + {math.MaxUint64, XidSeverityWarning}, + } + + for _, tt := range tests { + result := GetXidSeverity(tt.xid) + if result != tt.expected { + t.Errorf("GetXidSeverity(%d) = %v, want %v", tt.xid, result, tt.expected) + } + } +} + +func TestXidSeverity_String(t *testing.T) { + tests := []struct { + severity XidSeverity + expected string + }{ + {XidSeverityUnknown, "unknown"}, + {XidSeverityIgnored, "ignored"}, + {XidSeverityWarning, "warning"}, + {XidSeverityCritical, "critical"}, + {XidSeverity(99), "unknown"}, // Invalid severity + } + + for _, tt := range tests { + result := tt.severity.String() + if result != tt.expected { + t.Errorf("XidSeverity(%d).String() = %q, want %q", tt.severity, result, tt.expected) + } + } +} diff --git a/pkg/services/device/v1alpha1/gpu_provider.go b/pkg/services/device/v1alpha1/gpu_provider.go index 32dc779bd..7f11c98e7 100644 --- a/pkg/services/device/v1alpha1/gpu_provider.go +++ 
b/pkg/services/device/v1alpha1/gpu_provider.go @@ -1,81 +1,76 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at // -// http://www.apache.org/licenses/LICENSE-2.0 +// http://www.apache.org/licenses/LICENSE-2.0 // -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by service-gen. DO NOT EDIT. +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
package v1alpha1 import ( "fmt" - "path" devicev1alpha1 "github.com/nvidia/nvsentinel/api/device/v1alpha1" pb "github.com/nvidia/nvsentinel/internal/generated/device/v1alpha1" "github.com/nvidia/nvsentinel/pkg/controlplane/apiserver/api" "github.com/nvidia/nvsentinel/pkg/controlplane/apiserver/registry" + "github.com/nvidia/nvsentinel/pkg/storage/memory" "google.golang.org/grpc" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/runtime/serializer" "k8s.io/apiserver/pkg/storage/storagebackend" - "k8s.io/apiserver/pkg/storage/storagebackend/factory" ) func init() { registry.Register(NewGPUServiceProvider()) } -type gpuServiceProvider struct{ +type gpuServiceProvider struct { groupVersion schema.GroupVersion } +// NewGPUServiceProvider returns a ServiceProvider that installs the GPU gRPC service. func NewGPUServiceProvider() api.ServiceProvider { return &gpuServiceProvider{ groupVersion: devicev1alpha1.SchemeGroupVersion, } } -func (p *gpuServiceProvider) Install(svr *grpc.Server, storageConfig storagebackend.Config) (api.Service, error) { +// Install creates the in-memory storage backend and registers the GPU service +// on the provided gRPC server. +func (p *gpuServiceProvider) Install(svr *grpc.Server, cfg storagebackend.Config) (api.Service, error) { + // Currently only in-memory storage is supported. The cfg parameter is + // accepted for future extensibility but not used for backend selection. 
+ _ = cfg + gv := p.groupVersion.String() scheme := runtime.NewScheme() if err := devicev1alpha1.AddToScheme(scheme); err != nil { return nil, fmt.Errorf("failed to add %q to scheme: %w", gv, err) } - - codecs := serializer.NewCodecFactory(scheme) - codec := codecs.LegacyCodec(p.groupVersion) - configForResource := storagebackend.ConfigForResource{ - Config: storageConfig, + codecs := serializer.NewCodecFactory(scheme) + info, ok := runtime.SerializerInfoForMediaType(codecs.SupportedMediaTypes(), runtime.ContentTypeJSON) + if !ok { + return nil, fmt.Errorf("no serializer found for %s in %s", runtime.ContentTypeJSON, gv) } - configForResource.Config.Codec = codec - - resourcePrefix := path.Join("/", p.groupVersion.Group, "gpus") + codec := codecs.CodecForVersions(info.Serializer, info.Serializer, schema.GroupVersions{p.groupVersion}, schema.GroupVersions{p.groupVersion}) - s, destroyFunc, err := factory.Create( - configForResource, - func() runtime.Object { return &devicev1alpha1.GPU{} }, - func() runtime.Object { return &devicev1alpha1.GPUList{} }, - resourcePrefix, - ) + s, destroyFunc, err := memory.CreateStorage(codec) if err != nil { - return nil, fmt.Errorf("failed to create storage for %s: %w", resourcePrefix, err) + return nil, fmt.Errorf("failed to create in-memory storage for %s: %w", gv, err) } service := NewGPUService(s, destroyFunc) - pb.RegisterGpuServiceServer(svr, service) return service, nil diff --git a/pkg/services/device/v1alpha1/gpu_service.go b/pkg/services/device/v1alpha1/gpu_service.go index 3bff930d5..f4434ef6d 100644 --- a/pkg/services/device/v1alpha1/gpu_service.go +++ b/pkg/services/device/v1alpha1/gpu_service.go @@ -1,18 +1,16 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at // -// http://www.apache.org/licenses/LICENSE-2.0 +// http://www.apache.org/licenses/LICENSE-2.0 // -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Code generated by service-gen. DO NOT EDIT. +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. package v1alpha1 @@ -21,9 +19,11 @@ import ( "fmt" "path" "reflect" + "regexp" devicev1alpha1 "github.com/nvidia/nvsentinel/api/device/v1alpha1" pb "github.com/nvidia/nvsentinel/internal/generated/device/v1alpha1" + "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "google.golang.org/protobuf/types/known/emptypb" @@ -38,21 +38,24 @@ import ( type gpuService struct { pb.UnimplementedGpuServiceServer - storage storage.Interface + storage storage.Interface destroyFunc factory.DestroyFunc } +// NewGPUService creates a new GPU gRPC service backed by the provided storage. func NewGPUService(storage storage.Interface, destroyFunc factory.DestroyFunc) *gpuService { return &gpuService{ - storage: storage, + storage: storage, destroyFunc: destroyFunc, } } +// Name returns the fully qualified gRPC service name. 
func (s *gpuService) Name() string {
	return pb.GpuService_ServiceDesc.ServiceName
}

+// IsReady reports whether the underlying storage backend is healthy.
func (s *gpuService) IsReady() bool {
	if s.storage == nil {
		return false
@@ -60,6 +63,7 @@
	return s.storage.ReadinessCheck() == nil
}

+// Cleanup shuts down the storage backend.
func (s *gpuService) Cleanup() {
	if s.destroyFunc != nil {
		klog.V(2).InfoS("Shutting down storage backend", "service", s.Name())
@@ -67,22 +71,67 @@
	}
}

-func (s *gpuService) storageKey(ns string, name string) string {
-	base := path.Join("/", devicev1alpha1.SchemeGroupVersion.Group, "gpus")
+// normalizeNamespace returns "default" if ns is empty.
+func normalizeNamespace(ns string) string {
+	if ns == "" {
+		return "default"
+	}
+	return ns
+}
+
+// validateNamespace checks that ns does not exceed the K8s maximum namespace length.
+// An empty namespace is valid (it defaults to "default" elsewhere).
+func validateNamespace(ns string) error {
+	if ns == "" {
+		return nil
+	}
+	if len(ns) > 253 { // NOTE(review): 253 is the DNS-subdomain (resource name) limit; Kubernetes namespace names are DNS-1123 labels capped at 63 chars — confirm the intended bound
+		return status.Error(codes.InvalidArgument, "namespace exceeds maximum length of 253 characters")
+	}
+	return nil
+}
+
+// gpuUUIDPattern matches NVIDIA GPU UUIDs
+// (e.g., GPU-12345678-1234-1234-1234-123456789abc).
+var gpuUUIDPattern = regexp.MustCompile(
+	`^GPU-[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-` +
+		`[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$`,
+)
+
+// validateGPUName checks that name is non-empty and matches
+// the NVIDIA GPU UUID format.
+func validateGPUName(name string) error { + if name == "" { + return status.Error(codes.InvalidArgument, "name is required") + } - if ns == "" && name != "" { - ns = "default" + if !gpuUUIDPattern.MatchString(name) { + return status.Errorf(codes.InvalidArgument, + "name must be a valid GPU UUID "+ + "(GPU-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx), got %q", + name) } - // Pattern: /device.nvidia.com/gpus// + return nil +} + +func (s *gpuService) storageKey(ns string, name string) string { + base := path.Join("/", devicev1alpha1.SchemeGroupVersion.Group, "gpus") + if name != "" { + ns = normalizeNamespace(ns) + } return path.Join(base, ns, name) } +// GetGpu retrieves a single GPU resource. func (s *gpuService) GetGpu(ctx context.Context, req *pb.GetGpuRequest) (*pb.GetGpuResponse, error) { logger := klog.FromContext(ctx) - if req.GetName() == "" { - return nil, status.Error(codes.InvalidArgument, "name is required") + if err := validateGPUName(req.GetName()); err != nil { + return nil, err + } + if err := validateNamespace(req.GetNamespace()); err != nil { + return nil, err } key := s.storageKey(req.GetNamespace(), req.GetName()) @@ -106,15 +155,20 @@ func (s *gpuService) GetGpu(ctx context.Context, req *pb.GetGpuRequest) (*pb.Get }, nil } +// ListGpus retrieves a list of GPU resources. 
func (s *gpuService) ListGpus(ctx context.Context, req *pb.ListGpusRequest) (*pb.ListGpusResponse, error) { logger := klog.FromContext(ctx) + if err := validateNamespace(req.GetNamespace()); err != nil { + return nil, err + } + var gpus devicev1alpha1.GPUList opts := storage.ListOptions{ ResourceVersion: req.GetOpts().GetResourceVersion(), Recursive: true, - Predicate: storage.Everything, // TODO: selection predicate + Predicate: storage.Everything, } key := s.storageKey(req.GetNamespace(), "") @@ -125,7 +179,6 @@ func (s *gpuService) ListGpus(ctx context.Context, req *pb.ListGpusRequest) (*pb if rv == 0 { rvStr = req.GetOpts().GetResourceVersion() } - return &pb.ListGpusResponse{ GpuList: &pb.GpuList{ Metadata: &pb.ListMeta{ @@ -150,7 +203,8 @@ func (s *gpuService) ListGpus(ctx context.Context, req *pb.ListGpusRequest) (*pb }, nil } -func (s *gpuService) WatchGpus(req *pb.WatchGpusRequest, stream pb.GpuService_WatchGpusServer) error { +// WatchGpus streams lifecycle events for GPU resources. +func (s *gpuService) WatchGpus(req *pb.WatchGpusRequest, stream grpc.ServerStreamingServer[pb.WatchGpusResponse]) error { ctx := stream.Context() logger := klog.FromContext(ctx) @@ -159,9 +213,9 @@ func (s *gpuService) WatchGpus(req *pb.WatchGpusRequest, stream pb.GpuService_Wa key := s.storageKey(req.GetNamespace(), "") w, err := s.storage.Watch(ctx, key, storage.ListOptions{ - ResourceVersion: req.GetOpts().GetResourceVersion(), + ResourceVersion: rv, Recursive: true, - Predicate: storage.Everything, // TODO: selection predicate + Predicate: storage.Everything, }) if err != nil { if storage.IsInvalidError(err) { @@ -226,25 +280,25 @@ func (s *gpuService) WatchGpus(req *pb.WatchGpusRequest, stream pb.GpuService_Wa } } +// CreateGpu creates a single GPU resource. 
func (s *gpuService) CreateGpu(ctx context.Context, req *pb.CreateGpuRequest) (*pb.Gpu, error) { logger := klog.FromContext(ctx) if req.GetGpu() == nil { return nil, status.Error(codes.InvalidArgument, "resource body is required") } - if req.GetGpu().GetMetadata() == nil || req.GetGpu().GetMetadata().GetName() == "" { + if req.GetGpu().GetMetadata() == nil { return nil, status.Error(codes.InvalidArgument, "metadata.name: Required value") } + if err := validateGPUName(req.GetGpu().GetMetadata().GetName()); err != nil { + return nil, err + } name := req.GetGpu().GetMetadata().GetName() - ns := req.GetGpu().GetMetadata().GetNamespace() - if ns == "" { - ns = "default" - } + ns := normalizeNamespace(req.GetGpu().GetMetadata().GetNamespace()) key := s.storageKey(ns, name) gpu := devicev1alpha1.FromProto(req.Gpu) - // TODO: move into PrepareForCreate function? gpu.SetNamespace(ns) gpu.SetUID(uuid.NewUUID()) now := metav1.Now() @@ -270,15 +324,19 @@ func (s *gpuService) CreateGpu(ctx context.Context, req *pb.CreateGpuRequest) (* return devicev1alpha1.ToProto(out), nil } +// UpdateGpu updates a single GPU resource (spec only). 
func (s *gpuService) UpdateGpu(ctx context.Context, req *pb.UpdateGpuRequest) (*pb.Gpu, error) { logger := klog.FromContext(ctx) if req.GetGpu() == nil { return nil, status.Error(codes.InvalidArgument, "resource body is required") } - if req.GetGpu().GetMetadata() == nil || req.GetGpu().GetMetadata().GetName() == "" { + if req.GetGpu().GetMetadata() == nil { return nil, status.Error(codes.InvalidArgument, "metadata.name: Required value") } + if err := validateGPUName(req.GetGpu().GetMetadata().GetName()); err != nil { + return nil, err + } name := req.GetGpu().GetMetadata().GetName() ns := req.GetGpu().GetMetadata().GetNamespace() @@ -289,8 +347,8 @@ func (s *gpuService) UpdateGpu(ctx context.Context, req *pb.UpdateGpuRequest) (* ctx, key, updatedGpu, - false, // ignoreNotFound - nil, // TODO: preconditions + false, + nil, func(input runtime.Object, res storage.ResponseMeta) (runtime.Object, *uint64, error) { curr := input.(*devicev1alpha1.GPU) incoming := devicev1alpha1.FromProto(req.GetGpu()) @@ -319,7 +377,7 @@ func (s *gpuService) UpdateGpu(ctx context.Context, req *pb.UpdateGpuRequest) (* return clone, nil, nil }, - nil, // TODO: cachedExistingObject + nil, ) if err != nil { @@ -345,11 +403,76 @@ func (s *gpuService) UpdateGpu(ctx context.Context, req *pb.UpdateGpuRequest) (* return devicev1alpha1.ToProto(updatedGpu), nil } +// UpdateGpuStatus updates only the status subresource of a GPU. 
+func (s *gpuService) UpdateGpuStatus(ctx context.Context, req *pb.UpdateGpuStatusRequest) (*pb.Gpu, error) { + logger := klog.FromContext(ctx) + + if req.GetGpu() == nil { + return nil, status.Error(codes.InvalidArgument, "resource body is required") + } + if req.GetGpu().GetMetadata() == nil { + return nil, status.Error(codes.InvalidArgument, "metadata.name: Required value") + } + if err := validateGPUName(req.GetGpu().GetMetadata().GetName()); err != nil { + return nil, err + } + if req.GetGpu().GetStatus() == nil { + return nil, status.Error(codes.InvalidArgument, "status is required") + } + + name := req.GetGpu().GetMetadata().GetName() + ns := req.GetGpu().GetMetadata().GetNamespace() + key := s.storageKey(ns, name) + updatedGpu := &devicev1alpha1.GPU{} + + err := s.storage.GuaranteedUpdate( + ctx, + key, + updatedGpu, + false, + nil, + func(input runtime.Object, res storage.ResponseMeta) (runtime.Object, *uint64, error) { + curr := input.(*devicev1alpha1.GPU) + incoming := devicev1alpha1.FromProto(req.GetGpu()) + + if incoming.ResourceVersion != "" && incoming.ResourceVersion != curr.ResourceVersion { + return nil, nil, storage.NewResourceVersionConflictsError(key, 0) + } + + clone := curr.DeepCopy() + clone.Status = incoming.Status + + return clone, nil, nil + }, + nil, + ) + + if err != nil { + if storage.IsNotFound(err) { + return nil, status.Errorf(codes.NotFound, "GPU %q not found", name) + } + if storage.IsConflict(err) { + return nil, status.Errorf(codes.Aborted, + "operation cannot be fulfilled on GPUs %q: the object has been modified", name) + } + logger.Error(err, "failed to update GPU status", "name", name, "namespace", ns) + return nil, status.Error(codes.Internal, "internal server error") + } + + logger.V(2).Info("Successfully updated GPU status", "name", name, "namespace", ns, "resourceVersion", updatedGpu.ResourceVersion) + + return devicev1alpha1.ToProto(updatedGpu), nil +} + +// DeleteGpu deletes a single GPU resource. 
func (s *gpuService) DeleteGpu(ctx context.Context, req *pb.DeleteGpuRequest) (*emptypb.Empty, error) { logger := klog.FromContext(ctx) - if req.GetName() == "" { - return nil, status.Error(codes.InvalidArgument, "name is required") + if err := validateGPUName(req.GetName()); err != nil { + return nil, err + } + if err := validateNamespace(req.GetNamespace()); err != nil { + return nil, err } name := req.GetName() @@ -361,10 +484,10 @@ func (s *gpuService) DeleteGpu(ctx context.Context, req *pb.DeleteGpuRequest) (* ctx, key, out, - nil, // TODO: preconditions (e.g., rv check) + nil, storage.ValidateAllObjectFunc, - nil, // TODO: cachedExistingObject - storage.DeleteOptions{}, // TODO: DeleteOptions + nil, + storage.DeleteOptions{}, ); err != nil { if storage.IsNotFound(err) { return nil, status.Errorf(codes.NotFound, "GPU %q not found", name) diff --git a/pkg/services/device/v1alpha1/gpu_service_test.go b/pkg/services/device/v1alpha1/gpu_service_test.go new file mode 100644 index 000000000..184869c97 --- /dev/null +++ b/pkg/services/device/v1alpha1/gpu_service_test.go @@ -0,0 +1,494 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package v1alpha1 + +import ( + "context" + "strings" + "testing" + + devicev1alpha1 "github.com/nvidia/nvsentinel/api/device/v1alpha1" + pb "github.com/nvidia/nvsentinel/internal/generated/device/v1alpha1" + "github.com/nvidia/nvsentinel/pkg/storage/memory" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/runtime/serializer" +) + +func newTestService(t *testing.T) *gpuService { + t.Helper() + + scheme := runtime.NewScheme() + if err := devicev1alpha1.AddToScheme(scheme); err != nil { + t.Fatal(err) + } + + codecs := serializer.NewCodecFactory(scheme) + gv := devicev1alpha1.SchemeGroupVersion + info, _ := runtime.SerializerInfoForMediaType(codecs.SupportedMediaTypes(), runtime.ContentTypeJSON) + codec := codecs.CodecForVersions(info.Serializer, info.Serializer, schema.GroupVersions{gv}, schema.GroupVersions{gv}) + + s, destroy, err := memory.CreateStorage(codec) + if err != nil { + t.Fatal(err) + } + t.Cleanup(destroy) + + return NewGPUService(s, destroy) +} + +func createTestGpu(t *testing.T, svc *gpuService, name string) *pb.Gpu { + t.Helper() + + gpu, err := svc.CreateGpu(context.Background(), &pb.CreateGpuRequest{ + Gpu: &pb.Gpu{ + Metadata: &pb.ObjectMeta{ + Name: name, + Namespace: "default", + }, + Spec: &pb.GpuSpec{ + Uuid: name, + }, + }, + }) + if err != nil { + t.Fatalf("failed to create GPU %q: %v", name, err) + } + + return gpu +} + +func TestGPUService_CreateAndGet(t *testing.T) { + svc := newTestService(t) + ctx := context.Background() + + const gpuName = "GPU-00000000-0000-0000-0000-000000000000" + created := createTestGpu(t, svc, gpuName) + + if created.GetMetadata().GetName() != gpuName { + t.Errorf("expected name %q, got %q", gpuName, created.GetMetadata().GetName()) + } + if created.GetMetadata().GetUid() == "" { + t.Error("expected UID to be set on created GPU") + } + + resp, err := svc.GetGpu(ctx, &pb.GetGpuRequest{ + 
Name: gpuName, + Namespace: "default", + }) + if err != nil { + t.Fatalf("GetGpu failed: %v", err) + } + + got := resp.GetGpu() + if got.GetMetadata().GetName() != gpuName { + t.Errorf("expected name %q, got %q", gpuName, got.GetMetadata().GetName()) + } + if got.GetMetadata().GetUid() != created.GetMetadata().GetUid() { + t.Errorf("UID mismatch: expected %q, got %q", + created.GetMetadata().GetUid(), got.GetMetadata().GetUid()) + } +} + +func TestGPUService_CreateDuplicate(t *testing.T) { + svc := newTestService(t) + + const gpuName = "GPU-11111111-1111-1111-1111-111111111111" + createTestGpu(t, svc, gpuName) + + _, err := svc.CreateGpu(context.Background(), &pb.CreateGpuRequest{ + Gpu: &pb.Gpu{ + Metadata: &pb.ObjectMeta{ + Name: gpuName, + Namespace: "default", + }, + Spec: &pb.GpuSpec{ + Uuid: gpuName, + }, + }, + }) + if err == nil { + t.Fatal("expected error for duplicate create, got nil") + } + + st, ok := status.FromError(err) + if !ok { + t.Fatalf("expected gRPC status error, got %T: %v", err, err) + } + if st.Code() != codes.AlreadyExists { + t.Errorf("expected code %v, got %v: %s", codes.AlreadyExists, st.Code(), st.Message()) + } +} + +func TestGPUService_List(t *testing.T) { + svc := newTestService(t) + ctx := context.Background() + + createTestGpu(t, svc, "GPU-aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa") + createTestGpu(t, svc, "GPU-bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb") + + resp, err := svc.ListGpus(ctx, &pb.ListGpusRequest{ + Namespace: "default", + }) + if err != nil { + t.Fatalf("ListGpus failed: %v", err) + } + + count := len(resp.GetGpuList().GetItems()) + if count != 2 { + t.Errorf("expected 2 GPUs, got %d", count) + } +} + +func TestGPUService_Delete(t *testing.T) { + svc := newTestService(t) + ctx := context.Background() + + const gpuName = "GPU-22222222-2222-2222-2222-222222222222" + createTestGpu(t, svc, gpuName) + + _, err := svc.DeleteGpu(ctx, &pb.DeleteGpuRequest{ + Name: gpuName, + Namespace: "default", + }) + if err != nil { + 
t.Fatalf("DeleteGpu failed: %v", err) + } + + _, err = svc.GetGpu(ctx, &pb.GetGpuRequest{ + Name: gpuName, + Namespace: "default", + }) + if err == nil { + t.Fatal("expected NotFound after delete, got nil") + } + + st, ok := status.FromError(err) + if !ok { + t.Fatalf("expected gRPC status error, got %T: %v", err, err) + } + if st.Code() != codes.NotFound { + t.Errorf("expected code %v, got %v: %s", codes.NotFound, st.Code(), st.Message()) + } +} + +func TestGPUService_DeleteNotFound(t *testing.T) { + svc := newTestService(t) + + _, err := svc.DeleteGpu(context.Background(), &pb.DeleteGpuRequest{ + Name: "GPU-ffffffff-ffff-ffff-ffff-ffffffffffff", + Namespace: "default", + }) + if err == nil { + t.Fatal("expected NotFound error, got nil") + } + + st, ok := status.FromError(err) + if !ok { + t.Fatalf("expected gRPC status error, got %T: %v", err, err) + } + if st.Code() != codes.NotFound { + t.Errorf("expected code %v, got %v: %s", codes.NotFound, st.Code(), st.Message()) + } +} + +func TestGPUService_Update(t *testing.T) { + svc := newTestService(t) + ctx := context.Background() + + const gpuName = "GPU-33333333-3333-3333-3333-333333333333" + created := createTestGpu(t, svc, gpuName) + + updated, err := svc.UpdateGpu(ctx, &pb.UpdateGpuRequest{ + Gpu: &pb.Gpu{ + Metadata: &pb.ObjectMeta{ + Name: gpuName, + Namespace: "default", + }, + Spec: &pb.GpuSpec{ + Uuid: "GPU-new-uuid", + }, + }, + }) + if err != nil { + t.Fatalf("UpdateGpu failed: %v", err) + } + + if updated.GetSpec().GetUuid() != "GPU-new-uuid" { + t.Errorf("expected spec.uuid %q, got %q", "GPU-new-uuid", updated.GetSpec().GetUuid()) + } + if updated.GetMetadata().GetGeneration() != created.GetMetadata().GetGeneration()+1 { + t.Errorf("expected generation %d, got %d", + created.GetMetadata().GetGeneration()+1, updated.GetMetadata().GetGeneration()) + } +} + +func TestGPUService_UpdateStatus(t *testing.T) { + svc := newTestService(t) + ctx := context.Background() + + const gpuName = 
"GPU-44444444-4444-4444-4444-444444444444" + created := createTestGpu(t, svc, gpuName) + + updated, err := svc.UpdateGpuStatus(ctx, &pb.UpdateGpuStatusRequest{ + Gpu: &pb.Gpu{ + Metadata: &pb.ObjectMeta{ + Name: gpuName, + Namespace: "default", + }, + Status: &pb.GpuStatus{ + RecommendedAction: "drain", + }, + }, + }) + if err != nil { + t.Fatalf("UpdateGpuStatus failed: %v", err) + } + + if updated.GetStatus().GetRecommendedAction() != "drain" { + t.Errorf("expected recommended action %q, got %q", + "drain", updated.GetStatus().GetRecommendedAction()) + } + + // Generation must NOT change on status-only updates. + if updated.GetMetadata().GetGeneration() != created.GetMetadata().GetGeneration() { + t.Errorf("expected generation %d (unchanged), got %d", + created.GetMetadata().GetGeneration(), updated.GetMetadata().GetGeneration()) + } +} + +func TestGPUService_UpdateStatus_StaleResourceVersion(t *testing.T) { + svc := newTestService(t) + ctx := context.Background() + + const gpuName = "GPU-55555555-5555-5555-5555-555555555555" + created := createTestGpu(t, svc, gpuName) + staleRV := created.GetMetadata().GetResourceVersion() + + // Update spec to increment the resource version. + _, err := svc.UpdateGpu(ctx, &pb.UpdateGpuRequest{ + Gpu: &pb.Gpu{ + Metadata: &pb.ObjectMeta{ + Name: gpuName, + Namespace: "default", + }, + Spec: &pb.GpuSpec{ + Uuid: "GPU-updated-uuid", + }, + }, + }) + if err != nil { + t.Fatalf("UpdateGpu failed: %v", err) + } + + // Attempt status update with the stale resource version. 
+ _, err = svc.UpdateGpuStatus(ctx, &pb.UpdateGpuStatusRequest{ + Gpu: &pb.Gpu{ + Metadata: &pb.ObjectMeta{ + Name: gpuName, + Namespace: "default", + ResourceVersion: staleRV, + }, + Status: &pb.GpuStatus{ + RecommendedAction: "drain", + }, + }, + }) + if err == nil { + t.Fatal("expected error for stale resource version, got nil") + } + + st, ok := status.FromError(err) + if !ok { + t.Fatalf("expected gRPC status error, got %T: %v", err, err) + } + if st.Code() != codes.Aborted { + t.Errorf("expected code %v, got %v: %s", codes.Aborted, st.Code(), st.Message()) + } +} + +func TestGPUService_UpdateStatus_NilStatus(t *testing.T) { + svc := newTestService(t) + ctx := context.Background() + + const gpuName = "GPU-66666666-6666-6666-6666-666666666666" + createTestGpu(t, svc, gpuName) + + _, err := svc.UpdateGpuStatus(ctx, &pb.UpdateGpuStatusRequest{ + Gpu: &pb.Gpu{ + Metadata: &pb.ObjectMeta{ + Name: gpuName, + Namespace: "default", + }, + Status: nil, + }, + }) + if err == nil { + t.Fatal("expected error for nil status, got nil") + } + + st, ok := status.FromError(err) + if !ok { + t.Fatalf("expected gRPC status error, got %T: %v", err, err) + } + if st.Code() != codes.InvalidArgument { + t.Errorf("expected code %v, got %v: %s", codes.InvalidArgument, st.Code(), st.Message()) + } +} + +func TestGPUService_UpdateStatus_EmptyConditions(t *testing.T) { + svc := newTestService(t) + ctx := context.Background() + + const gpuName = "GPU-77777777-7777-7777-7777-777777777777" + createTestGpu(t, svc, gpuName) + + // First set a condition. 
+ _, err := svc.UpdateGpuStatus(ctx, &pb.UpdateGpuStatusRequest{ + Gpu: &pb.Gpu{ + Metadata: &pb.ObjectMeta{ + Name: gpuName, + Namespace: "default", + }, + Status: &pb.GpuStatus{ + Conditions: []*pb.Condition{ + { + Type: "Ready", + Status: "True", + Reason: "TestReason", + }, + }, + RecommendedAction: "drain", + }, + }, + }) + if err != nil { + t.Fatalf("UpdateGpuStatus (set condition) failed: %v", err) + } + + // Now update with empty conditions to clear them. + updated, err := svc.UpdateGpuStatus(ctx, &pb.UpdateGpuStatusRequest{ + Gpu: &pb.Gpu{ + Metadata: &pb.ObjectMeta{ + Name: gpuName, + Namespace: "default", + }, + Status: &pb.GpuStatus{ + Conditions: []*pb.Condition{}, + RecommendedAction: "none", + }, + }, + }) + if err != nil { + t.Fatalf("UpdateGpuStatus (clear conditions) failed: %v", err) + } + + if len(updated.GetStatus().GetConditions()) != 0 { + t.Errorf("expected 0 conditions after clearing, got %d", len(updated.GetStatus().GetConditions())) + } + if updated.GetStatus().GetRecommendedAction() != "none" { + t.Errorf("expected recommended action %q, got %q", "none", updated.GetStatus().GetRecommendedAction()) + } +} + +func TestGPUService_CreateValidation(t *testing.T) { + svc := newTestService(t) + + tests := []struct { + name string + req *pb.CreateGpuRequest + }{ + { + name: "nil gpu body", + req: &pb.CreateGpuRequest{}, + }, + { + name: "nil metadata", + req: &pb.CreateGpuRequest{ + Gpu: &pb.Gpu{ + Spec: &pb.GpuSpec{Uuid: "GPU-test"}, + }, + }, + }, + { + name: "empty name", + req: &pb.CreateGpuRequest{ + Gpu: &pb.Gpu{ + Metadata: &pb.ObjectMeta{Name: ""}, + Spec: &pb.GpuSpec{Uuid: "GPU-test"}, + }, + }, + }, + { + name: "invalid GPU UUID format", + req: &pb.CreateGpuRequest{ + Gpu: &pb.Gpu{ + Metadata: &pb.ObjectMeta{Name: "not-a-gpu-uuid"}, + Spec: &pb.GpuSpec{Uuid: "GPU-test"}, + }, + }, + }, + { + name: "path traversal in name", + req: &pb.CreateGpuRequest{ + Gpu: &pb.Gpu{ + Metadata: &pb.ObjectMeta{Name: "../../etc/passwd"}, + Spec: 
&pb.GpuSpec{Uuid: "GPU-test"}, + }, + }, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + _, err := svc.CreateGpu(context.Background(), tc.req) + if err == nil { + t.Fatal("expected InvalidArgument error, got nil") + } + + st, ok := status.FromError(err) + if !ok { + t.Fatalf("expected gRPC status error, got %T: %v", err, err) + } + if st.Code() != codes.InvalidArgument { + t.Errorf("expected code %v, got %v: %s", codes.InvalidArgument, st.Code(), st.Message()) + } + }) + } +} + +func TestGPUService_NamespaceValidation(t *testing.T) { + svc := newTestService(t) + ctx := context.Background() + + longNS := strings.Repeat("a", 254) + + _, err := svc.GetGpu(ctx, &pb.GetGpuRequest{ + Name: "GPU-00000000-0000-0000-0000-000000000000", + Namespace: longNS, + }) + if err == nil { + t.Fatal("expected InvalidArgument for long namespace, got nil") + } + st, ok := status.FromError(err) + if !ok { + t.Fatalf("expected gRPC status error, got %T: %v", err, err) + } + if st.Code() != codes.InvalidArgument { + t.Errorf("expected code %v, got %v: %s", codes.InvalidArgument, st.Code(), st.Message()) + } +} diff --git a/pkg/services/device/v1alpha1/integration_test.go b/pkg/services/device/v1alpha1/integration_test.go new file mode 100644 index 000000000..f84344575 --- /dev/null +++ b/pkg/services/device/v1alpha1/integration_test.go @@ -0,0 +1,408 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package v1alpha1_test + +import ( + "io" + "testing" + "time" + + pb "github.com/nvidia/nvsentinel/internal/generated/device/v1alpha1" + "github.com/nvidia/nvsentinel/pkg/testutil" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "google.golang.org/protobuf/types/known/timestamppb" +) + +// TestIntegration_CRUD performs a full Create→Get→List→Update→Delete cycle over gRPC. +func TestIntegration_CRUD(t *testing.T) { + client := testutil.NewTestGPUClient(t) + ctx := t.Context() + + const gpuName = "GPU-12345678-1234-1234-1234-123456789abc" + + // Create a GPU + created, err := client.CreateGpu(ctx, &pb.CreateGpuRequest{ + Gpu: &pb.Gpu{ + Metadata: &pb.ObjectMeta{ + Name: gpuName, + Namespace: "default", + }, + Spec: &pb.GpuSpec{ + Uuid: "GPU-1234", + }, + }, + }) + if err != nil { + t.Fatalf("CreateGpu failed: %v", err) + } + + if created.GetMetadata().GetName() != gpuName { + t.Errorf("expected name %q, got %q", gpuName, created.GetMetadata().GetName()) + } + if created.GetSpec().GetUuid() != "GPU-1234" { + t.Errorf("expected UUID %q, got %q", "GPU-1234", created.GetSpec().GetUuid()) + } + if created.GetMetadata().GetUid() == "" { + t.Error("expected UID to be set") + } + + // Get it back + getResp, err := client.GetGpu(ctx, &pb.GetGpuRequest{ + Name: gpuName, + Namespace: "default", + }) + if err != nil { + t.Fatalf("GetGpu failed: %v", err) + } + + got := getResp.GetGpu() + if got.GetSpec().GetUuid() != "GPU-1234" { + t.Errorf("expected UUID %q, got %q", "GPU-1234", got.GetSpec().GetUuid()) + } + + // List namespace "default" + listResp, err := client.ListGpus(ctx, &pb.ListGpusRequest{ + Namespace: "default", + }) + if err != nil { + t.Fatalf("ListGpus failed: %v", err) + } + + if len(listResp.GetGpuList().GetItems()) != 1 { + t.Errorf("expected 1 GPU, got %d", len(listResp.GetGpuList().GetItems())) + } + + // Update the spec (change 
UUID to "GPU-5678") + got.Spec.Uuid = "GPU-5678" + updated, err := client.UpdateGpu(ctx, &pb.UpdateGpuRequest{ + Gpu: got, + }) + if err != nil { + t.Fatalf("UpdateGpu failed: %v", err) + } + + if updated.GetSpec().GetUuid() != "GPU-5678" { + t.Errorf("expected UUID %q, got %q", "GPU-5678", updated.GetSpec().GetUuid()) + } + + // Verify change persists + getResp2, err := client.GetGpu(ctx, &pb.GetGpuRequest{ + Name: gpuName, + Namespace: "default", + }) + if err != nil { + t.Fatalf("GetGpu (after update) failed: %v", err) + } + + if getResp2.GetGpu().GetSpec().GetUuid() != "GPU-5678" { + t.Errorf("expected UUID %q after update, got %q", "GPU-5678", getResp2.GetGpu().GetSpec().GetUuid()) + } + + // Delete it + _, err = client.DeleteGpu(ctx, &pb.DeleteGpuRequest{ + Name: gpuName, + Namespace: "default", + }) + if err != nil { + t.Fatalf("DeleteGpu failed: %v", err) + } + + // List again, verify count=0 + listResp2, err := client.ListGpus(ctx, &pb.ListGpusRequest{ + Namespace: "default", + }) + if err != nil { + t.Fatalf("ListGpus (after delete) failed: %v", err) + } + + if len(listResp2.GetGpuList().GetItems()) != 0 { + t.Errorf("expected 0 GPUs after delete, got %d", len(listResp2.GetGpuList().GetItems())) + } +} + +// TestIntegration_Watch tests the streaming WatchGpus RPC. +func TestIntegration_Watch(t *testing.T) { + client := testutil.NewTestGPUClient(t) + ctx := t.Context() + + const gpuName = "GPU-aabbccdd-1122-3344-5566-778899aabbcc" + + // Start a watch stream + stream, err := client.WatchGpus(ctx, &pb.WatchGpusRequest{ + Namespace: "default", + }) + if err != nil { + t.Fatalf("WatchGpus failed to start: %v", err) + } + + // Create a GPU in a separate goroutine after a brief delay. + // The WatchGpus RPC returns a stream only after the server-side watch + // is established. However, the gRPC client dial and server handler setup + // may not be fully synchronized, so a small delay ensures the watch is + // ready to receive events. 
The main goroutine uses a 5s timeout on Recv + // as the real synchronization mechanism. + doneCh := make(chan struct{}) + go func() { + defer close(doneCh) + time.Sleep(100 * time.Millisecond) + _, err := client.CreateGpu(ctx, &pb.CreateGpuRequest{ + Gpu: &pb.Gpu{ + Metadata: &pb.ObjectMeta{ + Name: gpuName, + Namespace: "default", + }, + Spec: &pb.GpuSpec{ + Uuid: "GPU-WATCH-1", + }, + }, + }) + if err != nil { + t.Errorf("CreateGpu in watch test failed: %v", err) + } + }() + + // Wait for the ADDED event + timeout := time.After(5 * time.Second) + receivedEvent := false + + for !receivedEvent { + select { + case <-timeout: + t.Fatal("timeout waiting for watch event") + default: + event, err := stream.Recv() + if err == io.EOF { + t.Fatal("stream closed before receiving event") + } + if err != nil { + t.Fatalf("stream.Recv() failed: %v", err) + } + + if event.GetType() == "ADDED" && event.GetObject().GetMetadata().GetName() == gpuName { + receivedEvent = true + if event.GetObject().GetSpec().GetUuid() != "GPU-WATCH-1" { + t.Errorf("expected UUID %q, got %q", "GPU-WATCH-1", event.GetObject().GetSpec().GetUuid()) + } + } + } + } + + // Wait for the create goroutine to finish + <-doneCh + + // Clean up + _, err = client.DeleteGpu(ctx, &pb.DeleteGpuRequest{ + Name: gpuName, + Namespace: "default", + }) + if err != nil { + t.Errorf("cleanup DeleteGpu failed: %v", err) + } +} + +// TestIntegration_WatchWithResourceVersion_OutOfRange verifies that requesting +// a watch from a specific ResourceVersion returns codes.OutOfRange, because the +// in-memory store does not support watch resume. 
+func TestIntegration_WatchWithResourceVersion_OutOfRange(t *testing.T) { + client := testutil.NewTestGPUClient(t) + ctx := t.Context() + + stream, err := client.WatchGpus(ctx, &pb.WatchGpusRequest{ + Namespace: "default", + Opts: &pb.ListOptions{ + ResourceVersion: "1", + }, + }) + if err != nil { + t.Fatalf("WatchGpus failed to open stream: %v", err) + } + + // In gRPC server streaming, handler errors surface on Recv. + _, err = stream.Recv() + if err == nil { + t.Fatal("expected OutOfRange error for non-empty ResourceVersion, got nil") + } + if status.Code(err) != codes.OutOfRange { + t.Errorf("expected codes.OutOfRange, got %v: %v", status.Code(err), err) + } +} + +// TestIntegration_UpdateStatus tests the status subresource update. +func TestIntegration_UpdateStatus(t *testing.T) { + client := testutil.NewTestGPUClient(t) + ctx := t.Context() + + const gpuName = "GPU-55667788-aabb-ccdd-eeff-001122334455" + + // Create a GPU + created, err := client.CreateGpu(ctx, &pb.CreateGpuRequest{ + Gpu: &pb.Gpu{ + Metadata: &pb.ObjectMeta{ + Name: gpuName, + Namespace: "default", + }, + Spec: &pb.GpuSpec{ + Uuid: "GPU-STATUS-1", + }, + }, + }) + if err != nil { + t.Fatalf("CreateGpu failed: %v", err) + } + + // Update the status with a condition + updatedGpu, err := client.UpdateGpuStatus(ctx, &pb.UpdateGpuStatusRequest{ + Gpu: &pb.Gpu{ + Metadata: &pb.ObjectMeta{ + Name: gpuName, + Namespace: "default", + ResourceVersion: created.GetMetadata().GetResourceVersion(), + }, + Status: &pb.GpuStatus{ + Conditions: []*pb.Condition{ + { + Type: "Ready", + Status: "True", + LastTransitionTime: timestamppb.Now(), + Reason: "TestReason", + Message: "Test message", + }, + }, + RecommendedAction: "No action needed", + }, + }, + }) + if err != nil { + t.Fatalf("UpdateGpuStatus failed: %v", err) + } + + if len(updatedGpu.GetStatus().GetConditions()) != 1 { + t.Errorf("expected 1 condition, got %d", len(updatedGpu.GetStatus().GetConditions())) + } + + // Get the GPU and verify status 
was updated + getResp, err := client.GetGpu(ctx, &pb.GetGpuRequest{ + Name: gpuName, + Namespace: "default", + }) + if err != nil { + t.Fatalf("GetGpu failed: %v", err) + } + + gpu := getResp.GetGpu() + if len(gpu.GetStatus().GetConditions()) != 1 { + t.Errorf("expected 1 condition in retrieved GPU, got %d", len(gpu.GetStatus().GetConditions())) + } + + cond := gpu.GetStatus().GetConditions()[0] + if cond.GetType() != "Ready" { + t.Errorf("expected condition type %q, got %q", "Ready", cond.GetType()) + } + if cond.GetStatus() != "True" { + t.Errorf("expected condition status %q, got %q", "True", cond.GetStatus()) + } + if cond.GetReason() != "TestReason" { + t.Errorf("expected condition reason %q, got %q", "TestReason", cond.GetReason()) + } + if gpu.GetStatus().GetRecommendedAction() != "No action needed" { + t.Errorf("expected recommended action %q, got %q", "No action needed", gpu.GetStatus().GetRecommendedAction()) + } + + // Clean up + _, err = client.DeleteGpu(ctx, &pb.DeleteGpuRequest{ + Name: gpuName, + Namespace: "default", + }) + if err != nil { + t.Errorf("cleanup DeleteGpu failed: %v", err) + } +} + +// TestIntegration_ErrorCodes verifies correct gRPC error codes are returned. 
+func TestIntegration_ErrorCodes(t *testing.T) { + client := testutil.NewTestGPUClient(t) + ctx := t.Context() + + const gpuName = "GPU-deadbeef-dead-beef-dead-beefdeadbeef" + + // Get non-existent GPU → codes.NotFound + _, err := client.GetGpu(ctx, &pb.GetGpuRequest{ + Name: gpuName, + Namespace: "default", + }) + if err == nil { + t.Fatal("expected error for non-existent GPU") + } + if status.Code(err) != codes.NotFound { + t.Errorf("expected codes.NotFound, got %v", status.Code(err)) + } + + // Create a GPU + _, err = client.CreateGpu(ctx, &pb.CreateGpuRequest{ + Gpu: &pb.Gpu{ + Metadata: &pb.ObjectMeta{ + Name: gpuName, + Namespace: "default", + }, + Spec: &pb.GpuSpec{ + Uuid: "GPU-ERROR-1", + }, + }, + }) + if err != nil { + t.Fatalf("CreateGpu failed: %v", err) + } + + // Create duplicate → codes.AlreadyExists + _, err = client.CreateGpu(ctx, &pb.CreateGpuRequest{ + Gpu: &pb.Gpu{ + Metadata: &pb.ObjectMeta{ + Name: gpuName, + Namespace: "default", + }, + Spec: &pb.GpuSpec{ + Uuid: "GPU-ERROR-2", + }, + }, + }) + if err == nil { + t.Fatal("expected error for duplicate GPU creation") + } + if status.Code(err) != codes.AlreadyExists { + t.Errorf("expected codes.AlreadyExists, got %v", status.Code(err)) + } + + // Delete the GPU + _, err = client.DeleteGpu(ctx, &pb.DeleteGpuRequest{ + Name: gpuName, + Namespace: "default", + }) + if err != nil { + t.Fatalf("DeleteGpu failed: %v", err) + } + + // Delete non-existent → codes.NotFound + _, err = client.DeleteGpu(ctx, &pb.DeleteGpuRequest{ + Name: gpuName, + Namespace: "default", + }) + if err == nil { + t.Fatal("expected error for deleting non-existent GPU") + } + if status.Code(err) != codes.NotFound { + t.Errorf("expected codes.NotFound for delete, got %v", status.Code(err)) + } +} diff --git a/pkg/storage/memory/factory.go b/pkg/storage/memory/factory.go new file mode 100644 index 000000000..057dd2edb --- /dev/null +++ b/pkg/storage/memory/factory.go @@ -0,0 +1,32 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. 
All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package memory + +import ( + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apiserver/pkg/storage" + "k8s.io/apiserver/pkg/storage/storagebackend/factory" +) + +// CreateStorage returns a new in-memory storage.Interface, a DestroyFunc, and any error. +// This mirrors the signature of storagebackend/factory.Create() so it can be +// used as a drop-in replacement in ServiceProvider.Install(). +func CreateStorage(codec runtime.Codec) (storage.Interface, factory.DestroyFunc, error) { + store := NewStore(codec) + destroy := func() { + // No resources to release for in-memory storage. + } + return store, destroy, nil +} diff --git a/pkg/storage/memory/factory_test.go b/pkg/storage/memory/factory_test.go new file mode 100644 index 000000000..49a749e62 --- /dev/null +++ b/pkg/storage/memory/factory_test.go @@ -0,0 +1,62 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package memory + +import ( + "context" + "testing" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apiserver/pkg/storage" +) + +func TestCreateStorage(t *testing.T) { + s, destroy, err := CreateStorage(codec) + if err != nil { + t.Fatalf("CreateStorage failed: %v", err) + } + defer destroy() + + if s == nil { + t.Fatal("expected non-nil storage.Interface") + } + + // Verify it's functional by doing a basic Create + Get. + ctx := context.Background() + obj := newTestObject("factory-gpu", "default") + if err := s.Create(ctx, "/test/factory-gpu", obj, nil, 0); err != nil { + t.Fatalf("Create via factory storage failed: %v", err) + } + + got := &unstructured.Unstructured{} + if err := s.Get(ctx, "/test/factory-gpu", storage.GetOptions{}, got); err != nil { + t.Fatalf("Get via factory storage failed: %v", err) + } + + if got.GetName() != "factory-gpu" { + t.Errorf("expected name factory-gpu, got %s", got.GetName()) + } +} + +func TestCreateStorage_DestroyIsIdempotent(t *testing.T) { + _, destroy, err := CreateStorage(codec) + if err != nil { + t.Fatal(err) + } + + // Should not panic when called multiple times. + destroy() + destroy() +} diff --git a/pkg/storage/memory/store.go b/pkg/storage/memory/store.go new file mode 100644 index 000000000..27b085383 --- /dev/null +++ b/pkg/storage/memory/store.go @@ -0,0 +1,492 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package memory + +import ( + "bytes" + "context" + "fmt" + "strings" + "sync" + + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/validation/field" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/apiserver/pkg/storage" +) + +// item holds an encoded object and its associated resource version. +type item struct { + key string + data []byte + rv uint64 +} + +// Store is a thread-safe, in-memory implementation of storage.Interface. +// Objects are stored as codec-encoded bytes keyed by hierarchical path strings. +type Store struct { + codec runtime.Codec + mu sync.RWMutex + items map[string]*item + rev uint64 + watchers *watchManager +} + +// Compile-time interface compliance check. +var _ storage.Interface = (*Store)(nil) + +// NewStore creates a new in-memory store that encodes and decodes objects +// using the provided codec. The watch channel buffer uses the default size +// (watchChannelSize). Use NewStoreWithOptions for custom buffer sizes. +func NewStore(codec runtime.Codec) *Store { + return &Store{ + codec: codec, + items: make(map[string]*item), + watchers: newWatchManager(watchChannelSize), + } +} + +// Versioner returns the storage versioner used to manage resource versions on +// API objects. This implementation uses the standard APIObjectVersioner. +func (s *Store) Versioner() storage.Versioner { + return storage.APIObjectVersioner{} +} + +// Create adds a new object at the given key. If an object already exists at +// that key, a KeyExists error is returned. The out parameter, if non-nil, is +// populated with the stored object including its assigned resource version. 
+func (s *Store) Create(ctx context.Context, key string, obj, out runtime.Object, ttl uint64) error { + s.mu.Lock() + defer s.mu.Unlock() + + if _, exists := s.items[key]; exists { + return storage.NewKeyExistsError(key, 0) + } + + s.rev++ + rv := s.rev + + if err := s.Versioner().PrepareObjectForStorage(obj); err != nil { + return fmt.Errorf("PrepareObjectForStorage failed: %w", err) + } + + if err := s.Versioner().UpdateObject(obj, rv); err != nil { + return fmt.Errorf("UpdateObject failed: %w", err) + } + + data, err := s.encode(obj) + if err != nil { + return err + } + + s.items[key] = &item{ + key: key, + data: data, + rv: rv, + } + + if out != nil { + if err := s.decode(data, out); err != nil { + return err + } + } + + // DeepCopy is required: watchers must receive an isolated snapshot. + // The copy runs under s.mu write lock, so watch-heavy workloads + // should keep stored objects small. + s.watchers.sendLocked(watch.Event{ + Type: watch.Added, + Object: obj.DeepCopyObject(), + }, key) + + return nil +} + +// Delete removes the object at the given key. If the key does not exist, +// a KeyNotFound error is returned. Preconditions and validation callbacks +// are checked before deletion proceeds. +func (s *Store) Delete( + ctx context.Context, + key string, + out runtime.Object, + preconditions *storage.Preconditions, + validateDeletion storage.ValidateObjectFunc, + cachedExistingObject runtime.Object, + opts storage.DeleteOptions, +) error { + s.mu.Lock() + defer s.mu.Unlock() + + existing, ok := s.items[key] + if !ok { + return storage.NewKeyNotFoundError(key, 0) + } + + existingObj, err := s.decodeNew(existing.data) + if err != nil { + return err + } + + if err := s.checkPreconditions(key, preconditions, existingObj); err != nil { + return err + } + + // validateDeletion must be fast and non-blocking. It runs while the store + // write lock is held; a slow callback freezes all storage operations. 
+ if validateDeletion != nil { + if err := validateDeletion(ctx, existingObj); err != nil { + return err + } + } + + delete(s.items, key) + + s.rev++ + + if out != nil { + if err := s.decode(existing.data, out); err != nil { + return err + } + } + + // Deep copy for watcher isolation. + s.watchers.sendLocked(watch.Event{ + Type: watch.Deleted, + Object: existingObj.DeepCopyObject(), + }, key) + + return nil +} + +// Watch begins watching the specified key prefix. Events matching the key +// prefix are sent on the returned watch.Interface. The watch is automatically +// stopped when the context is cancelled. +// +// The in-memory store does not support resuming watches from a specific +// ResourceVersion. Passing a non-empty ResourceVersion returns an error. +func (s *Store) Watch(ctx context.Context, key string, opts storage.ListOptions) (watch.Interface, error) { + if opts.ResourceVersion != "" { + return nil, storage.NewInvalidError(field.ErrorList{ + field.Invalid( + field.NewPath("resourceVersion"), + opts.ResourceVersion, + "in-memory store does not support watch resume from resource version", + ), + }) + } + + w := s.watchers.watch(key) + done := w.done // capture before spawning goroutine + + go func() { + select { + case <-ctx.Done(): + w.Stop() + case <-done: + // Watcher was stopped directly; goroutine can exit. + } + }() + + return w, nil +} + +// Get retrieves the object stored at the given key and decodes it into objPtr. +// If the key does not exist and opts.IgnoreNotFound is false, a KeyNotFound +// error is returned. If IgnoreNotFound is true, objPtr is left at its zero value. 
+func (s *Store) Get(ctx context.Context, key string, opts storage.GetOptions, objPtr runtime.Object) error { + s.mu.RLock() + defer s.mu.RUnlock() + + existing, ok := s.items[key] + if !ok { + if opts.IgnoreNotFound { + return nil + } + + return storage.NewKeyNotFoundError(key, 0) + } + + return s.decode(existing.data, objPtr) +} + +// GetList retrieves all objects whose keys match the given prefix (when +// opts.Recursive is true) or the exact key (otherwise), and populates +// listObj with the matching items. The list's resource version is set to +// the store's current revision. +func (s *Store) GetList(ctx context.Context, key string, opts storage.ListOptions, listObj runtime.Object) error { + s.mu.RLock() + defer s.mu.RUnlock() + + prefix := key + if opts.Recursive && !strings.HasSuffix(prefix, "/") { + prefix += "/" + } + + var objs []runtime.Object + + for k, it := range s.items { + var match bool + if opts.Recursive { + match = strings.HasPrefix(k, prefix) + } else { + match = k == key + } + + if !match { + continue + } + + obj, err := s.decodeNew(it.data) + if err != nil { + return err + } + + if !predicateEmpty(opts.Predicate) { + matches, err := opts.Predicate.Matches(obj) + if err != nil { + return err + } + + if !matches { + continue + } + } + + objs = append(objs, obj) + } + + if err := meta.SetList(listObj, objs); err != nil { + return err + } + + return s.setListRV(listObj, s.rev) +} + +// GuaranteedUpdate reads the current object at the given key, passes it to +// tryUpdate, and writes the result back. If the key does not exist and +// ignoreNotFound is false, a KeyNotFound error is returned. The operation +// is retried internally if the tryUpdate function returns a retriable error. 
+func (s *Store) GuaranteedUpdate( + ctx context.Context, + key string, + destination runtime.Object, + ignoreNotFound bool, + preconditions *storage.Preconditions, + tryUpdate storage.UpdateFunc, + cachedExistingObject runtime.Object, +) error { + s.mu.Lock() + defer s.mu.Unlock() + + existing, ok := s.items[key] + + var currentObj runtime.Object + var currentRV uint64 + + if ok { + obj, err := s.decodeNew(existing.data) + if err != nil { + return err + } + + currentObj = obj + currentRV = existing.rv + } else { + if !ignoreNotFound { + return storage.NewKeyNotFoundError(key, 0) + } + + currentObj = destination.DeepCopyObject() + } + + if err := s.checkPreconditions(key, preconditions, currentObj); err != nil { + return err + } + + updated, _, err := tryUpdate(currentObj, storage.ResponseMeta{ResourceVersion: currentRV}) + if err != nil { + return err + } + + s.rev++ + rv := s.rev + + if err := s.Versioner().UpdateObject(updated, rv); err != nil { + return fmt.Errorf("UpdateObject failed: %w", err) + } + + data, err := s.encode(updated) + if err != nil { + return err + } + + s.items[key] = &item{ + key: key, + data: data, + rv: rv, + } + + if err := s.decode(data, destination); err != nil { + return err + } + + evType := watch.Modified + if !ok { + evType = watch.Added + } + + // Deep copy for watcher isolation. + s.watchers.sendLocked(watch.Event{ + Type: evType, + Object: updated.DeepCopyObject(), + }, key) + + return nil +} + +// Stats returns basic storage statistics. Currently reports only the number +// of stored objects. +func (s *Store) Stats(ctx context.Context) (storage.Stats, error) { + s.mu.RLock() + defer s.mu.RUnlock() + + return storage.Stats{ + ObjectCount: int64(len(s.items)), + }, nil +} + +// ReadinessCheck reports whether the store is ready. The in-memory store is +// always ready, so this always returns nil. +func (s *Store) ReadinessCheck() error { + return nil +} + +// RequestWatchProgress is a no-op for the in-memory store. 
It exists to +// satisfy the storage.Interface and is only meaningful for etcd-backed stores. +func (s *Store) RequestWatchProgress(ctx context.Context) error { + return nil +} + +// GetCurrentResourceVersion returns the store's current monotonic revision. +func (s *Store) GetCurrentResourceVersion(ctx context.Context) (uint64, error) { + s.mu.RLock() + defer s.mu.RUnlock() + + return s.rev, nil +} + +// EnableResourceSizeEstimation is a no-op for the in-memory store. Size +// estimation is only relevant for disk-backed storage backends. +func (s *Store) EnableResourceSizeEstimation(storage.KeysFunc) error { + return nil +} + +// CompactRevision returns the latest observed compacted revision. The +// in-memory store does not perform compaction, so this always returns 0. +func (s *Store) CompactRevision() int64 { + return 0 +} + +// --- internal helpers --- + +// encode serializes an object into bytes using the store's codec. +func (s *Store) encode(obj runtime.Object) ([]byte, error) { + var buf bytes.Buffer + if err := s.codec.Encode(obj, &buf); err != nil { + return nil, fmt.Errorf("encode failed: %w", err) + } + + return buf.Bytes(), nil +} + +// decode deserializes bytes into an existing object using the store's codec. +func (s *Store) decode(data []byte, into runtime.Object) error { + _, _, err := s.codec.Decode(data, nil, into) + if err != nil { + return fmt.Errorf("decode failed: %w", err) + } + + return nil +} + +// decodeNew deserializes bytes into a new object allocated by the codec. +func (s *Store) decodeNew(data []byte) (runtime.Object, error) { + obj, _, err := s.codec.Decode(data, nil, nil) + if err != nil { + return nil, fmt.Errorf("decode failed: %w", err) + } + + return obj, nil +} + +// setListRV sets the resource version on a list object using the versioner. 
+func (s *Store) setListRV(listObj runtime.Object, rv uint64) error { + return s.Versioner().UpdateList(listObj, rv, "", nil) +} + +// predicateEmpty returns true if the predicate performs no filtering. +// It guards against nil Label/Field selectors that would panic in +// SelectionPredicate.Empty(). +func predicateEmpty(p storage.SelectionPredicate) bool { + if p.Label == nil && p.Field == nil { + return true + } + + return p.Empty() +} + +// checkPreconditions verifies that the given preconditions are met by the +// existing object. Returns an error if UID or ResourceVersion do not match. +func (s *Store) checkPreconditions(key string, preconditions *storage.Preconditions, obj runtime.Object) error { + if preconditions == nil { + return nil + } + + if preconditions.UID != nil { + accessor, err := meta.Accessor(obj) + if err != nil { + return err + } + + if accessor.GetUID() != *preconditions.UID { + return storage.NewInvalidObjError(key, fmt.Sprintf( + "precondition UID mismatch: expected %s, got %s", + *preconditions.UID, accessor.GetUID(), + )) + } + } + + if preconditions.ResourceVersion != nil { + rv, err := s.Versioner().ObjectResourceVersion(obj) + if err != nil { + return err + } + + expectedRV, err := s.Versioner().ParseResourceVersion(*preconditions.ResourceVersion) + if err != nil { + return err + } + + if rv != expectedRV { + return storage.NewInvalidObjError(key, fmt.Sprintf( + "precondition ResourceVersion mismatch: expected %d, got %d", + expectedRV, rv, + )) + } + } + + return nil +} diff --git a/pkg/storage/memory/store_test.go b/pkg/storage/memory/store_test.go new file mode 100644 index 000000000..ffd6edc0f --- /dev/null +++ b/pkg/storage/memory/store_test.go @@ -0,0 +1,794 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package memory + +import ( + "context" + "fmt" + "testing" + "time" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/apiserver/pkg/storage" +) + +// codec is the shared codec used by all tests. UnstructuredJSONScheme handles +// encoding and decoding of unstructured.Unstructured objects without needing +// a registered scheme or concrete Go types. +var codec runtime.Codec = unstructured.UnstructuredJSONScheme + +// newTestObject builds an *unstructured.Unstructured with the given name and +// namespace, suitable for storage in the test store. +func newTestObject(name, namespace string) *unstructured.Unstructured { + return &unstructured.Unstructured{ + Object: map[string]any{ + "apiVersion": "v1", + "kind": "GPU", + "metadata": map[string]any{ + "name": name, + "namespace": namespace, + }, + }, + } +} + +func TestStore_CreateAndGet(t *testing.T) { + s := NewStore(codec) + ctx := context.Background() + + obj := newTestObject("gpu-0", "default") + out := &unstructured.Unstructured{} + + if err := s.Create(ctx, "/gpus/default/gpu-0", obj, out, 0); err != nil { + t.Fatalf("Create failed: %v", err) + } + + // Verify resourceVersion was set on the output object. + rv := out.GetResourceVersion() + if rv == "" { + t.Fatal("expected resourceVersion to be set on out, got empty string") + } + + if rv != "1" { + t.Fatalf("expected resourceVersion '1', got %q", rv) + } + + // Get the object back. 
+ got := &unstructured.Unstructured{} + if err := s.Get(ctx, "/gpus/default/gpu-0", storage.GetOptions{}, got); err != nil { + t.Fatalf("Get failed: %v", err) + } + + if got.GetName() != "gpu-0" { + t.Fatalf("expected name 'gpu-0', got %q", got.GetName()) + } + + if got.GetResourceVersion() != "1" { + t.Fatalf("expected resourceVersion '1', got %q", got.GetResourceVersion()) + } +} + +func TestStore_CreateDuplicate(t *testing.T) { + s := NewStore(codec) + ctx := context.Background() + + obj := newTestObject("gpu-0", "default") + + if err := s.Create(ctx, "/gpus/default/gpu-0", obj, nil, 0); err != nil { + t.Fatalf("first Create failed: %v", err) + } + + err := s.Create(ctx, "/gpus/default/gpu-0", obj, nil, 0) + if err == nil { + t.Fatal("expected error on duplicate Create, got nil") + } + + if !storage.IsExist(err) { + t.Fatalf("expected IsExist error, got: %v", err) + } +} + +func TestStore_GetNotFound(t *testing.T) { + s := NewStore(codec) + ctx := context.Background() + + got := &unstructured.Unstructured{} + err := s.Get(ctx, "/gpus/default/gpu-missing", storage.GetOptions{}, got) + + if err == nil { + t.Fatal("expected error on Get for missing key, got nil") + } + + if !storage.IsNotFound(err) { + t.Fatalf("expected IsNotFound error, got: %v", err) + } +} + +func TestStore_GetList(t *testing.T) { + s := NewStore(codec) + ctx := context.Background() + + // Create 3 objects under the same prefix. 
+ for _, name := range []string{"gpu-0", "gpu-1", "gpu-2"} { + obj := newTestObject(name, "default") + if err := s.Create(ctx, "/gpus/default/"+name, obj, nil, 0); err != nil { + t.Fatalf("Create %s failed: %v", name, err) + } + } + + list := &unstructured.UnstructuredList{} + opts := storage.ListOptions{ + Recursive: true, + Predicate: storage.SelectionPredicate{}, + } + + if err := s.GetList(ctx, "/gpus/default", opts, list); err != nil { + t.Fatalf("GetList failed: %v", err) + } + + if len(list.Items) != 3 { + t.Fatalf("expected 3 items, got %d", len(list.Items)) + } + + // Verify the list has a resource version. + if list.GetResourceVersion() == "" { + t.Fatal("expected list resourceVersion to be set") + } +} + +func TestStore_GuaranteedUpdate(t *testing.T) { + s := NewStore(codec) + ctx := context.Background() + + obj := newTestObject("gpu-0", "default") + if err := s.Create(ctx, "/gpus/default/gpu-0", obj, nil, 0); err != nil { + t.Fatalf("Create failed: %v", err) + } + + dest := &unstructured.Unstructured{} + err := s.GuaranteedUpdate(ctx, "/gpus/default/gpu-0", dest, false, nil, + func(input runtime.Object, res storage.ResponseMeta) (runtime.Object, *uint64, error) { + u := input.(*unstructured.Unstructured) + labels := u.GetLabels() + if labels == nil { + labels = make(map[string]string) + } + + labels["test-key"] = "test-value" + u.SetLabels(labels) + + return u, nil, nil + }, nil) + if err != nil { + t.Fatalf("GuaranteedUpdate failed: %v", err) + } + + // Verify the label was persisted. + got := &unstructured.Unstructured{} + if err := s.Get(ctx, "/gpus/default/gpu-0", storage.GetOptions{}, got); err != nil { + t.Fatalf("Get after update failed: %v", err) + } + + labels := got.GetLabels() + if labels["test-key"] != "test-value" { + t.Fatalf("expected label 'test-key'='test-value', got labels: %v", labels) + } + + // Verify resourceVersion was incremented. 
+ if got.GetResourceVersion() != "2" { + t.Fatalf("expected resourceVersion '2' after update, got %q", got.GetResourceVersion()) + } +} + +func TestStore_GuaranteedUpdate_NotFound(t *testing.T) { + s := NewStore(codec) + ctx := context.Background() + + dest := &unstructured.Unstructured{} + err := s.GuaranteedUpdate(ctx, "/gpus/default/gpu-missing", dest, false, nil, + func(input runtime.Object, res storage.ResponseMeta) (runtime.Object, *uint64, error) { + return input, nil, nil + }, nil) + + if err == nil { + t.Fatal("expected error on GuaranteedUpdate for missing key with ignoreNotFound=false") + } + + if !storage.IsNotFound(err) { + t.Fatalf("expected IsNotFound error, got: %v", err) + } +} + +func TestStore_Delete(t *testing.T) { + s := NewStore(codec) + ctx := context.Background() + + obj := newTestObject("gpu-0", "default") + if err := s.Create(ctx, "/gpus/default/gpu-0", obj, nil, 0); err != nil { + t.Fatalf("Create failed: %v", err) + } + + out := &unstructured.Unstructured{} + err := s.Delete(ctx, "/gpus/default/gpu-0", out, nil, nil, nil, storage.DeleteOptions{}) + if err != nil { + t.Fatalf("Delete failed: %v", err) + } + + if out.GetName() != "gpu-0" { + t.Fatalf("expected deleted object name 'gpu-0', got %q", out.GetName()) + } + + // Verify the object is gone. 
+ got := &unstructured.Unstructured{} + err = s.Get(ctx, "/gpus/default/gpu-0", storage.GetOptions{}, got) + + if err == nil { + t.Fatal("expected NotFound error after delete, got nil") + } + + if !storage.IsNotFound(err) { + t.Fatalf("expected IsNotFound error, got: %v", err) + } +} + +func TestStore_DeleteNotFound(t *testing.T) { + s := NewStore(codec) + ctx := context.Background() + + out := &unstructured.Unstructured{} + err := s.Delete(ctx, "/gpus/default/gpu-missing", out, nil, nil, nil, storage.DeleteOptions{}) + + if err == nil { + t.Fatal("expected error on Delete for missing key, got nil") + } + + if !storage.IsNotFound(err) { + t.Fatalf("expected IsNotFound error, got: %v", err) + } +} + +func TestStore_Watch(t *testing.T) { + s := NewStore(codec) + ctx := t.Context() + + // Watch subscription is synchronous — the watcher is registered before + // Watch() returns. The subsequent Create() will acquire the store lock + // and broadcast to all registered watchers, including ours. + w, err := s.Watch(ctx, "/gpus/default/", storage.ListOptions{}) + if err != nil { + t.Fatalf("Watch failed: %v", err) + } + + defer w.Stop() + + // Create object — guaranteed to notify our watcher. + obj := newTestObject("gpu-0", "default") + if err := s.Create(ctx, "/gpus/default/gpu-0", obj, nil, 0); err != nil { + t.Fatalf("Create failed: %v", err) + } + + select { + case ev := <-w.ResultChan(): + if ev.Type != watch.Added { + t.Fatalf("expected ADDED event, got %v", ev.Type) + } + + u, ok := ev.Object.(*unstructured.Unstructured) + if !ok { + t.Fatalf("expected *unstructured.Unstructured, got %T", ev.Object) + } + + if u.GetName() != "gpu-0" { + t.Fatalf("expected event object name 'gpu-0', got %q", u.GetName()) + } + case <-time.After(2 * time.Second): + t.Fatal("timed out waiting for watch event") + } +} + +func TestStore_Watch_Delete(t *testing.T) { + s := NewStore(codec) + ctx := t.Context() + + // Create the object first, before starting the watch. 
+ obj := newTestObject("gpu-0", "default") + if err := s.Create(ctx, "/gpus/default/gpu-0", obj, nil, 0); err != nil { + t.Fatalf("Create failed: %v", err) + } + + w, err := s.Watch(ctx, "/gpus/default/", storage.ListOptions{}) + if err != nil { + t.Fatalf("Watch failed: %v", err) + } + + defer w.Stop() + + // Delete the object; the watcher should receive a DELETED event. + out := &unstructured.Unstructured{} + if err := s.Delete(ctx, "/gpus/default/gpu-0", out, nil, nil, nil, storage.DeleteOptions{}); err != nil { + t.Fatalf("Delete failed: %v", err) + } + + select { + case ev := <-w.ResultChan(): + if ev.Type != watch.Deleted { + t.Fatalf("expected DELETED event, got %v", ev.Type) + } + + u, ok := ev.Object.(*unstructured.Unstructured) + if !ok { + t.Fatalf("expected *unstructured.Unstructured, got %T", ev.Object) + } + + if u.GetName() != "gpu-0" { + t.Fatalf("expected event object name 'gpu-0', got %q", u.GetName()) + } + case <-time.After(2 * time.Second): + t.Fatal("timed out waiting for DELETED watch event") + } +} + +func TestStore_Stats(t *testing.T) { + s := NewStore(codec) + ctx := context.Background() + + for _, name := range []string{"gpu-0", "gpu-1"} { + obj := newTestObject(name, "default") + if err := s.Create(ctx, "/gpus/default/"+name, obj, nil, 0); err != nil { + t.Fatalf("Create %s failed: %v", name, err) + } + } + + stats, err := s.Stats(ctx) + if err != nil { + t.Fatalf("Stats failed: %v", err) + } + + if stats.ObjectCount != 2 { + t.Fatalf("expected ObjectCount 2, got %d", stats.ObjectCount) + } +} + +func TestStore_ReadinessCheck(t *testing.T) { + s := NewStore(codec) + + if err := s.ReadinessCheck(); err != nil { + t.Fatalf("ReadinessCheck failed: %v", err) + } +} + +func TestStore_GetCurrentResourceVersion(t *testing.T) { + s := NewStore(codec) + ctx := context.Background() + + rv0, err := s.GetCurrentResourceVersion(ctx) + if err != nil { + t.Fatalf("GetCurrentResourceVersion failed: %v", err) + } + + if rv0 != 0 { + t.Fatalf("expected 
initial resourceVersion 0, got %d", rv0) + } + + // Create two objects; each should increment the revision. + for _, name := range []string{"gpu-0", "gpu-1"} { + obj := newTestObject(name, "default") + if err := s.Create(ctx, "/gpus/default/"+name, obj, nil, 0); err != nil { + t.Fatalf("Create %s failed: %v", name, err) + } + } + + rv2, err := s.GetCurrentResourceVersion(ctx) + if err != nil { + t.Fatalf("GetCurrentResourceVersion failed: %v", err) + } + + if rv2 != 2 { + t.Fatalf("expected resourceVersion 2 after two creates, got %d", rv2) + } +} + +func TestStore_DeleteWithPreconditions(t *testing.T) { + s := NewStore(codec) + ctx := context.Background() + + obj := newTestObject("gpu-0", "default") + obj.SetUID("test-uid-123") + + out := &unstructured.Unstructured{} + if err := s.Create(ctx, "/gpus/default/gpu-0", obj, out, 0); err != nil { + t.Fatalf("Create failed: %v", err) + } + + // Delete with wrong UID precondition should fail. + wrongUID := types.UID("wrong-uid") + precond := &storage.Preconditions{UID: &wrongUID} + delOut := &unstructured.Unstructured{} + err := s.Delete(ctx, "/gpus/default/gpu-0", delOut, precond, nil, nil, storage.DeleteOptions{}) + if err == nil { + t.Fatal("expected error on Delete with wrong UID precondition, got nil") + } + + // Verify the object still exists. + got := &unstructured.Unstructured{} + if err := s.Get(ctx, "/gpus/default/gpu-0", storage.GetOptions{}, got); err != nil { + t.Fatalf("Get after failed delete should succeed: %v", err) + } + + // Delete with correct UID precondition should succeed. 
+ correctUID := types.UID("test-uid-123") + precond = &storage.Preconditions{UID: &correctUID} + delOut = &unstructured.Unstructured{} + if err := s.Delete(ctx, "/gpus/default/gpu-0", delOut, precond, nil, nil, storage.DeleteOptions{}); err != nil { + t.Fatalf("Delete with correct UID precondition failed: %v", err) + } + + if delOut.GetName() != "gpu-0" { + t.Fatalf("expected deleted object name 'gpu-0', got %q", delOut.GetName()) + } + + // Verify the object is gone. + err = s.Get(ctx, "/gpus/default/gpu-0", storage.GetOptions{}, &unstructured.Unstructured{}) + if err == nil { + t.Fatal("expected NotFound error after delete, got nil") + } + + if !storage.IsNotFound(err) { + t.Fatalf("expected IsNotFound error, got: %v", err) + } +} + +func TestStore_GuaranteedUpdate_Preconditions(t *testing.T) { + s := NewStore(codec) + ctx := context.Background() + + obj := newTestObject("gpu-0", "default") + obj.SetUID("known-uid-456") + + if err := s.Create(ctx, "/gpus/default/gpu-0", obj, nil, 0); err != nil { + t.Fatalf("Create failed: %v", err) + } + + // GuaranteedUpdate with wrong UID precondition should fail. + wrongUID := types.UID("wrong-uid") + precond := &storage.Preconditions{UID: &wrongUID} + dest := &unstructured.Unstructured{} + err := s.GuaranteedUpdate(ctx, "/gpus/default/gpu-0", dest, false, precond, + func(input runtime.Object, res storage.ResponseMeta) (runtime.Object, *uint64, error) { + return input, nil, nil + }, nil) + if err == nil { + t.Fatal("expected error on GuaranteedUpdate with wrong UID precondition, got nil") + } + + // Verify the object was not modified (still at resourceVersion 1). + got := &unstructured.Unstructured{} + if err := s.Get(ctx, "/gpus/default/gpu-0", storage.GetOptions{}, got); err != nil { + t.Fatalf("Get failed: %v", err) + } + + if got.GetResourceVersion() != "1" { + t.Fatalf("expected resourceVersion '1' (unmodified), got %q", got.GetResourceVersion()) + } + + // GuaranteedUpdate with correct UID precondition should succeed. 
+ correctUID := types.UID("known-uid-456") + precond = &storage.Preconditions{UID: &correctUID} + dest = &unstructured.Unstructured{} + err = s.GuaranteedUpdate(ctx, "/gpus/default/gpu-0", dest, false, precond, + func(input runtime.Object, res storage.ResponseMeta) (runtime.Object, *uint64, error) { + u := input.(*unstructured.Unstructured) + labels := u.GetLabels() + if labels == nil { + labels = make(map[string]string) + } + + labels["updated"] = "true" + u.SetLabels(labels) + + return u, nil, nil + }, nil) + if err != nil { + t.Fatalf("GuaranteedUpdate with correct UID precondition failed: %v", err) + } + + // Verify the update was applied. + got = &unstructured.Unstructured{} + if err := s.Get(ctx, "/gpus/default/gpu-0", storage.GetOptions{}, got); err != nil { + t.Fatalf("Get after update failed: %v", err) + } + + if got.GetLabels()["updated"] != "true" { + t.Fatalf("expected label 'updated'='true', got labels: %v", got.GetLabels()) + } + + if got.GetResourceVersion() != "2" { + t.Fatalf("expected resourceVersion '2' after update, got %q", got.GetResourceVersion()) + } +} + +func TestStore_GuaranteedUpdate_IgnoreNotFound(t *testing.T) { + s := NewStore(codec) + ctx := context.Background() + + dest := &unstructured.Unstructured{} + var receivedEmpty bool + err := s.GuaranteedUpdate(ctx, "/gpus/default/gpu-new", dest, true, nil, + func(input runtime.Object, res storage.ResponseMeta) (runtime.Object, *uint64, error) { + u := input.(*unstructured.Unstructured) + // When ignoreNotFound is true and the key doesn't exist, the input + // should be a zero-value object (deep copy of destination). + if u.GetName() == "" && u.GetNamespace() == "" { + receivedEmpty = true + } + + // Populate the object so it gets created. 
+ u.SetUnstructuredContent(map[string]any{ + "apiVersion": "v1", + "kind": "GPU", + "metadata": map[string]any{ + "name": "gpu-new", + "namespace": "default", + }, + }) + + return u, nil, nil + }, nil) + if err != nil { + t.Fatalf("GuaranteedUpdate with ignoreNotFound=true failed: %v", err) + } + + if !receivedEmpty { + t.Fatal("expected tryUpdate to receive a zero-value object, but it did not") + } + + // Verify the object was created and can be retrieved. + got := &unstructured.Unstructured{} + if err := s.Get(ctx, "/gpus/default/gpu-new", storage.GetOptions{}, got); err != nil { + t.Fatalf("Get after GuaranteedUpdate (ignoreNotFound) failed: %v", err) + } + + if got.GetName() != "gpu-new" { + t.Fatalf("expected name 'gpu-new', got %q", got.GetName()) + } + + if got.GetResourceVersion() == "" { + t.Fatal("expected resourceVersion to be set, got empty string") + } +} + +func TestStore_Watch_Modified(t *testing.T) { + s := NewStore(codec) + ctx := t.Context() + + w, err := s.Watch(ctx, "/gpus/default/", storage.ListOptions{}) + if err != nil { + t.Fatalf("Watch failed: %v", err) + } + + defer w.Stop() + + // Create an object. + obj := newTestObject("gpu-0", "default") + if err := s.Create(ctx, "/gpus/default/gpu-0", obj, nil, 0); err != nil { + t.Fatalf("Create failed: %v", err) + } + + // Consume the ADDED event. + select { + case ev := <-w.ResultChan(): + if ev.Type != watch.Added { + t.Fatalf("expected ADDED event, got %v", ev.Type) + } + case <-time.After(2 * time.Second): + t.Fatal("timed out waiting for ADDED watch event") + } + + // Update the object via GuaranteedUpdate. 
+ dest := &unstructured.Unstructured{} + err = s.GuaranteedUpdate(ctx, "/gpus/default/gpu-0", dest, false, nil, + func(input runtime.Object, res storage.ResponseMeta) (runtime.Object, *uint64, error) { + u := input.(*unstructured.Unstructured) + labels := u.GetLabels() + if labels == nil { + labels = make(map[string]string) + } + + labels["modified"] = "true" + u.SetLabels(labels) + + return u, nil, nil + }, nil) + if err != nil { + t.Fatalf("GuaranteedUpdate failed: %v", err) + } + + // Verify a MODIFIED event is received. + select { + case ev := <-w.ResultChan(): + if ev.Type != watch.Modified { + t.Fatalf("expected MODIFIED event, got %v", ev.Type) + } + + u, ok := ev.Object.(*unstructured.Unstructured) + if !ok { + t.Fatalf("expected *unstructured.Unstructured, got %T", ev.Object) + } + + if u.GetLabels()["modified"] != "true" { + t.Fatalf("expected label 'modified'='true' on event object, got labels: %v", u.GetLabels()) + } + case <-time.After(2 * time.Second): + t.Fatal("timed out waiting for MODIFIED watch event") + } +} + +func TestStore_Watch_KeyPrefixFiltering(t *testing.T) { + s := NewStore(codec) + ctx := t.Context() + + // Watch only the /gpus/default/ prefix. + w, err := s.Watch(ctx, "/gpus/default/", storage.ListOptions{}) + if err != nil { + t.Fatalf("Watch failed: %v", err) + } + + defer w.Stop() + + // Create an object under a different namespace; should NOT produce an event. + otherObj := newTestObject("gpu-0", "other-ns") + if err := s.Create(ctx, "/gpus/other-ns/gpu-0", otherObj, nil, 0); err != nil { + t.Fatalf("Create other-ns object failed: %v", err) + } + + // Verify no event is received within a short timeout. + select { + case ev := <-w.ResultChan(): + t.Fatalf("expected no event for other-ns object, but got %v event", ev.Type) + case <-time.After(500 * time.Millisecond): + // Good: no event received. + } + + // Create an object under the watched prefix; SHOULD produce an ADDED event. 
+ defaultObj := newTestObject("gpu-0", "default") + if err := s.Create(ctx, "/gpus/default/gpu-0", defaultObj, nil, 0); err != nil { + t.Fatalf("Create default object failed: %v", err) + } + + select { + case ev := <-w.ResultChan(): + if ev.Type != watch.Added { + t.Fatalf("expected ADDED event, got %v", ev.Type) + } + + u, ok := ev.Object.(*unstructured.Unstructured) + if !ok { + t.Fatalf("expected *unstructured.Unstructured, got %T", ev.Object) + } + + if u.GetName() != "gpu-0" { + t.Fatalf("expected event object name 'gpu-0', got %q", u.GetName()) + } + case <-time.After(2 * time.Second): + t.Fatal("timed out waiting for ADDED watch event for default namespace object") + } +} + +func TestStore_GetIgnoreNotFound(t *testing.T) { + s := NewStore(codec) + ctx := context.Background() + + got := &unstructured.Unstructured{} + err := s.Get(ctx, "/gpus/default/gpu-missing", storage.GetOptions{IgnoreNotFound: true}, got) + if err != nil { + t.Fatalf("expected no error with IgnoreNotFound=true, got: %v", err) + } + + // The object should be at its zero value (no name set). + if got.GetName() != "" { + t.Fatalf("expected empty name on zero-value object, got %q", got.GetName()) + } +} + +func TestStore_GetList_NonRecursive(t *testing.T) { + s := NewStore(codec) + ctx := context.Background() + + // Create two objects under the same prefix. + for _, name := range []string{"gpu-0", "gpu-1"} { + obj := newTestObject(name, "default") + if err := s.Create(ctx, "/gpus/default/"+name, obj, nil, 0); err != nil { + t.Fatalf("Create %s failed: %v", name, err) + } + } + + // GetList with Recursive=false on an exact key should return only that one item. 
+ list := &unstructured.UnstructuredList{} + opts := storage.ListOptions{ + Recursive: false, + Predicate: storage.SelectionPredicate{}, + } + + if err := s.GetList(ctx, "/gpus/default/gpu-0", opts, list); err != nil { + t.Fatalf("GetList failed: %v", err) + } + + if len(list.Items) != 1 { + t.Fatalf("expected 1 item with non-recursive GetList, got %d", len(list.Items)) + } + + if list.Items[0].GetName() != "gpu-0" { + t.Fatalf("expected item name 'gpu-0', got %q", list.Items[0].GetName()) + } +} + +func TestStore_ImplementsInterface(t *testing.T) { + // Compile-time check that *Store satisfies storage.Interface. + var _ storage.Interface = (*Store)(nil) +} + +func TestStore_Watch_RejectsResourceVersion(t *testing.T) { + s := NewStore(codec) + ctx := t.Context() + + _, err := s.Watch(ctx, "/gpus/default/", storage.ListOptions{ + ResourceVersion: "5", + }) + if err == nil { + t.Fatal("expected error when Watch is called with non-empty ResourceVersion, got nil") + } +} + +func TestStore_Watch_EventDropOnFullBuffer(t *testing.T) { + s := NewStore(codec) + ctx := t.Context() + + w, err := s.Watch(ctx, "/gpus/default/", storage.ListOptions{}) + if err != nil { + t.Fatalf("Watch failed: %v", err) + } + defer w.Stop() + + // Fill the channel buffer (watchChannelSize = 100) plus overflow. + for i := 0; i < watchChannelSize+10; i++ { + name := fmt.Sprintf("gpu-%d", i) + obj := newTestObject(name, "default") + if err := s.Create(ctx, "/gpus/default/"+name, obj, nil, 0); err != nil { + t.Fatalf("Create %s failed: %v", name, err) + } + } + + // Drain the channel. We should get exactly watchChannelSize events + // (the rest were dropped because the buffer was full). 
+ received := 0 + for { + select { + case _, ok := <-w.ResultChan(): + if !ok { + t.Fatal("channel unexpectedly closed") + } + received++ + default: + goto done + } + } +done: + if received != watchChannelSize { + t.Fatalf("expected %d events (buffer size), got %d", watchChannelSize, received) + } +} diff --git a/pkg/storage/memory/watch.go b/pkg/storage/memory/watch.go new file mode 100644 index 000000000..6d6f7dd9b --- /dev/null +++ b/pkg/storage/memory/watch.go @@ -0,0 +1,130 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package memory + +import ( + "strings" + "sync" + "sync/atomic" + + "k8s.io/apimachinery/pkg/watch" + "k8s.io/klog/v2" +) + +const watchChannelSize = 100 + +// watchManager tracks active watchers and broadcasts events to them. +// It uses its own mutex, separate from Store.mu, because sendLocked +// is called while the Store write lock is held. +type watchManager struct { + mu sync.Mutex + watchers map[int]*memoryWatcher + nextID int + watchBufferSize int +} + +func newWatchManager(bufferSize int) *watchManager { + return &watchManager{ + watchers: make(map[int]*memoryWatcher), + watchBufferSize: bufferSize, + } +} + +// watch creates a new watcher for the given key prefix and registers it. +// The caller must cancel the context or call Stop() to clean up. 
+func (wm *watchManager) watch(key string) *memoryWatcher { + wm.mu.Lock() + defer wm.mu.Unlock() + + id := wm.nextID + wm.nextID++ + + w := &memoryWatcher{ + id: id, + key: key, + ch: make(chan watch.Event, wm.watchBufferSize), + done: make(chan struct{}), + parent: wm, + } + + wm.watchers[id] = w + + return w +} + +// sendLocked broadcasts an event to all registered watchers whose key prefix +// matches the event's object key. This method is called while Store.mu is +// held (write lock), so it uses its own mutex for watcher iteration. +// Sends are non-blocking: if a watcher's channel is full, the event is dropped. +func (wm *watchManager) sendLocked(ev watch.Event, objectKey string) { + wm.mu.Lock() + defer wm.mu.Unlock() + + for _, w := range wm.watchers { + if !strings.HasPrefix(objectKey, w.key) { + continue + } + + select { + case w.ch <- ev: + default: + w.droppedEvents.Add(1) + } + } +} + +// remove unregisters a watcher by ID. +func (wm *watchManager) remove(id int) { + wm.mu.Lock() + defer wm.mu.Unlock() + + delete(wm.watchers, id) +} + +// memoryWatcher implements watch.Interface for in-memory storage events. +type memoryWatcher struct { + id int + key string + ch chan watch.Event + done chan struct{} + once sync.Once + parent *watchManager + droppedEvents atomic.Int64 +} + +var _ watch.Interface = (*memoryWatcher)(nil) + +// ResultChan returns the channel that receives watch events. +func (w *memoryWatcher) ResultChan() <-chan watch.Event { + return w.ch +} + +// Stop terminates the watcher, unregisters it from the parent manager, +// and closes the result channel. It is safe to call multiple times. 
+func (w *memoryWatcher) Stop() { + w.once.Do(func() { + if dropped := w.droppedEvents.Load(); dropped > 0 { + klog.V(2).InfoS("Watch stopped with dropped events", + "watcherID", w.id, + "key", w.key, + "droppedEvents", dropped, + ) + } + + w.parent.remove(w.id) + close(w.done) + close(w.ch) + }) +} diff --git a/pkg/storage/storagebackend/config.go b/pkg/storage/storagebackend/config.go index f6867f337..840f52708 100644 --- a/pkg/storage/storagebackend/config.go +++ b/pkg/storage/storagebackend/config.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -28,6 +28,10 @@ type Config struct { KineSocketPath string DatabaseDir string + // InMemory skips Kine/SQLite entirely. Services supply their own + // in-memory storage.Interface, so the backend only needs to report ready. + InMemory bool + StorageConfig apistorage.Config } @@ -40,6 +44,7 @@ func NewConfig(ctx context.Context, opts options.CompletedOptions) (*Config, err KineConfig: opts.KineConfig, KineSocketPath: opts.KineSocketPath, DatabaseDir: opts.DatabaseDir, + InMemory: opts.InMemory, } if err := opts.ApplyTo(&config.StorageConfig); err != nil { diff --git a/pkg/storage/storagebackend/config_test.go b/pkg/storage/storagebackend/config_test.go index ed5b5fcc3..e665891c8 100644 --- a/pkg/storage/storagebackend/config_test.go +++ b/pkg/storage/storagebackend/config_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -25,6 +25,7 @@ func TestNewConfig(t *testing.T) { ctx := context.Background() opts := options.NewOptions() + opts.InMemory = false opts.DatabasePath = "/tmp/nvsentinel/test.db" completedOpts, err := opts.Complete() diff --git a/pkg/storage/storagebackend/options/options.go b/pkg/storage/storagebackend/options/options.go index 306d02b4f..8951abbcf 100644 --- a/pkg/storage/storagebackend/options/options.go +++ b/pkg/storage/storagebackend/options/options.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -28,6 +28,10 @@ import ( ) type Options struct { + // InMemory skips the Kine/SQLite storage backend entirely. + // When true, services provide their own in-memory storage.Interface. + InMemory bool + DatabasePath string CompactionInterval time.Duration CompactionBatchSize int64 @@ -49,6 +53,7 @@ type CompletedOptions struct { func NewOptions() *Options { return &Options{ + InMemory: true, DatabasePath: "/var/lib/nvidia-device-api/state.db", CompactionInterval: 5 * time.Minute, CompactionBatchSize: 1000, @@ -64,6 +69,9 @@ func (o *Options) AddFlags(fss *cliflag.NamedFlagSets) { storageFs := fss.FlagSet("storage") + storageFs.BoolVar(&o.InMemory, "in-memory", o.InMemory, + "Use in-memory storage instead of SQLite/Kine. Services provide their own storage.Interface.") + storageFs.StringVar(&o.DatabasePath, "database-path", o.DatabasePath, "The path to the SQLite database file. Must be an absolute path.") @@ -80,6 +88,12 @@ func (o *Options) Complete() (CompletedOptions, error) { return CompletedOptions{}, nil } + // In-memory mode skips all Kine/SQLite configuration. 
+ if o.InMemory { + completed := completedOptions{Options: *o} + return CompletedOptions{completedOptions: &completed}, nil + } + if o.KineSocketPath == "" { o.KineSocketPath = "/var/run/nvidia-device-api/kine.sock" } @@ -127,6 +141,11 @@ func (o *Options) Validate() []error { return nil } + // In-memory mode requires no Kine/SQLite configuration. + if o.InMemory { + return nil + } + allErrors := []error{} if o.DatabasePath == "" { diff --git a/pkg/storage/storagebackend/options/options_test.go b/pkg/storage/storagebackend/options/options_test.go index 9079915fb..e5cfe1e83 100644 --- a/pkg/storage/storagebackend/options/options_test.go +++ b/pkg/storage/storagebackend/options/options_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -61,6 +61,7 @@ func TestAddFlags(t *testing.T) { func TestComplete(t *testing.T) { t.Run("Default assignments", func(t *testing.T) { opts := NewOptions() + opts.InMemory = false opts.DatabasePath = "" opts.KineSocketPath = "" @@ -85,6 +86,7 @@ func TestComplete(t *testing.T) { t.Run("Trims unix prefix from SocketPath", func(t *testing.T) { opts := NewOptions() + opts.InMemory = false opts.KineSocketPath = "unix:///tmp/test.sock" completed, _ := opts.Complete() @@ -95,6 +97,7 @@ func TestComplete(t *testing.T) { t.Run("Maps intervals to KineConfig", func(t *testing.T) { opts := NewOptions() + opts.InMemory = false opts.CompactionInterval = 10 * time.Minute opts.WatchProgressNotifyInterval = 15 * time.Second @@ -181,6 +184,7 @@ func TestValidate(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { opts := NewOptions() + opts.InMemory = false tt.modify(opts) completed, err := opts.Complete() @@ -211,6 +215,7 @@ func TestValidate(t *testing.T) { func TestApplyTo(t 
*testing.T) { opts := NewOptions() + opts.InMemory = false completed, _ := opts.Complete() storageCfg := &apistorage.Config{} diff --git a/pkg/storage/storagebackend/storage.go b/pkg/storage/storagebackend/storage.go index 2502efac9..ab790b4f5 100644 --- a/pkg/storage/storagebackend/storage.go +++ b/pkg/storage/storagebackend/storage.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ import ( "path/filepath" "strings" "sync/atomic" + "syscall" "time" "github.com/k3s-io/kine/pkg/endpoint" @@ -39,6 +40,10 @@ type Storage struct { StorageConfig apistorage.Config ETCDConfig *endpoint.ETCDConfig + // InMemory skips Kine/SQLite entirely. When true, the storage backend + // reports ready immediately and services use their own in-memory storage. 
+ InMemory bool + isReady atomic.Bool } @@ -52,10 +57,15 @@ func (c *CompletedConfig) New() (*Storage, error) { KineSocketPath: c.KineSocketPath, DatabaseDir: c.DatabaseDir, StorageConfig: c.StorageConfig, + InMemory: c.InMemory, }, nil } func (s *Storage) PrepareRun(ctx context.Context) (preparedStorage, error) { + if s.InMemory { + return preparedStorage{s}, nil + } + if err := s.prepareFilesystem(ctx); err != nil { return preparedStorage{}, err } @@ -101,9 +111,22 @@ func (s *preparedStorage) Run(ctx context.Context) error { func (s *Storage) run(ctx context.Context) error { logger := klog.FromContext(ctx) + if s.InMemory { + logger.V(2).Info("Starting in-memory storage backend (no persistence)") + s.isReady.Store(true) + <-ctx.Done() + logger.Info("Shutting down in-memory storage backend") + s.isReady.Store(false) + return nil + } + logger.V(2).Info("Starting storage backend", "database", s.KineConfig.Endpoint) s.isReady.Store(false) + // Restrict permissions on new files (socket) before Kine creates it. 
+ oldUmask := syscall.Umask(0117) // Creates socket as 0660 from the start + defer syscall.Umask(oldUmask) + etcdConfig, err := endpoint.Listen(ctx, s.KineConfig) if err != nil { return fmt.Errorf("failed to start storage backend: %w", err) @@ -114,7 +137,7 @@ func (s *Storage) run(ctx context.Context) error { socketPath := strings.TrimPrefix(s.KineSocketPath, "unix://") defer func() { if err := netutils.CleanupUDS(socketPath); err != nil { - klog.V(2).ErrorS(err, "Failed to cleanup socket", "path", socketPath) + klog.ErrorS(err, "Failed to cleanup kine socket", "path", socketPath) } }() @@ -157,8 +180,14 @@ func (s *Storage) waitForSocket(ctx context.Context) error { } conn.Close() //nolint:wsl_v5 + //nolint:gosec // G302: 0660 intentional — server and provider share a group if err := os.Chmod(socketPath, 0660); err != nil { + if os.IsPermission(err) { + return false, fmt.Errorf("failed to secure kine socket %q: %w", socketPath, err) + } + logger.V(4).Error(err, "Failed to secure socket, retrying", "path", socketPath) + return false, nil } @@ -169,8 +198,6 @@ func (s *Storage) waitForSocket(ctx context.Context) error { return fmt.Errorf("timed out waiting to connect to socket: %w", err) } - s.isReady.Store(true) - return nil } diff --git a/pkg/storage/storagebackend/storage_test.go b/pkg/storage/storagebackend/storage_test.go index b992d0602..7d446eadf 100644 --- a/pkg/storage/storagebackend/storage_test.go +++ b/pkg/storage/storagebackend/storage_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -113,6 +113,43 @@ func TestStorage_SocketInUse(t *testing.T) { } } +func TestStorage_InMemoryMode(t *testing.T) { + s := &Storage{InMemory: true} + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + ps, err := s.PrepareRun(ctx) + if err != nil { + t.Fatalf("PrepareRun failed: %v", err) + } + + runErr := make(chan error, 1) + go func() { + runErr <- ps.Run(ctx) + }() + + // In-memory should become ready almost immediately. + waitErr := wait.PollUntilContextTimeout(ctx, 10*time.Millisecond, 2*time.Second, true, func(ctx context.Context) (bool, error) { + return s.IsReady(), nil + }) + if waitErr != nil { + t.Fatal("In-memory storage did not become ready") + } + + cancel() + + select { + case <-runErr: + case <-time.After(2 * time.Second): + t.Error("In-memory storage did not shut down gracefully") + } + + if s.IsReady() { + t.Error("In-memory storage should not be ready after shutdown") + } +} + func TestStorage_WaitForSocket_Timeout(t *testing.T) { socketPath := testutils.NewUnixAddr(t) socketURL := "unix://" + socketPath diff --git a/pkg/testutil/grpcserver.go b/pkg/testutil/grpcserver.go new file mode 100644 index 000000000..3e9971474 --- /dev/null +++ b/pkg/testutil/grpcserver.go @@ -0,0 +1,118 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package testutil provides shared test infrastructure for gRPC integration tests. 
+package testutil + +import ( + "context" + "net" + "testing" + + clientset "github.com/nvidia/nvsentinel/pkg/client-go/client/versioned" + gpuclient "github.com/nvidia/nvsentinel/pkg/client-go/client/versioned/typed/device/v1alpha1" + + pb "github.com/nvidia/nvsentinel/internal/generated/device/v1alpha1" + svc "github.com/nvidia/nvsentinel/pkg/services/device/v1alpha1" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/test/bufconn" + apistorage "k8s.io/apiserver/pkg/storage/storagebackend" +) + +// NewTestGPUClient creates a bufconn-backed gRPC client for testing. +// It spins up a real gRPC server with the GPU service backed by in-memory storage. +// All resources are cleaned up when t finishes. +func NewTestGPUClient(t *testing.T) pb.GpuServiceClient { + t.Helper() + + lis := bufconn.Listen(1024 * 1024) + srv := grpc.NewServer() + + provider := svc.NewGPUServiceProvider() + service, err := provider.Install(srv, apistorage.Config{}) + if err != nil { + t.Fatalf("failed to install GPU service: %v", err) + } + + go func() { + if err := srv.Serve(lis); err != nil { + t.Logf("server stopped: %v", err) + } + }() + + conn, err := grpc.NewClient( + "passthrough:///bufconn", + grpc.WithContextDialer(func(context.Context, string) (net.Conn, error) { + return lis.Dial() + }), + grpc.WithTransportCredentials(insecure.NewCredentials()), + ) + if err != nil { + t.Fatalf("failed to create gRPC client: %v", err) + } + + t.Cleanup(func() { + conn.Close() + service.Cleanup() + srv.Stop() + lis.Close() + }) + + return pb.NewGpuServiceClient(conn) +} + +// NewTestGPUTypedClient creates a bufconn-backed typed GPU client for testing. +// It spins up a real gRPC server with the GPU service backed by in-memory storage, +// and returns a GPUInterface from the generated client SDK. +// All resources are cleaned up when t finishes. 
+func NewTestGPUTypedClient(t *testing.T) gpuclient.GPUInterface { + t.Helper() + + lis := bufconn.Listen(1024 * 1024) + srv := grpc.NewServer() + + provider := svc.NewGPUServiceProvider() + service, err := provider.Install(srv, apistorage.Config{}) + if err != nil { + t.Fatalf("failed to install GPU service: %v", err) + } + + go func() { + if err := srv.Serve(lis); err != nil { + t.Logf("server stopped: %v", err) + } + }() + + conn, err := grpc.NewClient( + "passthrough:///bufconn", + grpc.WithContextDialer(func(context.Context, string) (net.Conn, error) { + return lis.Dial() + }), + grpc.WithTransportCredentials(insecure.NewCredentials()), + ) + if err != nil { + t.Fatalf("failed to create gRPC client: %v", err) + } + + t.Cleanup(func() { + conn.Close() + service.Cleanup() + srv.Stop() + lis.Close() + }) + + cs := clientset.New(conn) + return cs.DeviceV1alpha1().GPUs() +} diff --git a/pkg/testutil/grpcserver_test.go b/pkg/testutil/grpcserver_test.go new file mode 100644 index 000000000..460f3c489 --- /dev/null +++ b/pkg/testutil/grpcserver_test.go @@ -0,0 +1,57 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package testutil + +import ( + "testing" + + pb "github.com/nvidia/nvsentinel/internal/generated/device/v1alpha1" +) + +func TestNewTestGPUClient_CreateAndGet(t *testing.T) { + client := NewTestGPUClient(t) + ctx := t.Context() + + const gpuName = "GPU-01234567-89ab-cdef-0123-456789abcdef" + + created, err := client.CreateGpu(ctx, &pb.CreateGpuRequest{ + Gpu: &pb.Gpu{ + Metadata: &pb.ObjectMeta{ + Name: gpuName, + Namespace: "default", + }, + Spec: &pb.GpuSpec{ + Uuid: "GPU-TEST-1", + }, + }, + }) + if err != nil { + t.Fatalf("CreateGpu failed: %v", err) + } + if created.GetMetadata().GetName() != gpuName { + t.Errorf("expected name %q, got %q", gpuName, created.GetMetadata().GetName()) + } + + resp, err := client.GetGpu(ctx, &pb.GetGpuRequest{ + Name: gpuName, + Namespace: "default", + }) + if err != nil { + t.Fatalf("GetGpu failed: %v", err) + } + if resp.GetGpu().GetSpec().GetUuid() != "GPU-TEST-1" { + t.Errorf("expected UUID %q, got %q", "GPU-TEST-1", resp.GetGpu().GetSpec().GetUuid()) + } +} diff --git a/pkg/util/net/uds.go b/pkg/util/net/uds.go index 1083f4352..25072e73b 100644 --- a/pkg/util/net/uds.go +++ b/pkg/util/net/uds.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -55,6 +55,9 @@ func CreateUDSListener(ctx context.Context, socketPath string, perm os.FileMode) lc := net.ListenConfig{} + // Note: There is a residual TOCTOU window between CleanupUDS and Listen. + // This is acceptable because Listen will fail with EADDRINUSE if another + // process binds the socket in that window. 
lis, err := lc.Listen(ctx, "unix", socketPath) if err != nil { return nil, nil, fmt.Errorf("failed to listen on unix socket %q: %w", socketPath, err) diff --git a/pkg/util/verflag/verflag.go b/pkg/util/verflag/verflag.go index 592a41f71..1dae5d3b9 100644 --- a/pkg/util/verflag/verflag.go +++ b/pkg/util/verflag/verflag.go @@ -1,4 +1,4 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ import ( "strconv" "text/tabwriter" - "github.com/nvidia/nvsentinel/pkg/util/version" + "github.com/nvidia/nvsentinel/pkg/version" "github.com/spf13/pflag" ) @@ -111,7 +111,7 @@ func printVersionTable() { fmt.Fprintf(w, "%s\n", programName) fmt.Fprintf(w, "---\t---\n") - fmt.Fprintf(w, "Version\t%s\n", v.GitVersion) + fmt.Fprintf(w, "Version\t%s\n", v.Version) fmt.Fprintf(w, "GitCommit\t%s\n", v.GitCommit) fmt.Fprintf(w, "BuildDate\t%s\n", v.BuildDate) fmt.Fprintf(w, "GoVersion\t%s\n", v.GoVersion) diff --git a/pkg/util/version/version.go b/pkg/util/version/version.go deleted file mode 100644 index dac336d55..000000000 --- a/pkg/util/version/version.go +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package version - -import ( - "encoding/json" - "fmt" - "net/http" - "runtime" - - utilversion "k8s.io/apimachinery/pkg/util/version" - "k8s.io/component-base/compatibility" -) - -var ( - GitVersion = "v0.0.0-devel" - GitCommit = "unknown" - BuildDate = "unknown" -) - -type Info struct { - GitVersion string - GitCommit string - BuildDate string - GoVersion string - Compiler string - Platform string -} - -func Get() Info { - return Info{ - GitVersion: GitVersion, - GitCommit: GitCommit, - BuildDate: BuildDate, - GoVersion: runtime.Version(), - Compiler: runtime.Compiler, - Platform: fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH), - } -} - -func (i Info) String() string { - return i.GitVersion -} - -// UserAgent returns the standard user agent string for clients. -func UserAgent() string { - return fmt.Sprintf("nvidia-device-api/%s (%s)", GitVersion, Get().Platform) -} - -func RegisterComponent(registry compatibility.ComponentGlobalsRegistry) error { - v, err := utilversion.ParseSemantic(GitVersion) - if err != nil { - v = utilversion.MustParseSemantic("v0.0.1") - } - - binaryVersion := v - emulationVersion := v - minCompatibilityVersion := v - - effectiveVer := compatibility.NewEffectiveVersion( - binaryVersion, - false, - emulationVersion, - minCompatibilityVersion, - ) - - if err := registry.Register("nvidia-device-api", effectiveVer, nil); err != nil { - return fmt.Errorf("failed to register component with compatibility registry: %w", err) - } - - return nil -} - -func Handler() http.Handler { - return http.HandlerFunc(versionHandler) -} - -func versionHandler(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusOK) - _ = json.NewEncoder(w).Encode(Get()) -} diff --git a/pkg/util/version/version_test.go b/pkg/util/version/version_test.go deleted file mode 100644 index 3548c63d9..000000000 --- a/pkg/util/version/version_test.go +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright (c) 2025, NVIDIA 
CORPORATION. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package version - -import ( - "strings" - "testing" - - "k8s.io/component-base/compatibility" -) - -func TestGet(t *testing.T) { - info := Get() - - if info.GitVersion != GitVersion { - t.Errorf("expected GitVersion %s, got %s", GitVersion, info.GitVersion) - } - - if info.GoVersion == "" || info.Platform == "" { - t.Error("runtime info (GoVersion/Platform) should not be empty") - } -} - -func TestUserAgent(t *testing.T) { - ua := UserAgent() - expectedPrefix := "nvidia-device-api/" + GitVersion - - if !strings.HasPrefix(ua, expectedPrefix) { - t.Errorf("UserAgent %s does not start with %s", ua, expectedPrefix) - } -} - -func TestRegisterComponent(t *testing.T) { - tests := []struct { - name string - gitVersion string - }{ - { - name: "valid semver", - gitVersion: "v1.2.3", - }, - { - name: "invalid semver uses fallback", - gitVersion: "development-build", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - oldVersion := GitVersion - GitVersion = tt.gitVersion - defer func() { GitVersion = oldVersion }() - - registry := compatibility.NewComponentGlobalsRegistry() - - defer func() { - if r := recover(); r != nil { - t.Errorf("RegisterComponent panicked for version %s: %v", tt.gitVersion, r) - } - }() - - RegisterComponent(registry) - - effective := registry.EffectiveVersionFor("nvidia-device-api") - if effective == nil { - t.Fatal("component was 
not registered in the registry") - } - - if effective.BinaryVersion() == nil { - t.Error("EffectiveVersion has nil BinaryVersion") - } - }) - } -} diff --git a/pkg/version/version.go b/pkg/version/version.go new file mode 100644 index 000000000..f2f31aa6f --- /dev/null +++ b/pkg/version/version.go @@ -0,0 +1,98 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package version provides version information for the Device API Server. +// These values are set at build time via ldflags. +package version + +import ( + "encoding/json" + "fmt" + "net/http" + "runtime" +) + +// Build information set at compile time via -ldflags. +var ( + // Version is the semantic version of the build. + Version = "dev" + + // GitCommit is the git commit SHA at build time. + GitCommit = "unknown" + + // GitTreeState indicates if the git tree was clean or dirty. + GitTreeState = "unknown" + + // BuildDate is the date of the build in ISO 8601 format. + BuildDate = "unknown" +) + +// Info contains version information. +type Info struct { + Version string `json:"version"` + GitCommit string `json:"gitCommit"` + GitTreeState string `json:"gitTreeState"` + BuildDate string `json:"buildDate"` + GoVersion string `json:"goVersion"` + Compiler string `json:"compiler"` + Platform string `json:"platform"` +} + +// Get returns the version information. 
+func Get() Info { + return Info{ + Version: Version, + GitCommit: GitCommit, + GitTreeState: GitTreeState, + BuildDate: BuildDate, + GoVersion: runtime.Version(), + Compiler: runtime.Compiler, + Platform: fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH), + } +} + +// String returns version information as a human-readable string. +func (i Info) String() string { + return fmt.Sprintf( + "Version: %s\nGit Commit: %s\nGit Tree State: %s\nBuild Date: %s\nGo Version: %s\nCompiler: %s\nPlatform: %s", + i.Version, + i.GitCommit, + i.GitTreeState, + i.BuildDate, + i.GoVersion, + i.Compiler, + i.Platform, + ) +} + +// Short returns a short version string. +func (i Info) Short() string { + return fmt.Sprintf("%s (%s)", i.Version, i.GitCommit) +} + +// UserAgent returns the standard user agent string for clients. +func UserAgent() string { + return fmt.Sprintf("nvidia-device-api/%s (%s)", Version, Get().Platform) +} + +// Handler returns an HTTP handler that responds with version information as JSON. +func Handler() http.Handler { + return http.HandlerFunc(versionHandler) +} + +func versionHandler(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _ = json.NewEncoder(w).Encode(Get()) +} diff --git a/pkg/version/version_test.go b/pkg/version/version_test.go new file mode 100644 index 000000000..78c66358e --- /dev/null +++ b/pkg/version/version_test.go @@ -0,0 +1,68 @@ +// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package version + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" +) + +func TestGet(t *testing.T) { + info := Get() + + if info.Version != Version { + t.Errorf("expected Version %s, got %s", Version, info.Version) + } + + if info.GoVersion == "" || info.Platform == "" { + t.Error("runtime info (GoVersion/Platform) should not be empty") + } +} + +func TestUserAgent(t *testing.T) { + ua := UserAgent() + expectedPrefix := "nvidia-device-api/" + Version + + if !strings.HasPrefix(ua, expectedPrefix) { + t.Errorf("UserAgent %s does not start with %s", ua, expectedPrefix) + } +} + +func TestHandler(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/version", nil) + w := httptest.NewRecorder() + + Handler().ServeHTTP(w, req) + + if w.Code != http.StatusOK { + t.Errorf("expected status %d, got %d", http.StatusOK, w.Code) + } + + if ct := w.Header().Get("Content-Type"); ct != "application/json" { + t.Errorf("expected Content-Type application/json, got %s", ct) + } + + var info Info + if err := json.NewDecoder(w.Body).Decode(&info); err != nil { + t.Fatalf("failed to decode response body: %v", err) + } + + if info.Version != Version { + t.Errorf("expected version %s in response, got %s", Version, info.Version) + } +} diff --git a/test/integration/client-go/device/v1alpha1/clientset_test.go b/test/integration/client-go/device/v1alpha1/clientset_test.go deleted file mode 100644 index 6745e3003..000000000 --- a/test/integration/client-go/device/v1alpha1/clientset_test.go +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package v1alpha1_test - -import ( - "context" - "encoding/json" - "fmt" - "strconv" - "testing" - "time" - - devicev1alpha1 "github.com/nvidia/nvsentinel/api/device/v1alpha1" - "github.com/nvidia/nvsentinel/cmd/device-apiserver/app" - "github.com/nvidia/nvsentinel/cmd/device-apiserver/app/options" - "github.com/nvidia/nvsentinel/pkg/client-go/client/versioned" - "github.com/nvidia/nvsentinel/pkg/grpc/client" - "github.com/nvidia/nvsentinel/pkg/util/testutils" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -func TestEndToEnd(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - tmpDir := t.TempDir() - - socketPath := testutils.NewUnixAddr(t) - kineSocket := fmt.Sprintf("unix://%s", testutils.NewUnixAddr(t)) - healthAddr := testutils.GetFreeTCPAddress(t) - - opts := options.NewServerRunOptions() - opts.NodeName = "test-node" - opts.GRPC.BindAddress = "unix://" + socketPath - opts.HealthAddress = healthAddr - opts.Storage.DatabaseDir = tmpDir - opts.Storage.DatabasePath = tmpDir + "state.db" - opts.Storage.KineSocketPath = kineSocket - opts.Storage.KineConfig.Endpoint = fmt.Sprintf("sqlite://%s/db.sqlite", tmpDir) - opts.Storage.KineConfig.Listener = kineSocket - - completed, err := opts.Complete(ctx) - if err != nil { - t.Fatalf("Failed to complete options: %v", err) - } - - go func() { - if err := app.Run(ctx, completed); err != nil && err != context.Canceled { - t.Errorf("Server exited with error: %v", err) - } - }() - - testutils.WaitForStatus(t, healthAddr, "", 5*time.Second, testutils.IsServing) 
- - config := &client.Config{Target: "unix://" + socketPath} - client, err := versioned.NewForConfig(config) - if err != nil { - t.Fatalf("Failed to create clientset: %v", err) - } - - var created *devicev1alpha1.GPU - - t.Run("Create", func(t *testing.T) { - gpu := &devicev1alpha1.GPU{ - ObjectMeta: metav1.ObjectMeta{ - Name: "gpu-ad2367dd-a40e-6b86-6fc3-c44a2cc92c7e", - }, - Spec: devicev1alpha1.GPUSpec{ - UUID: "GPU-ad2367dd-a40e-6b86-6fc3-c44a2cc92c7e", - }, - Status: devicev1alpha1.GPUStatus{ - Conditions: []metav1.Condition{ - { - Type: "Ready", - Status: metav1.ConditionFalse, - Reason: "DriverNotReaady", - Message: "Driver is posting ready status", - }, - }, - }, - } - - created, err = client.DeviceV1alpha1().GPUs().Create(ctx, gpu, metav1.CreateOptions{}) - if err != nil { - t.Fatalf("Failed to create GPU: %v", err) - } - - // Client generated fields - if created.Kind != "GPU" { - t.Errorf("expected kind 'GPU', got %s", created.Kind) - } - if created.APIVersion != devicev1alpha1.SchemeGroupVersion.String() { - t.Errorf("expected version %s, got %s", devicev1alpha1.SchemeGroupVersion.String(), created.APIVersion) - } - - // Server generated fields - if created.Namespace != "default" { - t.Error("Server failed to set default namespace") - } - if created.UID == "" { - t.Error("Server failed to generate a UID for the GPU") - } - if created.ResourceVersion == "" { - t.Error("Server failed to generate a ResourceVersion") - } - if created.Generation != 1 { - t.Error("Server failed to set initial Generation") - } - if created.CreationTimestamp.IsZero() { - t.Error("Server failed to set a CreationTimestamp") - } - - // Data integrity - if created.Name != gpu.Name { - t.Errorf("expected name %q, got %q", gpu.Name, created.Name) - } - if created.Spec.UUID != gpu.Spec.UUID { - t.Errorf("expected UUID %q, got %q", gpu.Spec.UUID, created.Spec.UUID) - } - - // Data integrity: Status - if len(created.Status.Conditions) != len(gpu.Status.Conditions) { - t.Fatalf("expected 
%d conditions, got %d", len(gpu.Status.Conditions), len(created.Status.Conditions)) - } - - cond := created.Status.Conditions[0] - expected := gpu.Status.Conditions[0] - - if cond.Type != expected.Type { - t.Errorf("expected condition Type %q, got %q", expected.Type, cond.Type) - } - if cond.Status != expected.Status { - t.Errorf("expected condition Status %q, got %q", expected.Status, cond.Status) - } - if cond.Reason != expected.Reason { - t.Errorf("expected condition Reason %q, got %q", expected.Reason, cond.Reason) - } - if cond.Message != expected.Message { - t.Errorf("expected condition Message %q, got %q", expected.Message, cond.Message) - } - if cond.LastTransitionTime.IsZero() { - t.Error("condition LastTransitionTime should not be zero") - } - - // TODO: remove - objJson, _ := json.MarshalIndent(created, "", " ") - fmt.Printf("\n--- [Object After Creation] ---\n%s\n", string(objJson)) - }) - - t.Run("Update", func(t *testing.T) { - if created == nil { - t.Skip("Skipping: Create failed") - } - - toUpdate := created.DeepCopy() - toUpdate.Spec.UUID = "GPU-cd2367dd-a40e-6b86-6fc3-c44a2cc92c7d" - - updated, err := client.DeviceV1alpha1().GPUs().Update(ctx, toUpdate, metav1.UpdateOptions{}) - if err != nil { - t.Fatalf("Failed to update GPU: %v", err) - } - - if updated.Spec.UUID != toUpdate.Spec.UUID { - t.Errorf("expected UUID %q, got %q", toUpdate.Spec.UUID, updated.Spec.UUID) - } - - oldRV, _ := strconv.ParseInt(created.ResourceVersion, 10, 64) - updatedRV, _ := strconv.ParseInt(updated.ResourceVersion, 10, 64) - - if updatedRV <= oldRV { - t.Errorf("expected ResourceVersion to increase, got %d (old) and %d (new)", oldRV, updatedRV) - } - - if updated.Generation <= created.Generation { - t.Errorf("expected Generation to increase, got %d (old) and %d (new)", created.Generation, updated.Generation) - } - - // TODO: remove - objJson, _ := json.MarshalIndent(updated, "", " ") - fmt.Printf("\n--- [Object After Update] ---\n%s\n", string(objJson)) - }) - - // 
TODO: add tests for Delete, List, Watch -}