diff --git a/cmd/katalyst-agent/app/agent/qrm/gpu_plugin.go b/cmd/katalyst-agent/app/agent/qrm/gpu_plugin.go new file mode 100644 index 0000000000..e98d9a6c7e --- /dev/null +++ b/cmd/katalyst-agent/app/agent/qrm/gpu_plugin.go @@ -0,0 +1,67 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package qrm + +import ( + "fmt" + "strings" + "sync" + + "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent" + phconsts "github.com/kubewharf/katalyst-core/pkg/agent/utilcomponent/periodicalhandler/consts" + "github.com/kubewharf/katalyst-core/pkg/config" +) + +const ( + QRMPluginNameGPU = "qrm_gpu_plugin" +) + +var QRMGPUPluginPeriodicalHandlerGroupName = strings.Join([]string{ + QRMPluginNameGPU, + phconsts.PeriodicalHandlersGroupNameSuffix, +}, phconsts.GroupNameSeparator) + +// gpuPolicyInitializers is used to store the initializing function for gpu resource plugin policies +var gpuPolicyInitializers sync.Map + +// RegisterGPUPolicyInitializer is used to register user-defined resource plugin init functions +func RegisterGPUPolicyInitializer(name string, initFunc agent.InitFunc) { + gpuPolicyInitializers.Store(name, initFunc) +} + +// getGPUPolicyInitializers returns those policies with initialized functions +func getGPUPolicyInitializers() map[string]agent.InitFunc { + agents := make(map[string]agent.InitFunc) + gpuPolicyInitializers.Range(func(key, value interface{}) bool { + agents[key.(string)] = value.(agent.InitFunc) + return true + 
}) + return agents +} + +// InitQRMGPUPlugins initializes the gpu QRM plugins +func InitQRMGPUPlugins(agentCtx *agent.GenericContext, conf *config.Configuration, extraConf interface{}, agentName string) (bool, agent.Component, error) { + initializers := getGPUPolicyInitializers() + policyName := conf.GPUQRMPluginConfig.PolicyName + + initFunc, ok := initializers[policyName] + if !ok { + return false, agent.ComponentStub{}, fmt.Errorf("invalid policy name %v for gpu resource plugin", policyName) + } + + return initFunc(agentCtx, conf, extraConf, agentName) +} diff --git a/cmd/katalyst-agent/app/enableagents.go b/cmd/katalyst-agent/app/enableagents.go index 615ef00c93..713ea6925a 100644 --- a/cmd/katalyst-agent/app/enableagents.go +++ b/cmd/katalyst-agent/app/enableagents.go @@ -24,6 +24,7 @@ import ( "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent" "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent/qrm" _ "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu" + _ "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu" _ "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/io" _ "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory" _ "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/network" @@ -57,6 +58,7 @@ func init() { agentInitializers.Store(qrm.QRMPluginNameMemory, AgentStarter{Init: qrm.InitQRMMemoryPlugins}) agentInitializers.Store(qrm.QRMPluginNameNetwork, AgentStarter{Init: qrm.InitQRMNetworkPlugins}) agentInitializers.Store(qrm.QRMPluginNameIO, AgentStarter{Init: qrm.InitQRMIOPlugins}) + agentInitializers.Store(qrm.QRMPluginNameGPU, AgentStarter{Init: qrm.InitQRMGPUPlugins}) } // RegisterAgentInitializer is used to register user-defined agents diff --git a/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/reclaimed_resources_eviction.go b/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/reclaimed_resources_eviction.go index e969c9374d..4f7f1faeb5 100644 --- 
a/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/reclaimed_resources_eviction.go +++ b/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/reclaimed_resources_eviction.go @@ -35,6 +35,7 @@ func NewReclaimedResourcesEvictionOptions() *ReclaimedResourcesEvictionOptions { EvictionThreshold: native.ResourceThreshold{ consts.ReclaimedResourceMilliCPU: 5.0, consts.ReclaimedResourceMemory: 5.0, + consts.ResourceGPUMemory: 5.0, }, GracePeriod: 60, ThresholdMetToleranceDuration: 0, diff --git a/cmd/katalyst-agent/app/options/qrm/gpu_plugin.go b/cmd/katalyst-agent/app/options/qrm/gpu_plugin.go new file mode 100644 index 0000000000..28b09fdec1 --- /dev/null +++ b/cmd/katalyst-agent/app/options/qrm/gpu_plugin.go @@ -0,0 +1,75 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package qrm + +import ( + "k8s.io/apimachinery/pkg/api/resource" + cliflag "k8s.io/component-base/cli/flag" + + "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/options/qrm/gpustrategy" + qrmconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/qrm" +) + +type GPUOptions struct { + PolicyName string + GPUDeviceNames []string + GPUMemoryAllocatablePerGPU string + SkipGPUStateCorruption bool + RDMADeviceNames []string + + GPUStrategyOptions *gpustrategy.GPUStrategyOptions +} + +func NewGPUOptions() *GPUOptions { + return &GPUOptions{ + PolicyName: "static", + GPUDeviceNames: []string{"nvidia.com/gpu"}, + GPUMemoryAllocatablePerGPU: "100", + RDMADeviceNames: []string{}, + GPUStrategyOptions: gpustrategy.NewGPUStrategyOptions(), + } +} + +func (o *GPUOptions) AddFlags(fss *cliflag.NamedFlagSets) { + fs := fss.FlagSet("gpu_resource_plugin") + + fs.StringVar(&o.PolicyName, "gpu-resource-plugin-policy", + o.PolicyName, "The policy gpu resource plugin should use") + fs.StringSliceVar(&o.GPUDeviceNames, "gpu-resource-names", o.GPUDeviceNames, "The name of the GPU resource") + fs.StringVar(&o.GPUMemoryAllocatablePerGPU, "gpu-memory-allocatable-per-gpu", + o.GPUMemoryAllocatablePerGPU, "The total memory allocatable for each GPU, e.g. 
100") + fs.BoolVar(&o.SkipGPUStateCorruption, "skip-gpu-state-corruption", + o.SkipGPUStateCorruption, "skip gpu state corruption, and it will be used after updating state properties") + fs.StringSliceVar(&o.RDMADeviceNames, "rdma-resource-names", o.RDMADeviceNames, "The name of the RDMA resource") + o.GPUStrategyOptions.AddFlags(fss) +} + +func (o *GPUOptions) ApplyTo(conf *qrmconfig.GPUQRMPluginConfig) error { + conf.PolicyName = o.PolicyName + conf.GPUDeviceNames = o.GPUDeviceNames + gpuMemory, err := resource.ParseQuantity(o.GPUMemoryAllocatablePerGPU) + if err != nil { + return err + } + conf.GPUMemoryAllocatablePerGPU = gpuMemory + conf.SkipGPUStateCorruption = o.SkipGPUStateCorruption + conf.RDMADeviceNames = o.RDMADeviceNames + if err := o.GPUStrategyOptions.ApplyTo(conf.GPUStrategyConfig); err != nil { + return err + } + return nil +} diff --git a/cmd/katalyst-agent/app/options/qrm/gpustrategy/allocate.go b/cmd/katalyst-agent/app/options/qrm/gpustrategy/allocate.go new file mode 100644 index 0000000000..470ec40012 --- /dev/null +++ b/cmd/katalyst-agent/app/options/qrm/gpustrategy/allocate.go @@ -0,0 +1,59 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package gpustrategy + +import ( + "strings" + + cliflag "k8s.io/component-base/cli/flag" + + "github.com/kubewharf/katalyst-core/pkg/config/agent/qrm/gpustrategy" +) + +type AllocateStrategyOptions struct { + CustomFilteringStrategies map[string]string + CustomSortingStrategy map[string]string + CustomBindingStrategy map[string]string + CustomAllocationStrategy map[string]string +} + +func NewGPUAllocateStrategyOptions() *AllocateStrategyOptions { + return &AllocateStrategyOptions{} +} + +func (o *AllocateStrategyOptions) AddFlags(fss *cliflag.NamedFlagSets) { + fs := fss.FlagSet("allocate_strategy") + fs.StringToStringVar(&o.CustomFilteringStrategies, "gpu-allocate-custom-filtering-strategies", + o.CustomFilteringStrategies, "The filtering strategies for each resource, e.g. gpu:filtering1/filtering2") + fs.StringToStringVar(&o.CustomSortingStrategy, "gpu-allocate-custom-sorting-strategy", o.CustomSortingStrategy, "The sorting strategy for each resource") + fs.StringToStringVar(&o.CustomBindingStrategy, "gpu-allocate-custom-binding-strategy", o.CustomBindingStrategy, "The binding strategy for each resource") + fs.StringToStringVar(&o.CustomAllocationStrategy, "gpu-allocate-custom-allocation-strategy", o.CustomAllocationStrategy, "The allocation strategy for each resource") +} + +func (o *AllocateStrategyOptions) ApplyTo(c *gpustrategy.AllocateStrategyConfig) error { + for resourceName, strategies := range o.CustomFilteringStrategies { + filteringStrategies := strings.Split(strategies, "/") + for _, strategyName := range filteringStrategies { + c.CustomFilteringStrategies[resourceName] = append(c.CustomFilteringStrategies[resourceName], strategyName) + } + } + + c.CustomSortingStrategy = o.CustomSortingStrategy + c.CustomBindingStrategy = o.CustomBindingStrategy + c.CustomAllocationStrategy = o.CustomAllocationStrategy + return nil +} diff --git a/cmd/katalyst-agent/app/options/qrm/gpustrategy/strategy_base.go 
b/cmd/katalyst-agent/app/options/qrm/gpustrategy/strategy_base.go new file mode 100644 index 0000000000..ed60713090 --- /dev/null +++ b/cmd/katalyst-agent/app/options/qrm/gpustrategy/strategy_base.go @@ -0,0 +1,44 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package gpustrategy + +import ( + cliflag "k8s.io/component-base/cli/flag" + + "github.com/kubewharf/katalyst-core/pkg/config/agent/qrm/gpustrategy" +) + +type GPUStrategyOptions struct { + *AllocateStrategyOptions +} + +func NewGPUStrategyOptions() *GPUStrategyOptions { + return &GPUStrategyOptions{ + AllocateStrategyOptions: NewGPUAllocateStrategyOptions(), + } +} + +func (o *GPUStrategyOptions) AddFlags(fss *cliflag.NamedFlagSets) { + o.AllocateStrategyOptions.AddFlags(fss) +} + +func (o *GPUStrategyOptions) ApplyTo(conf *gpustrategy.GPUStrategyConfig) error { + if err := o.AllocateStrategyOptions.ApplyTo(conf.AllocateStrategyConfig); err != nil { + return err + } + return nil +} diff --git a/cmd/katalyst-agent/app/options/qrm/qrm_base.go b/cmd/katalyst-agent/app/options/qrm/qrm_base.go index c02a6ddd9d..139c26ae77 100644 --- a/cmd/katalyst-agent/app/options/qrm/qrm_base.go +++ b/cmd/katalyst-agent/app/options/qrm/qrm_base.go @@ -88,6 +88,7 @@ type QRMPluginsOptions struct { MemoryOptions *MemoryOptions NetworkOptions *NetworkOptions IOOptions *IOOptions + GPUOptions *GPUOptions } func NewQRMPluginsOptions() *QRMPluginsOptions { @@ -96,6 +97,7 @@ func NewQRMPluginsOptions() 
*QRMPluginsOptions { MemoryOptions: NewMemoryOptions(), NetworkOptions: NewNetworkOptions(), IOOptions: NewIOOptions(), + GPUOptions: NewGPUOptions(), } } @@ -104,6 +106,7 @@ func (o *QRMPluginsOptions) AddFlags(fss *cliflag.NamedFlagSets) { o.MemoryOptions.AddFlags(fss) o.NetworkOptions.AddFlags(fss) o.IOOptions.AddFlags(fss) + o.GPUOptions.AddFlags(fss) } func (o *QRMPluginsOptions) ApplyTo(conf *qrmconfig.QRMPluginsConfiguration) error { @@ -119,5 +122,8 @@ func (o *QRMPluginsOptions) ApplyTo(conf *qrmconfig.QRMPluginsConfiguration) err if err := o.IOOptions.ApplyTo(conf.IOQRMPluginConfig); err != nil { return err } + if err := o.GPUOptions.ApplyTo(conf.GPUQRMPluginConfig); err != nil { + return err + } return nil } diff --git a/go.mod b/go.mod index 56953cf18e..ca6acb5cc0 100644 --- a/go.mod +++ b/go.mod @@ -16,6 +16,7 @@ require ( github.com/golang/mock v1.6.0 github.com/golang/protobuf v1.5.3 github.com/google/cadvisor v0.44.2 + github.com/google/go-cmp v0.5.9 github.com/google/uuid v1.3.0 github.com/h2non/gock v1.2.0 github.com/klauspost/cpuid/v2 v2.2.6 @@ -100,7 +101,6 @@ require ( github.com/godbus/dbus/v5 v5.0.6 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/google/gnostic v0.6.9 // indirect - github.com/google/go-cmp v0.5.9 // indirect github.com/google/gofuzz v1.2.0 // indirect github.com/gopherjs/gopherjs v0.0.0-20200217142428-fce0ec30dd00 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect @@ -175,6 +175,7 @@ require ( ) replace ( + github.com/kubewharf/katalyst-api => github.com/luomingmeng/katalyst-api v0.0.0-20251225062836-a81d80885d97 k8s.io/api => k8s.io/api v0.24.6 k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.24.6 k8s.io/apimachinery => k8s.io/apimachinery v0.24.6 @@ -196,7 +197,7 @@ replace ( k8s.io/kube-proxy => k8s.io/kube-proxy v0.24.6 k8s.io/kube-scheduler => k8s.io/kube-scheduler v0.24.6 k8s.io/kubectl => k8s.io/kubectl v0.24.6 - 
k8s.io/kubelet => github.com/kubewharf/kubelet v1.24.6-kubewharf.9 + k8s.io/kubelet => github.com/yehlemias/kubelet v0.0.0-20250929105636-c5bb000496f2 k8s.io/kubernetes => k8s.io/kubernetes v1.24.6 k8s.io/legacy-cloud-providers => k8s.io/legacy-cloud-providers v0.24.6 k8s.io/metrics => k8s.io/metrics v0.24.6 diff --git a/go.sum b/go.sum index 816e098731..1701a16c1e 100644 --- a/go.sum +++ b/go.sum @@ -574,10 +574,6 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/kubewharf/katalyst-api v0.5.8-0.20251212030746-894fa2521a86 h1:GCqe9PcoTQ7akNDyAmavhnSrPV7sMAoYJ5jKEaJg4Ac= -github.com/kubewharf/katalyst-api v0.5.8-0.20251212030746-894fa2521a86/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k= -github.com/kubewharf/kubelet v1.24.6-kubewharf.9 h1:jOTYZt7h/J7I8xQMKMUcJjKf5UFBv37jHWvNp5VRFGc= -github.com/kubewharf/kubelet v1.24.6-kubewharf.9/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c= github.com/kyoh86/exportloopref v0.1.7/go.mod h1:h1rDl2Kdj97+Kwh4gdz3ujE7XHmH51Q0lUiZ1z4NLj8= github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/libopenstorage/openstorage v1.0.0/go.mod h1:Sp1sIObHjat1BeXhfMqLZ14wnOzEhNx2YQedreMcUyc= @@ -587,6 +583,8 @@ github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0U github.com/lithammer/dedent v1.1.0/go.mod h1:jrXYCQtgg0nJiN+StA2KgR7w6CiQNv9Fd/Z9BP0jIOc= github.com/logrusorgru/aurora v0.0.0-20181002194514-a7b3b318ed4e/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4= github.com/lpabon/godbc v0.1.1/go.mod h1:Jo9QV0cf3U6jZABgiJ2skINAXb9j8m51r07g4KI92ZA= +github.com/luomingmeng/katalyst-api v0.0.0-20251225062836-a81d80885d97 h1:1vDUtGKw5ZI9yAKcbWQw+1y3BvQi3Sp4nGhCCi00vZs= 
+github.com/luomingmeng/katalyst-api v0.0.0-20251225062836-a81d80885d97/go.mod h1:BZMVGVl3EP0eCn5xsDgV41/gjYkoh43abIYxrB10e3k= github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ= github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= @@ -937,6 +935,8 @@ github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5 github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/xlab/treeprint v0.0.0-20181112141820-a009c3971eca/go.mod h1:ce1O1j6UtZfjr22oyGxGLbauSBp2YVXpARAosm7dHBg= github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= +github.com/yehlemias/kubelet v0.0.0-20250929105636-c5bb000496f2 h1:km3N0XyOxD5yh/xdKwLdXnMx01wFRKeDz4/CT8ui8a0= +github.com/yehlemias/kubelet v0.0.0-20250929105636-c5bb000496f2/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= diff --git a/pkg/agent/evictionmanager/manager.go b/pkg/agent/evictionmanager/manager.go index 48c8ed3eb3..1457930842 100644 --- a/pkg/agent/evictionmanager/manager.go +++ b/pkg/agent/evictionmanager/manager.go @@ -143,6 +143,7 @@ func NewInnerEvictionPluginInitializers() map[string]plugin.InitFunc { innerEvictionPluginInitializers := make(map[string]plugin.InitFunc) innerEvictionPluginInitializers[resource.ReclaimedResourcesEvictionPluginName] = resource.NewReclaimedResourcesEvictionPlugin innerEvictionPluginInitializers[resource.ReclaimedNumaResourcesEvictionPluginName] = 
resource.NewReclaimedNumaResourcesEvictionPlugin + innerEvictionPluginInitializers[resource.ReclaimedGPUResourcesEvictionPluginName] = resource.NewReclaimedGPUResourcesEvictionPlugin innerEvictionPluginInitializers[memory.EvictionPluginNameNumaMemoryPressure] = memory.NewNumaMemoryPressureEvictionPlugin innerEvictionPluginInitializers[memory.EvictionPluginNameSystemMemoryPressure] = memory.NewSystemPressureEvictionPlugin innerEvictionPluginInitializers[memory.EvictionPluginNameRssOveruse] = memory.NewRssOveruseEvictionPlugin diff --git a/pkg/agent/evictionmanager/plugin/resource/reclaimed_gpu_resources.go b/pkg/agent/evictionmanager/plugin/resource/reclaimed_gpu_resources.go new file mode 100644 index 0000000000..08fff40113 --- /dev/null +++ b/pkg/agent/evictionmanager/plugin/resource/reclaimed_gpu_resources.go @@ -0,0 +1,86 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package resource + +import ( + "time" + + v1 "k8s.io/api/core/v1" + "k8s.io/client-go/tools/events" + + "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" + "github.com/kubewharf/katalyst-core/pkg/agent/evictionmanager/plugin" + "github.com/kubewharf/katalyst-core/pkg/client" + "github.com/kubewharf/katalyst-core/pkg/config" + "github.com/kubewharf/katalyst-core/pkg/metaserver" + "github.com/kubewharf/katalyst-core/pkg/metrics" + "github.com/kubewharf/katalyst-core/pkg/util/process" +) + +const ( + ReclaimedGPUResourcesEvictionPluginName = "reclaimed-gpu-resource-pressure-eviction-plugin" +) + +const ( + thresholdMetToleranceDurationForGPU = 15 +) + +type ReclaimedGPUResourcesPlugin struct { + *process.StopControl + *ZoneResourcesPlugin +} + +// NewReclaimedGPUResourcesEvictionPlugin constructs a GPU topology-aware eviction plugin. +// It wires threshold/deletion/tolerance getters from dynamic configuration and +// reuses the generic ZoneResourcesPlugin with zoneType=GPU to preserve behavior. 
+func NewReclaimedGPUResourcesEvictionPlugin(_ *client.GenericClientSet, _ events.EventRecorder, + metaServer *metaserver.MetaServer, emitter metrics.MetricEmitter, conf *config.Configuration, +) plugin.EvictionPlugin { + reclaimedThresholdGetter := func(resourceName v1.ResourceName) *float64 { + if threshold, ok := conf.GetDynamicConfiguration().EvictionThreshold[resourceName]; !ok { + return nil + } else { + return &threshold + } + } + + deletionGracePeriodGetter := func() int64 { + return conf.GetDynamicConfiguration().ReclaimedResourcesEvictionConfiguration.DeletionGracePeriod + } + + thresholdMetToleranceDurationGetter := func() int64 { + return int64(thresholdMetToleranceDurationForGPU) + } + + p := NewZoneResourcesPlugin( + ReclaimedGPUResourcesEvictionPluginName, + v1alpha1.TopologyTypeGPU, + metaServer, + emitter, + nil, + reclaimedThresholdGetter, + deletionGracePeriodGetter, + thresholdMetToleranceDurationGetter, + conf.SkipZeroQuantityResourceNames, + conf.CheckReclaimedQoSForPod, + ) + + return &ReclaimedGPUResourcesPlugin{ + StopControl: process.NewStopControl(time.Time{}), + ZoneResourcesPlugin: p, + } +} diff --git a/pkg/agent/evictionmanager/plugin/resource/reclaimed_gpu_resources_test.go b/pkg/agent/evictionmanager/plugin/resource/reclaimed_gpu_resources_test.go new file mode 100644 index 0000000000..deb21399c2 --- /dev/null +++ b/pkg/agent/evictionmanager/plugin/resource/reclaimed_gpu_resources_test.go @@ -0,0 +1,161 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package resource + +import ( + "context" + "testing" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/tools/events" + + nodev1alpha1 "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" + pluginapi "github.com/kubewharf/katalyst-api/pkg/protocol/evictionplugin/v1alpha1" + katalyst_base "github.com/kubewharf/katalyst-core/cmd/base" + "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/options" + "github.com/kubewharf/katalyst-core/pkg/metaserver" + "github.com/kubewharf/katalyst-core/pkg/metaserver/agent" + "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/cnr" + "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/node" + "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/pod" + "github.com/kubewharf/katalyst-core/pkg/metrics" +) + +func TestNewReclaimedGPUResourcesEvictionPlugin_Behavior(t *testing.T) { + t.Parallel() + + testNodeName := "gpu-node" + testConf, err := options.NewOptions().Config() + if err != nil { + t.Fatalf("config error: %v", err) + } + testConf.NodeName = testNodeName + // configure threshold for GPU resource + if testConf.GetDynamicConfiguration().EvictionThreshold == nil { + testConf.GetDynamicConfiguration().EvictionThreshold = map[corev1.ResourceName]float64{} + } + testConf.GetDynamicConfiguration().EvictionThreshold[corev1.ResourceName("nvidia.com/gpu")] = 0.5 + + // pods + podA := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-a", + Namespace: "default", + UID: "uid-a", + Annotations: map[string]string{ + "katalyst.kubewharf.io/qos_level": "reclaimed_cores", + }, + }, + } + podB := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod-b", + Namespace: "default", + UID: "uid-b", + Annotations: map[string]string{ + "katalyst.kubewharf.io/qos_level": 
"reclaimed_cores", + }, + }, + } + pods := []*corev1.Pod{podA, podB} + + // CNR with GPU topology and allocations + cnrObj := &nodev1alpha1.CustomNodeResource{ + ObjectMeta: metav1.ObjectMeta{Name: testNodeName}, + Status: nodev1alpha1.CustomNodeResourceStatus{ + TopologyZone: []*nodev1alpha1.TopologyZone{ + { + Name: "0", + Type: nodev1alpha1.TopologyTypeGPU, + Resources: nodev1alpha1.Resources{ + Allocatable: &corev1.ResourceList{ + corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("2"), + }, + }, + Allocations: []*nodev1alpha1.Allocation{ + { + Consumer: "default/pod-a/uid-a", + Requests: &corev1.ResourceList{ + corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("1"), + }, + }, + { + Consumer: "default/pod-b/uid-b", + Requests: &corev1.ResourceList{ + corev1.ResourceName("nvidia.com/gpu"): resource.MustParse("1"), + }, + }, + }, + }, + }, + }, + } + + ctx, err := katalyst_base.GenerateFakeGenericContext(nil, []runtime.Object{cnrObj}, nil) + if err != nil { + t.Fatalf("context error: %v", err) + } + + ms := &metaserver.MetaServer{ + MetaAgent: &agent.MetaAgent{ + PodFetcher: &pod.PodFetcherStub{PodList: pods}, + NodeFetcher: node.NewRemoteNodeFetcher(testConf.BaseConfiguration, testConf.NodeConfiguration, + ctx.Client.KubeClient.CoreV1().Nodes()), + CNRFetcher: cnr.NewCachedCNRFetcher(testConf.BaseConfiguration, testConf.CNRConfiguration, + ctx.Client.InternalClient.NodeV1alpha1().CustomNodeResources()), + }, + } + + plugin := NewReclaimedGPUResourcesEvictionPlugin(ctx.Client, &events.FakeRecorder{}, ms, metrics.DummyMetrics{}, testConf) + if plugin == nil { + t.Fatalf("plugin nil") + } + + // ThresholdMet + met, err := plugin.ThresholdMet(context.TODO(), &pluginapi.GetThresholdMetRequest{}) + if err != nil { + t.Fatalf("threshold error: %v", err) + } + if met == nil { + t.Fatalf("threshold nil") + } + + // GetTopEvictionPods + podsResp, err := plugin.GetTopEvictionPods(context.TODO(), &pluginapi.GetTopEvictionPodsRequest{ + ActivePods: pods, + 
TopN: 1, + EvictionScope: met.EvictionScope, + }) + if err != nil { + t.Fatalf("top eviction error: %v", err) + } + if len(podsResp.GetTargetPods()) == 0 { + t.Fatalf("no target pods") + } + + // GetEvictPods (currently returns empty but should be non-nil) + evictResp, err := plugin.GetEvictPods(context.TODO(), &pluginapi.GetEvictPodsRequest{ActivePods: pods}) + if err != nil { + t.Fatalf("evict pods error: %v", err) + } + if evictResp == nil { + t.Fatalf("evict resp nil") + } +} diff --git a/pkg/agent/evictionmanager/plugin/resource/resources.go b/pkg/agent/evictionmanager/plugin/resource/resources.go index 5908a757dd..b7163bb647 100644 --- a/pkg/agent/evictionmanager/plugin/resource/resources.go +++ b/pkg/agent/evictionmanager/plugin/resource/resources.go @@ -108,6 +108,11 @@ func (b *ResourcesEvictionPlugin) Start() { // ThresholdMet evict pods when the beset effort resources usage is greater than // the supply (after considering toleration). func (b *ResourcesEvictionPlugin) ThresholdMet(ctx context.Context, _ *pluginapi.GetThresholdMetRequest) (*pluginapi.ThresholdMetResponse, error) { + var err error + defer func() { + _ = general.UpdateHealthzStateByError(b.pluginName, err) + }() + activePods, err := b.metaServer.GetPodList(ctx, native.PodIsActive) if err != nil { errMsg := fmt.Sprintf("failed to list pods from metaServer: %v", err) diff --git a/pkg/agent/evictionmanager/plugin/resource/zone_resources.go b/pkg/agent/evictionmanager/plugin/resource/zone_resources.go index 0e17453fcf..4f511927d5 100644 --- a/pkg/agent/evictionmanager/plugin/resource/zone_resources.go +++ b/pkg/agent/evictionmanager/plugin/resource/zone_resources.go @@ -121,6 +121,11 @@ func (p *ZoneResourcesPlugin) Start() { // - Skips resources with zero total when configured in skip list. // - Returns HARD_MET with GREATER_THAN semantics when used > threshold(total). 
func (p *ZoneResourcesPlugin) ThresholdMet(ctx context.Context, _ *pluginapi.GetThresholdMetRequest) (*pluginapi.ThresholdMetResponse, error) { + var err error + defer func() { + _ = general.UpdateHealthzStateByError(p.pluginName, err) + }() + activePods, err := p.metaServer.GetPodList(ctx, native.PodIsActive) if err != nil { errWrapped := fmt.Errorf("list pods from metaServer: %w", err) diff --git a/pkg/agent/orm/endpoint/resource_plugin_stub.go b/pkg/agent/orm/endpoint/resource_plugin_stub.go index 990118f7ed..425189b2fe 100644 --- a/pkg/agent/orm/endpoint/resource_plugin_stub.go +++ b/pkg/agent/orm/endpoint/resource_plugin_stub.go @@ -52,6 +52,11 @@ type Stub struct { getTopologyAwareAllocatableResourcesFunc stubGetTopologyAwareAllocatableResourcesFunc getTopologyAwareResourcesFunc stubGetTopologyAwareResourcesFunc + // allocAssociatedDeviceFunc is used for handling associated device allocation requests. + allocAssociatedDeviceFunc stubAllocAssociatedDeviceFunc + // getAssociatedTopologyHintsFunc is used for handling associated device get topology hints requests. 
+ getAssociatedTopologyHintsFunc stubGetAssociatedDeviceTopologyHintsFunc + registrationStatus chan watcherapi.RegistrationStatus // for testing endpoint string // for testing } @@ -69,6 +74,10 @@ type stubGetTopologyAwareAllocatableResourcesFunc func(r *pluginapi.GetTopologyA type stubGetTopologyAwareResourcesFunc func(r *pluginapi.GetTopologyAwareResourcesRequest) (*pluginapi.GetTopologyAwareResourcesResponse, error) +type stubAllocAssociatedDeviceFunc func(r *pluginapi.AssociatedDeviceRequest) (*pluginapi.AssociatedDeviceAllocationResponse, error) + +type stubGetAssociatedDeviceTopologyHintsFunc func(r *pluginapi.AssociatedDeviceRequest) (*pluginapi.AssociatedDeviceHintsResponse, error) + func defaultAllocFunc(r *pluginapi.ResourceRequest) (*pluginapi.ResourceAllocationResponse, error) { var response pluginapi.ResourceAllocationResponse @@ -80,6 +89,16 @@ func defaultGetAllocFunc(r *pluginapi.GetResourcesAllocationRequest) (*pluginapi return &response, nil } +func defaultAllocateAssociatedDeviceFunc(r *pluginapi.AssociatedDeviceRequest) (*pluginapi.AssociatedDeviceAllocationResponse, error) { + var response pluginapi.AssociatedDeviceAllocationResponse + return &response, nil +} + +func defaultGetAssociatedDeviceTopologyHintsFunc(r *pluginapi.AssociatedDeviceRequest) (*pluginapi.AssociatedDeviceHintsResponse, error) { + var response pluginapi.AssociatedDeviceHintsResponse + return &response, nil +} + // NewResourcePluginStub returns an initialized ResourcePlugin Stub. 
func NewResourcePluginStub(socket string, name string, preStartContainerFlag bool) *Stub {
 	return &Stub{
@@ -89,8 +108,10 @@ func NewResourcePluginStub(socket string, name string, preStartContainerFlag boo
 		stop:   make(chan interface{}),
 
-		allocFunc1: defaultAllocFunc,
-		allocFunc2: defaultGetAllocFunc,
+		allocFunc1:                     defaultAllocFunc,
+		allocFunc2:                     defaultGetAllocFunc,
+		allocAssociatedDeviceFunc:      defaultAllocateAssociatedDeviceFunc,
+		getAssociatedTopologyHintsFunc: defaultGetAssociatedDeviceTopologyHintsFunc,
 	}
 }
@@ -103,6 +124,11 @@ func (m *Stub) SetGetAllocFunc(f stubAllocFunc2) {
 	m.allocFunc2 = f
 }
 
+// SetAssociatedDeviceFunc sets the allocation function for associated devices.
+func (m *Stub) SetAssociatedDeviceFunc(f stubAllocAssociatedDeviceFunc) {
+	m.allocAssociatedDeviceFunc = f
+}
+
 func (m *Stub) SetGetTopologyAwareAllocatableResourcesFunc(f stubGetTopologyAwareAllocatableResourcesFunc) {
 	m.getTopologyAwareAllocatableResourcesFunc = f
 }
@@ -289,9 +315,26 @@ func (m *Stub) GetPodTopologyHints(ctx context.Context, r *pluginapi.PodResource
 	return &pluginapi.PodResourceHintsResponse{}, nil
 }
 
-// Notify the resource plugin that the pod has beed deleted,
+// RemovePod notifies the resource plugin that the pod has been deleted,
 // and the plugin should do some clear-up work.
 func (m *Stub) RemovePod(ctx context.Context, r *pluginapi.RemovePodRequest) (*pluginapi.RemovePodResponse, error) {
 	log.Printf("RemovePod, %+v", r)
 	return &pluginapi.RemovePodResponse{}, nil
 }
+
+func (m *Stub) UpdateAllocatableAssociatedDevices(ctx context.Context, r *pluginapi.UpdateAllocatableAssociatedDevicesRequest) (*pluginapi.UpdateAllocatableAssociatedDevicesResponse, error) {
+	log.Printf("UpdateAllocatableAssociatedDevices, %+v", r)
+	return &pluginapi.UpdateAllocatableAssociatedDevicesResponse{}, nil
+}
+
+// AllocateAssociatedDevice is the gRPC implementation of the allocation function for associated devices.
+// It calls the registered allocation function.
+func (m *Stub) AllocateAssociatedDevice(ctx context.Context, r *pluginapi.AssociatedDeviceRequest) (*pluginapi.AssociatedDeviceAllocationResponse, error) { + log.Printf("AllocateAssociatedDevice, %+v", r) + return m.allocAssociatedDeviceFunc(r) +} + +func (m *Stub) GetAssociatedDeviceTopologyHints(ctx context.Context, request *pluginapi.AssociatedDeviceRequest) (*pluginapi.AssociatedDeviceHintsResponse, error) { + log.Printf("GetAssociatedDeviceTopologyHints, %+v", request) + return m.getAssociatedTopologyHintsFunc(request) +} diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go index d1de701337..8f1d17d1f4 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go @@ -85,6 +85,8 @@ const ( // and adjust resource requirements and configurations type DynamicPolicy struct { sync.RWMutex + pluginapi.UnimplementedResourcePluginServer + name string stopCh chan struct{} started bool diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers.go index 471f737075..48ad1a609a 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers.go @@ -259,7 +259,6 @@ func (p *DynamicPolicy) calculateHints( numaBound = minNUMAsCountNeeded + 1 } - preferredHintIndexes := []int{} var availableNumaHints []*pluginapi.TopologyHint machine.IterateBitMasks(numaNodes, numaBound, func(mask machine.BitMask) { maskCount := mask.Count() @@ -295,10 +294,6 @@ func (p *DynamicPolicy) calculateHints( Nodes: machine.MaskToUInt64Array(mask), Preferred: preferred, }) - - if preferred { - preferredHintIndexes = append(preferredHintIndexes, len(availableNumaHints)-1) - } }) // todo support numa_binding without numa_exclusive in the future diff --git a/pkg/agent/qrm-plugins/cpu/nativepolicy/policy.go 
b/pkg/agent/qrm-plugins/cpu/nativepolicy/policy.go index e220770356..3417f17615 100644 --- a/pkg/agent/qrm-plugins/cpu/nativepolicy/policy.go +++ b/pkg/agent/qrm-plugins/cpu/nativepolicy/policy.go @@ -58,6 +58,8 @@ const ( // NativePolicy is a policy compatible with Kubernetes native semantics and is used in topology-aware scheduling scenarios. type NativePolicy struct { sync.RWMutex + pluginapi.UnimplementedResourcePluginServer + name string stopCh chan struct{} started bool diff --git a/pkg/agent/qrm-plugins/gpu/baseplugin/base.go b/pkg/agent/qrm-plugins/gpu/baseplugin/base.go new file mode 100644 index 0000000000..a63aef6bb8 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/baseplugin/base.go @@ -0,0 +1,229 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package baseplugin + +import ( + "fmt" + "sync" + + v1 "k8s.io/api/core/v1" + pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + + "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent" + gpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/consts" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/state" + "github.com/kubewharf/katalyst-core/pkg/config" + "github.com/kubewharf/katalyst-core/pkg/metaserver" + "github.com/kubewharf/katalyst-core/pkg/metrics" + "github.com/kubewharf/katalyst-core/pkg/util/general" + "github.com/kubewharf/katalyst-core/pkg/util/machine" +) + +const ( + GPUPluginStateFileName = "gpu_plugin_state" +) + +// BasePlugin is a shared plugin that provides common functionalities and fields for GPU resource plugins and custom device plugins. +type BasePlugin struct { + mu sync.RWMutex + Conf *config.Configuration + + Emitter metrics.MetricEmitter + MetaServer *metaserver.MetaServer + AgentCtx *agent.GenericContext + + PodAnnotationKeptKeys []string + PodLabelKeptKeys []string + + // Map of checkpoints for each sub-plugin + State state.State + // Registry of device topology providers + DeviceTopologyRegistry *machine.DeviceTopologyRegistry + // ShareGPUManager is a manager that manages share GPU devices + ShareGPUManager ShareGPUManager + + // Registry of default resource state generators + DefaultResourceStateGeneratorRegistry *state.DefaultResourceStateGeneratorRegistry + + // Map of specific device name to device type + deviceNameToTypeMap map[string]string +} + +func NewBasePlugin( + agentCtx *agent.GenericContext, conf *config.Configuration, wrappedEmitter metrics.MetricEmitter, +) (*BasePlugin, error) { + return &BasePlugin{ + Conf: conf, + + Emitter: wrappedEmitter, + MetaServer: agentCtx.MetaServer, + AgentCtx: agentCtx, + + PodAnnotationKeptKeys: conf.PodAnnotationKeptKeys, + PodLabelKeptKeys: conf.PodLabelKeptKeys, + + DeviceTopologyRegistry: 
machine.NewDeviceTopologyRegistry(), + DefaultResourceStateGeneratorRegistry: state.NewDefaultResourceStateGeneratorRegistry(), + ShareGPUManager: NewShareGPUManager(), + + deviceNameToTypeMap: make(map[string]string), + }, nil +} + +// InitState initializes the state of the plugin. +func (p *BasePlugin) InitState() error { + stateImpl, err := state.NewCheckpointState(p.Conf.QRMPluginsConfiguration, p.Conf.GenericQRMPluginConfiguration.StateFileDirectory, GPUPluginStateFileName, + gpuconsts.GPUResourcePluginPolicyNameStatic, p.DefaultResourceStateGeneratorRegistry, p.Conf.SkipGPUStateCorruption, p.Emitter) + if err != nil { + return fmt.Errorf("NewCheckpointState failed with error: %v", err) + } + + p.State = stateImpl + return nil +} + +func (p *BasePlugin) Run(stopCh <-chan struct{}) error { + go p.ShareGPUManager.Run(p, stopCh) + return nil +} + +func (p *BasePlugin) PackAllocationResponse( + req *pluginapi.ResourceRequest, allocationInfo *state.AllocationInfo, + resourceAllocationAnnotations map[string]string, resourceName string, +) (*pluginapi.ResourceAllocationResponse, error) { + if allocationInfo == nil { + return nil, fmt.Errorf("packAllocationResponse got nil allocationInfo") + } else if req == nil { + return nil, fmt.Errorf("packAllocationResponse got nil request") + } + + return &pluginapi.ResourceAllocationResponse{ + PodUid: req.PodUid, + PodNamespace: req.PodNamespace, + PodName: req.PodName, + ContainerName: req.ContainerName, + ContainerType: req.ContainerType, + ContainerIndex: req.ContainerIndex, + PodRole: req.PodRole, + PodType: req.PodType, + ResourceName: req.ResourceName, + AllocationResult: &pluginapi.ResourceAllocation{ + ResourceAllocation: map[string]*pluginapi.ResourceAllocationInfo{ + resourceName: { + IsNodeResource: true, + IsScalarResource: true, // to avoid re-allocating + AllocatedQuantity: allocationInfo.AllocatedAllocation.Quantity, + Annotations: resourceAllocationAnnotations, + ResourceHints: &pluginapi.ListOfTopologyHints{ + 
Hints: []*pluginapi.TopologyHint{ + req.Hint, + }, + }, + }, + }, + }, + Labels: general.DeepCopyMap(req.Labels), + Annotations: general.DeepCopyMap(req.Annotations), + }, nil +} + +// UpdateAllocatableAssociatedDevicesByDeviceType updates the topology provider with topology information of the +// given device type. +func (p *BasePlugin) UpdateAllocatableAssociatedDevicesByDeviceType( + request *pluginapi.UpdateAllocatableAssociatedDevicesRequest, deviceType string, +) (*pluginapi.UpdateAllocatableAssociatedDevicesResponse, error) { + deviceTopology := &machine.DeviceTopology{ + Devices: make(map[string]machine.DeviceInfo, len(request.Devices)), + } + + for _, device := range request.Devices { + var numaNode []int + if device.Topology != nil { + numaNode = make([]int, 0, len(device.Topology.Nodes)) + + for _, node := range device.Topology.Nodes { + if node == nil { + continue + } + numaNode = append(numaNode, int(node.ID)) + } + } + + deviceTopology.Devices[device.ID] = machine.DeviceInfo{ + Health: device.Health, + NumaNodes: numaNode, + DeviceAffinity: make(map[machine.AffinityPriority]machine.DeviceIDs), + } + } + + err := p.DeviceTopologyRegistry.SetDeviceTopology(deviceType, deviceTopology) + if err != nil { + general.Errorf("set device topology failed with error: %v", err) + return nil, fmt.Errorf("set device topology failed with error: %v", err) + } + + general.Infof("got device %s topology success: %v", request.DeviceName, deviceTopology) + + return &pluginapi.UpdateAllocatableAssociatedDevicesResponse{}, nil +} + +// GenerateResourceStateFromPodEntries returns an AllocationMap of a certain resource based on pod entries +// 1. If podEntries is nil, it will get pod entries from state +// 2. 
If the generator is not found, it will return an error +func (p *BasePlugin) GenerateResourceStateFromPodEntries( + resourceName string, + podEntries state.PodEntries, +) (state.AllocationMap, error) { + if podEntries == nil { + podEntries = p.State.GetPodEntries(v1.ResourceName(resourceName)) + } + + generator, ok := p.DefaultResourceStateGeneratorRegistry.GetGenerator(resourceName) + if !ok { + return nil, fmt.Errorf("could not find generator for resource %s", resourceName) + } + + return state.GenerateResourceStateFromPodEntries(podEntries, generator) +} + +func (p *BasePlugin) GenerateMachineStateFromPodEntries( + podResourceEntries state.PodResourceEntries, +) (state.AllocationResourcesMap, error) { + return state.GenerateMachineStateFromPodEntries(podResourceEntries, p.DefaultResourceStateGeneratorRegistry) +} + +// RegisterDeviceNameToType is used to map device name to device type. +// For example, we may have multiple device names for a same device type, e.g. "nvidia.com/gpu" and "nvidia.com/be-gpu", +// so we map them to the same device type, which allows us to allocate them interchangeably. 
+func (p *BasePlugin) RegisterDeviceNameToType(resourceNames []string, deviceType string) { + for _, resourceName := range resourceNames { + p.deviceNameToTypeMap[resourceName] = deviceType + } +} + +func (p *BasePlugin) GetResourceTypeFromDeviceName(deviceName string) (string, error) { + deviceType, ok := p.deviceNameToTypeMap[deviceName] + if !ok { + return "", fmt.Errorf("no device type found for device name %s", deviceName) + } + return deviceType, nil +} + +// RegisterTopologyAffinityProvider is a hook to set device affinity for a certain device type +func (p *BasePlugin) RegisterTopologyAffinityProvider(deviceType string, deviceAffinityProvider machine.DeviceAffinityProvider) { + p.DeviceTopologyRegistry.RegisterTopologyAffinityProvider(deviceType, deviceAffinityProvider) +} diff --git a/pkg/agent/qrm-plugins/gpu/baseplugin/base_test.go b/pkg/agent/qrm-plugins/gpu/baseplugin/base_test.go new file mode 100644 index 0000000000..00f0453cb9 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/baseplugin/base_test.go @@ -0,0 +1,175 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package baseplugin + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/uuid" + pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + + katalyst_base "github.com/kubewharf/katalyst-core/cmd/base" + "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/state" + "github.com/kubewharf/katalyst-core/pkg/config" + "github.com/kubewharf/katalyst-core/pkg/metaserver" + "github.com/kubewharf/katalyst-core/pkg/metrics" +) + +func generateTestConfiguration(t *testing.T) *config.Configuration { + conf := config.NewConfiguration() + tmpDir := t.TempDir() + conf.QRMPluginSocketDirs = []string{tmpDir} + conf.CheckpointManagerDir = tmpDir + + return conf +} + +func generateTestGenericContext(t *testing.T, conf *config.Configuration) *agent.GenericContext { + genericCtx, err := katalyst_base.GenerateFakeGenericContext([]runtime.Object{}) + if err != nil { + t.Fatalf("unable to generate test generic context: %v", err) + } + + metaServer, err := metaserver.NewMetaServer(genericCtx.Client, metrics.DummyMetrics{}, conf) + if err != nil { + t.Fatalf("unable to generate test meta server: %v", err) + } + + agentCtx := &agent.GenericContext{ + GenericContext: genericCtx, + MetaServer: metaServer, + PluginManager: nil, + } + + agentCtx.MetaServer = metaServer + return agentCtx +} + +func TestBasePlugin_PackAllocationResponse(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + req *pluginapi.ResourceRequest + allocationInfo *state.AllocationInfo + annotations map[string]string + resourceName string + expectedResp *pluginapi.ResourceAllocationResponse + expectedErr bool + }{ + { + name: "nil allocation info", + req: &pluginapi.ResourceRequest{ + PodUid: string(uuid.NewUUID()), + PodNamespace: "test", + PodName: "test", + ContainerName: "test", + ContainerType: pluginapi.ContainerType_MAIN, + 
ContainerIndex: 0, + }, + resourceName: "test-resource", + expectedErr: true, + }, + { + name: "nil request", + allocationInfo: &state.AllocationInfo{ + AllocatedAllocation: state.Allocation{ + Quantity: 4, + NUMANodes: []int{0, 1}, + }, + }, + resourceName: "test-resource", + expectedErr: true, + }, + { + name: "basic case", + req: &pluginapi.ResourceRequest{ + PodUid: "test-uid", + PodNamespace: "test", + PodName: "test", + ContainerName: "test", + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + Hint: &pluginapi.TopologyHint{ + Nodes: []uint64{0, 1}, + }, + ResourceName: "test-resource", + }, + allocationInfo: &state.AllocationInfo{ + AllocatedAllocation: state.Allocation{ + Quantity: 4, + NUMANodes: []int{0, 1}, + }, + }, + annotations: map[string]string{ + "test-key": "test-value", + }, + resourceName: "test-resource", + expectedResp: &pluginapi.ResourceAllocationResponse{ + PodUid: "test-uid", + PodNamespace: "test", + PodName: "test", + ContainerName: "test", + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: "test-resource", + AllocationResult: &pluginapi.ResourceAllocation{ + ResourceAllocation: map[string]*pluginapi.ResourceAllocationInfo{ + "test-resource": { + IsNodeResource: true, + IsScalarResource: true, + AllocatedQuantity: 4, + Annotations: map[string]string{ + "test-key": "test-value", + }, + ResourceHints: &pluginapi.ListOfTopologyHints{ + Hints: []*pluginapi.TopologyHint{ + { + Nodes: []uint64{0, 1}, + }, + }, + }, + }, + }, + }, + }, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + conf := generateTestConfiguration(t) + agentCtx := generateTestGenericContext(t, conf) + basePlugin, err := NewBasePlugin(agentCtx, conf, metrics.DummyMetrics{}) + assert.NoError(t, err) + + resp, err := basePlugin.PackAllocationResponse(tt.req, tt.allocationInfo, tt.annotations, tt.resourceName) + if tt.expectedErr { + assert.Error(t, err) + } else { + 
assert.NoError(t, err) + assert.Equal(t, tt.expectedResp, resp) + } + }) + } +} diff --git a/pkg/agent/qrm-plugins/gpu/baseplugin/share_gpu_manager.go b/pkg/agent/qrm-plugins/gpu/baseplugin/share_gpu_manager.go new file mode 100644 index 0000000000..4fe1d47e79 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/baseplugin/share_gpu_manager.go @@ -0,0 +1,251 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package baseplugin + +import ( + "context" + "sync" + "time" + + "github.com/pkg/errors" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + + "github.com/kubewharf/katalyst-api/pkg/apis/config/v1alpha1" + gpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/consts" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/state" + "github.com/kubewharf/katalyst-core/pkg/util/general" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// This module determines whether each GPU device is eligible for shared usage +// (ShareGPU) based on per-pod extended indicators fetched from MetaServer. It +// periodically scans the GPU machine state maintained by BasePlugin and builds +// a snapshot map `shareGPUMap` that records the ShareGPU decision for each +// device ID. +// Decision rule: Only main containers are considered; if any main container on +// a device disables ShareGPU, the device is marked non-shareable. 
To reduce +// repeated external queries, a per-sync in-memory cache keyed by `pod UID` is +// used to memoize `EnableShareGPU` decisions. + +// ShareGPUManager determines per-device ShareGPU eligibility and maintains a +// periodic snapshot. Safe for concurrent reads via `EnableShareGPU`. +// +// Time Complexity: +// - `sync`: O(D + C) where D is number of devices and C is number of main +// containers scanned; per-pod indicator lookups are amortized O(1) via cache. +// - `EnableShareGPU`: O(1) map read. +// - `Allocate`: O(A) over `TopologyAwareAllocations` entries when marking. +// +// Potential Errors: +// - External indicator fetch may fail; treated conservatively and wrapped. +// - Internal panics are recovered to keep the manager running. +type ShareGPUManager interface { + // Allocate processes an allocation of a main container. If the pod's + // indicator disables ShareGPU, all involved device IDs in + // `TopologyAwareAllocations` are marked non-shareable in the snapshot. + // Params: + // - ctx: request-scoped context for external calls + // - allocationInfo: container allocation metadata; ignored if nil or non-main + // Returns: none; updates internal snapshot + // Errors: any external errors are logged and wrapped internally + Allocate(ctx context.Context, allocationInfo *state.AllocationInfo) + + // EnableShareGPU returns the cached ShareGPU decision for a given device ID. + // Params: + // - id: device ID string + // Returns: true if the device is shareable; false if unknown or disallowed + EnableShareGPU(id string) bool + + // Run starts the periodic synchronization loop until `stopCh` is closed. + // Params: + // - basePlugin: plugin providing machine state and MetaServer + // - stopCh: channel to stop the loop + // Notes: blocking method; should be called in a separate goroutine. 
+	Run(*BasePlugin, <-chan struct{})
+}
+type shareGPUManager struct {
+	sync.RWMutex
+
+	shareGPUMap map[string]bool
+	basePlugin  *BasePlugin
+}
+
+// NewShareGPUManager creates a new ShareGPUManager instance.
+// Returns: a manager with an empty snapshot cache.
+func NewShareGPUManager() ShareGPUManager {
+	return &shareGPUManager{
+		shareGPUMap: make(map[string]bool),
+	}
+}
+
+// EnableShareGPU returns the cached ShareGPU decision for a given device ID.
+// If the device ID is not present in the latest snapshot, it returns false.
+// Complexity: O(1) map lookup.
+func (s *shareGPUManager) EnableShareGPU(id string) bool {
+	s.RLock()
+	defer s.RUnlock()
+
+	return s.shareGPUMap[id]
+}
+
+// Allocate marks involved device IDs as non-shareable if the pod disables
+// ShareGPU. Non-main containers are ignored.
+// Complexity: O(A) where A is number of device IDs in TopologyAwareAllocations.
+func (s *shareGPUManager) Allocate(ctx context.Context, allocationInfo *state.AllocationInfo) {
+	if allocationInfo == nil || !allocationInfo.CheckMainContainer() {
+		return
+	}
+
+	enableShareGPU := s.evaluateContainerDeviceShareStatus(ctx, allocationInfo, nil)
+	if enableShareGPU {
+		return
+	}
+
+	s.Lock()
+	defer s.Unlock()
+	for id := range allocationInfo.TopologyAwareAllocations {
+		s.shareGPUMap[id] = false
+	}
+}
+
+// Run starts the periodic synchronization loop that refreshes ShareGPU
+// decisions. The loop:
+// - immediately performs a synchronization once for faster readiness;
+// - then schedules periodic syncs every 30 seconds;
+// - stops when `stopCh` is closed.
+// Note: The method is blocking; callers should invoke it in a goroutine.
+func (s *shareGPUManager) Run(basePlugin *BasePlugin, stopCh <-chan struct{}) {
+	ctx, cancel := context.WithCancel(context.Background())
+	s.basePlugin = basePlugin
+
+	go func() {
+		<-stopCh
+		cancel()
+	}()
+
+	s.sync(ctx)
+	wait.UntilWithContext(ctx, s.sync, 30*time.Second)
+}
+
+// sync refreshes the ShareGPU decisions by scanning machine state.
+// Complexity: O(D + C) per invocation.
+func (s *shareGPUManager) sync(ctx context.Context) {
+	if s.basePlugin == nil {
+		general.Infof("share gpu manager sync failed, basePlugin is nil")
+		return
+	}
+
+	s.Lock()
+	defer s.Unlock()
+	machineState, ok := s.basePlugin.State.GetMachineState()[gpuconsts.GPUDeviceType]
+	if !ok {
+		general.Infof("share gpu manager found no GPU machine state; skipping")
+		return
+	}
+
+	// Build a fresh snapshot with per-sync indicator cache.
+	shareGPUMap := make(map[string]bool, len(machineState))
+	indicatorCache := make(map[types.UID]bool)
+	for id, alloc := range machineState {
+		shareGPUMap[id] = s.evaluateDeviceShareStatus(ctx, alloc, indicatorCache)
+	}
+
+	s.shareGPUMap = shareGPUMap
+}
+
+// evaluateDeviceShareStatus scans main containers for a device and returns true
+// if and only if all of them enable ShareGPU. Any error retrieving indicators
+// is treated conservatively: the affected container is regarded as
+// non-shareable, which marks the whole device as non-shareable.
+// Complexity: O(Cd) where Cd is number of main containers on the device.
+func (s *shareGPUManager) evaluateDeviceShareStatus(ctx context.Context, alloc *state.AllocationState, cache map[types.UID]bool) bool {
+	if alloc == nil {
+		return false
+	}
+
+	// Default to shareable and short-circuit to false once a disallowed pod is found.
+ for _, containerEntries := range alloc.PodEntries { + for _, container := range containerEntries { + if !container.CheckMainContainer() || container.CheckReclaimed() { + continue + } + + enableShareGPU := s.evaluateContainerDeviceShareStatus(ctx, container, cache) + if !enableShareGPU { + return false + } + } + } + + return true +} + +// evaluateContainerDeviceShareStatus checks a single container's pod-level indicator. +// If a cache is provided, it memoizes decisions by pod UID. +// Complexity: O(1) with cache hit; O(ExternalCall) otherwise. +func (s *shareGPUManager) evaluateContainerDeviceShareStatus(ctx context.Context, container *state.AllocationInfo, cache map[types.UID]bool) bool { + podMeta := s.preparePodMeta(container) + + if cache != nil { + if v, ok := cache[podMeta.UID]; ok { + return v + } + } + + enableShareGPU, err := s.getPodEnableShareGPU(ctx, podMeta) + if err != nil { + general.Infof("share gpu manager: fetching extended indicators failed for pod %s/%s: %v", podMeta.Namespace, podMeta.Name, err) + if cache != nil { + cache[podMeta.UID] = false + } + return false + } + + if cache != nil { + cache[podMeta.UID] = enableShareGPU + } + return enableShareGPU +} + +// getPodEnableShareGPU queries MetaServer for the pod's `EnableShareGPU` indicator. +// Returns: boolean indicator value; error when external call fails. +// Errors: wrapped with context using `pkg/errors`. 
+func (s *shareGPUManager) getPodEnableShareGPU(ctx context.Context, podMeta metav1.ObjectMeta) (bool, error) { + enableShareGPU := false + indicators := v1alpha1.ReclaimResourceIndicators{} + baseLine, err := s.basePlugin.MetaServer.ServiceExtendedIndicator(ctx, podMeta, &indicators) + if err != nil { + return false, errors.Wrapf(err, "ServiceExtendedIndicator failed for pod %s/%s", podMeta.Namespace, podMeta.Name) + } + + if !baseLine && indicators.EnableShareGPU != nil { + enableShareGPU = *indicators.EnableShareGPU + } + + return enableShareGPU, nil +} + +func (s *shareGPUManager) preparePodMeta(info *state.AllocationInfo) metav1.ObjectMeta { + return metav1.ObjectMeta{ + UID: types.UID(info.PodUid), + Namespace: info.PodNamespace, + Name: info.PodName, + Labels: info.Labels, + Annotations: info.Annotations, + } +} diff --git a/pkg/agent/qrm-plugins/gpu/baseplugin/share_gpu_manager_test.go b/pkg/agent/qrm-plugins/gpu/baseplugin/share_gpu_manager_test.go new file mode 100644 index 0000000000..5f0e491f4c --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/baseplugin/share_gpu_manager_test.go @@ -0,0 +1,329 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package baseplugin + +import ( + "context" + "fmt" + "sync" + "testing" + "time" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + + "github.com/kubewharf/katalyst-api/pkg/apis/config/v1alpha1" + workloadapis "github.com/kubewharf/katalyst-api/pkg/apis/workload/v1alpha1" + apiconsts "github.com/kubewharf/katalyst-api/pkg/consts" + commonstate "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate" + gpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/consts" + gpustate "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/state" + "github.com/kubewharf/katalyst-core/pkg/metaserver" + "github.com/kubewharf/katalyst-core/pkg/metaserver/spd" +) + +// fakeSPM is a lightweight ServiceProfilingManager for tests. +type fakeSPM struct { + behaviors map[types.UID]struct { + baseline bool + enable *bool + err error + } + calls int +} + +func (f *fakeSPM) ServiceExtendedIndicator(_ context.Context, podMeta metav1.ObjectMeta, indicators interface{}) (bool, error) { + f.calls++ + b := f.behaviors[podMeta.UID] + // fill EnableShareGPU if indicators is of proper type + if ind, ok := indicators.(*v1alpha1.ReclaimResourceIndicators); ok { + ind.EnableShareGPU = b.enable + } + if b.err != nil { + return false, b.err + } + return b.baseline, nil +} + +// Stubs for unused interface methods +func (f *fakeSPM) ServiceBusinessPerformanceLevel(context.Context, metav1.ObjectMeta) (spd.PerformanceLevel, error) { + return spd.PerformanceLevelPerfect, nil +} + +func (f *fakeSPM) ServiceBusinessPerformanceScore(context.Context, metav1.ObjectMeta) (float64, error) { + return spd.MaxPerformanceScore, nil +} + +func (f *fakeSPM) ServiceSystemPerformanceTarget(context.Context, metav1.ObjectMeta) (spd.IndicatorTarget, error) { + return spd.IndicatorTarget{}, nil +} + +func (f *fakeSPM) 
ServiceBaseline(context.Context, metav1.ObjectMeta) (bool, error) { + return false, nil +} + +func (f *fakeSPM) ServiceAggregateMetrics(context.Context, metav1.ObjectMeta, v1.ResourceName, bool, workloadapis.Aggregator, workloadapis.Aggregator) ([]resource.Quantity, error) { + return nil, nil +} +func (f *fakeSPM) Run(context.Context) {} + +// fakeState provides minimal state for tests. +type fakeState struct { + machine gpustate.AllocationResourcesMap +} + +func (f *fakeState) GetMachineState() gpustate.AllocationResourcesMap { + return f.machine +} +func (f *fakeState) GetPodResourceEntries() gpustate.PodResourceEntries { return nil } +func (f *fakeState) GetPodEntries(resourceName v1.ResourceName) gpustate.PodEntries { return nil } +func (f *fakeState) GetAllocationInfo(resourceName v1.ResourceName, podUID, containerName string) *gpustate.AllocationInfo { + return nil +} +func (f *fakeState) SetMachineState(gpustate.AllocationResourcesMap, bool) {} +func (f *fakeState) SetResourceState(v1.ResourceName, gpustate.AllocationMap, bool) {} +func (f *fakeState) SetPodResourceEntries(gpustate.PodResourceEntries, bool) {} +func (f *fakeState) SetAllocationInfo(v1.ResourceName, string, string, *gpustate.AllocationInfo, bool) { +} +func (f *fakeState) Delete(v1.ResourceName, string, string, bool) {} +func (f *fakeState) ClearState() {} +func (f *fakeState) StoreState() error { return nil } + +func makeContainer(podUID, podNS, podName, containerName string, main bool, ids ...string) *gpustate.AllocationInfo { + t := pluginapi.ContainerType_SIDECAR.String() + if main { + t = pluginapi.ContainerType_MAIN.String() + } + topo := make(map[string]gpustate.Allocation) + for _, id := range ids { + topo[id] = gpustate.Allocation{Quantity: 1} + } + return &gpustate.AllocationInfo{ + AllocationMeta: gpustate.AllocationInfo{AllocationMeta: commonstate.AllocationMeta{ + PodUid: podUID, + PodNamespace: podNS, + PodName: podName, + ContainerName: containerName, + ContainerType: t, + 
}}.AllocationMeta, + AllocatedAllocation: gpustate.Allocation{Quantity: 1}, + TopologyAwareAllocations: topo, + } +} + +func Test_EnableShareGPU_DefaultFalse(t *testing.T) { + t.Parallel() + m := NewShareGPUManager().(*shareGPUManager) + if got := m.EnableShareGPU("non-existent"); got { + t.Fatalf("expected false for unknown id, got true") + } +} + +func Test_Allocate_MarkFalse_OnIndicatorFalse(t *testing.T) { + t.Parallel() + spm := &fakeSPM{behaviors: map[types.UID]struct { + baseline bool + enable *bool + err error + }{ + types.UID("u1"): {baseline: false, enable: ptrBool(false)}, + }} + bp := &BasePlugin{MetaServer: &metaserver.MetaServer{ServiceProfilingManager: spm}} + m := NewShareGPUManager().(*shareGPUManager) + m.basePlugin = bp + + alloc := makeContainer("u1", "ns", "pod", "c", true, "GPU-1", "GPU-2") + m.Allocate(context.Background(), alloc) + + if m.EnableShareGPU("GPU-1") || m.EnableShareGPU("GPU-2") { + t.Fatalf("expected devices marked false after Allocate") + } +} + +func Test_Sync_BuildsSnapshot_WithCache(t *testing.T) { + t.Parallel() + enable := ptrBool(true) + spm := &fakeSPM{behaviors: map[types.UID]struct { + baseline bool + enable *bool + err error + }{ + types.UID("u1"): {baseline: false, enable: enable}, + }} + bp := &BasePlugin{MetaServer: &metaserver.MetaServer{ServiceProfilingManager: spm}} + + // Build machine state: one device with two main containers of same pod + ce := gpustate.ContainerEntries{ + "c1": makeContainer("u1", "ns", "pod", "c1", true, "GPU-1"), + "c2": makeContainer("u1", "ns", "pod", "c2", true, "GPU-1"), + } + allocState := &gpustate.AllocationState{PodEntries: gpustate.PodEntries{"u1": ce}} + machine := gpustate.AllocationResourcesMap{v1.ResourceName(gpuconsts.GPUDeviceType): gpustate.AllocationMap{"GPU-1": allocState}} + + m := NewShareGPUManager().(*shareGPUManager) + m.basePlugin = &BasePlugin{MetaServer: bp.MetaServer, State: &fakeState{machine: machine}} + m.sync(context.Background()) + + if 
!m.EnableShareGPU("GPU-1") { + t.Fatalf("expected GPU-1 shareable") + } + if spm.calls != 1 { + t.Fatalf("expected 1 indicator call due to cache, got %d", spm.calls) + } +} + +func Test_Concurrency_Safety(t *testing.T) { + t.Parallel() + enable := ptrBool(true) + spm := &fakeSPM{behaviors: map[types.UID]struct { + baseline bool + enable *bool + err error + }{ + types.UID("u1"): {baseline: false, enable: enable}, + }} + bp := &BasePlugin{MetaServer: &metaserver.MetaServer{ServiceProfilingManager: spm}} + m := NewShareGPUManager().(*shareGPUManager) + m.basePlugin = bp + + alloc := makeContainer("u1", "ns", "pod", "c", true, "GPU-1") + + var wg sync.WaitGroup + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Writer goroutine + wg.Add(1) + go func() { + defer wg.Done() + for i := 0; i < 100; i++ { + m.Allocate(ctx, alloc) + time.Sleep(time.Millisecond) + } + }() + + // Reader goroutine + wg.Add(1) + go func() { + defer wg.Done() + for i := 0; i < 100; i++ { + _ = m.EnableShareGPU("GPU-1") + time.Sleep(time.Millisecond) + } + }() + + wg.Wait() +} + +func Test_Run_StartsAndStops(t *testing.T) { + t.Parallel() + enable := ptrBool(true) + spm := &fakeSPM{behaviors: map[types.UID]struct { + baseline bool + enable *bool + err error + }{ + types.UID("u1"): {baseline: false, enable: enable}, + }} + bp := &BasePlugin{MetaServer: &metaserver.MetaServer{ServiceProfilingManager: spm}, State: &fakeState{machine: gpustate.AllocationResourcesMap{v1.ResourceName(gpuconsts.GPUDeviceType): gpustate.AllocationMap{"GPU-1": &gpustate.AllocationState{PodEntries: gpustate.PodEntries{"u1": gpustate.ContainerEntries{"c": makeContainer("u1", "ns", "pod", "c", true, "GPU-1")}}}}}}} + m := NewShareGPUManager() + + stopCh := make(chan struct{}) + go m.Run(bp, stopCh) + time.Sleep(50 * time.Millisecond) + close(stopCh) +} + +func Test_Allocate_Ignores_NonMain(t *testing.T) { + t.Parallel() + spm := &fakeSPM{behaviors: map[types.UID]struct { + baseline bool + enable 
*bool + err error + }{}} + bp := &BasePlugin{MetaServer: &metaserver.MetaServer{ServiceProfilingManager: spm}} + m := NewShareGPUManager().(*shareGPUManager) + m.basePlugin = bp + + alloc := makeContainer("u2", "ns", "pod", "c", false, "GPU-1") + m.Allocate(context.Background(), alloc) + if m.EnableShareGPU("GPU-1") { + t.Fatalf("non-main container should not mark device") + } +} + +func Test_evaluateDeviceShareStatus_Ignores_reclaimed(t *testing.T) { + t.Parallel() + m := NewShareGPUManager().(*shareGPUManager) + alloc := makeContainer("u2", "ns", "pod", "c", true, "GPU-1") + alloc.QoSLevel = apiconsts.PodAnnotationQoSLevelReclaimedCores + as := &gpustate.AllocationState{ + PodEntries: map[string]gpustate.ContainerEntries{ + "u2": { + "c": alloc, + }, + }, + } + + if !m.evaluateDeviceShareStatus(context.Background(), as, make(map[types.UID]bool)) { + t.Fatalf("reclaimed container should not mark device") + } +} + +func ptrBool(b bool) *bool { v := b; return &v } + +func BenchmarkSync_WithIndicatorCache(b *testing.B) { + enable := ptrBool(true) + spm := &fakeSPM{behaviors: map[types.UID]struct { + baseline bool + enable *bool + err error + }{ + types.UID("u1"): {baseline: false, enable: enable}, + }} + bp := &BasePlugin{MetaServer: &metaserver.MetaServer{ServiceProfilingManager: spm}} + + // Build a large machine state: N devices, each with M main containers from the same pod + const N, M = 50, 20 + am := make(gpustate.AllocationMap, N) + for i := 0; i < N; i++ { + ce := make(gpustate.ContainerEntries, M) + for j := 0; j < M; j++ { + cname := fmt.Sprintf("c%c", 'A'+j) + ce[cname] = makeContainer("u1", "ns", "pod", cname, true, fmt.Sprintf("GPU-%c", 'A'+i)) + } + am[fmt.Sprintf("GPU-%c", 'A'+i)] = &gpustate.AllocationState{PodEntries: gpustate.PodEntries{"u1": ce}} + } + machine := gpustate.AllocationResourcesMap{v1.ResourceName(gpuconsts.GPUDeviceType): am} + + m := NewShareGPUManager().(*shareGPUManager) + m.basePlugin = &BasePlugin{MetaServer: bp.MetaServer, 
State: &fakeState{machine: machine}} + + b.ResetTimer() + for i := 0; i < b.N; i++ { + spm.calls = 0 + m.sync(context.Background()) + } + + // Expect only 1 external call per sync due to cache (one unique pod UID) + if spm.calls != 1 { + b.Fatalf("expected 1 indicator call per sync, got %d", spm.calls) + } +} diff --git a/pkg/agent/qrm-plugins/gpu/consts/consts.go b/pkg/agent/qrm-plugins/gpu/consts/consts.go new file mode 100644 index 0000000000..e8ca95567b --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/consts/consts.go @@ -0,0 +1,55 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package consts
+
+import (
+	"time"
+
+	pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
+
+	"github.com/kubewharf/katalyst-api/pkg/consts"
+	"github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent/qrm"
+)
+
+// AllocatedResource wraps a topology-aware resource that has already been
+// allocated, together with the identity of the owning pod.
+type AllocatedResource struct {
+	ResourceName string
+	PodName      string
+	PodNamespace string
+	*pluginapi.TopologyAwareResource
+}
+
+// AllocatableResource wraps an allocatable topology-aware resource reported
+// for this node.
+type AllocatableResource struct {
+	ResourceName string
+	*pluginapi.AllocatableTopologyAwareResource
+}
+
+const (
+	// GPUDeviceType and RDMADeviceType are the device-type keys used when
+	// registering topology providers and resource-state generators.
+	GPUDeviceType  = "gpu_device"
+	RDMADeviceType = "rdma_device"
+
+	// GPUResourcePluginPolicyNameStatic is the policy name of static gpu resource plugin
+	GPUResourcePluginPolicyNameStatic = string(consts.ResourcePluginPolicyNameStatic)
+
+	// GPUPluginDynamicPolicyName is the fully qualified plugin+policy name;
+	// ClearResidualState is the name of the residual-state cleanup job derived from it.
+	GPUPluginDynamicPolicyName = qrm.QRMPluginNameGPU + "_" + GPUResourcePluginPolicyNameStatic
+	ClearResidualState         = GPUPluginDynamicPolicyName + "_clear_residual_state"
+
+	// GPUMemPluginName is the resource-plugin name for gpu memory.
+	GPUMemPluginName = "gpu_mem_resource_plugin"
+
+	// StateCheckPeriod / StateCheckTolerationTimes / MaxResidualTime tune residual-state
+	// cleanup — presumably an entry must be seen as residual several consecutive checks
+	// (or exceed MaxResidualTime) before being cleared; TODO confirm against the consumer.
+	StateCheckPeriod          = 30 * time.Second
+	StateCheckTolerationTimes = 3
+	MaxResidualTime           = 5 * time.Minute
+)
diff --git a/pkg/agent/qrm-plugins/gpu/customdeviceplugin/custom_device_plugin_stub.go b/pkg/agent/qrm-plugins/gpu/customdeviceplugin/custom_device_plugin_stub.go
new file mode 100644
index 0000000000..b2b5aa6e3b
--- /dev/null
+++ b/pkg/agent/qrm-plugins/gpu/customdeviceplugin/custom_device_plugin_stub.go
@@ -0,0 +1,98 @@
+/*
+Copyright 2022 The Katalyst Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package customdeviceplugin
+
+import (
+	"context"
+	"fmt"
+
+	v1 "k8s.io/api/core/v1"
+	pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
+
+	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/baseplugin"
+	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/state"
+)
+
+// CustomDevicePluginStub is a trivial CustomDevicePlugin implementation that only
+// allocates its device when the accompany resource has already been allocated.
+type CustomDevicePluginStub struct {
+	*baseplugin.BasePlugin
+}
+
+// NewCustomDevicePluginStub returns a stub plugin backed by the given base plugin.
+func NewCustomDevicePluginStub(base *baseplugin.BasePlugin) CustomDevicePlugin {
+	return &CustomDevicePluginStub{
+		BasePlugin: base,
+	}
+}
+
+// DeviceNames returns the single device name served by this stub.
+func (c CustomDevicePluginStub) DeviceNames() []string {
+	return []string{"custom-device-plugin-stub"}
+}
+
+// GetAssociatedDeviceTopologyHints returns an empty hints response.
+func (c CustomDevicePluginStub) GetAssociatedDeviceTopologyHints(context.Context, *pluginapi.AssociatedDeviceRequest) (*pluginapi.AssociatedDeviceHintsResponse, error) {
+	return &pluginapi.AssociatedDeviceHintsResponse{}, nil
+}
+
+// UpdateAllocatableAssociatedDevices returns an empty response without recording anything.
+func (c CustomDevicePluginStub) UpdateAllocatableAssociatedDevices(context.Context, *pluginapi.UpdateAllocatableAssociatedDevicesRequest) (*pluginapi.UpdateAllocatableAssociatedDevicesResponse, error) {
+	return &pluginapi.UpdateAllocatableAssociatedDevicesResponse{}, nil
+}
+
+// DefaultAccompanyResourceName returns the stub accompany resource name.
+func (c CustomDevicePluginStub) DefaultAccompanyResourceName() string {
+	return "resource-plugin-stub"
+}
+
+// AllocateAssociatedDevice allocates the associated device only if the accompany
+// resource has already been allocated for this container.
+func (c CustomDevicePluginStub) AllocateAssociatedDevice(_ context.Context, resReq *pluginapi.ResourceRequest, _ *pluginapi.DeviceRequest, accompanyResourceName string) (*pluginapi.AssociatedDeviceAllocationResponse, error) {
+	// Guard clause: without a prior accompany-resource allocation there is nothing to do.
+	accompanyResourceAllocation := c.State.GetAllocationInfo(v1.ResourceName(accompanyResourceName), resReq.PodUid, resReq.ContainerName)
+	if accompanyResourceAllocation == nil {
+		return nil, fmt.Errorf("accompany resource %s has not been allocated", accompanyResourceName)
+	}
+	// NOTE(review): this overwrites the accompany resource's allocation with an empty
+	// AllocationInfo — looks intentional for a stub, but confirm before reuse outside tests.
+	c.State.SetAllocationInfo(v1.ResourceName(accompanyResourceName), resReq.PodUid, resReq.ContainerName, &state.AllocationInfo{}, false)
+	return &pluginapi.AssociatedDeviceAllocationResponse{}, nil
+}
+
+// CustomDevicePluginStub2 is a stub that unconditionally allocates its device.
+type CustomDevicePluginStub2 struct {
+	*baseplugin.BasePlugin
+}
+
+// NewCustomDevicePluginStub2 returns the unconditionally-allocating stub plugin.
+func NewCustomDevicePluginStub2(base *baseplugin.BasePlugin) CustomDevicePlugin {
+	return &CustomDevicePluginStub2{
+		BasePlugin: base,
+	}
+}
+
+// DeviceNames returns the single device name served by this stub.
+func (c CustomDevicePluginStub2) DeviceNames() []string {
+	return []string{"custom-device-plugin-stub-2"}
+}
+
+// GetAssociatedDeviceTopologyHints returns an empty hints response.
+func (c CustomDevicePluginStub2) GetAssociatedDeviceTopologyHints(context.Context, *pluginapi.AssociatedDeviceRequest) (*pluginapi.AssociatedDeviceHintsResponse, error) {
+	return &pluginapi.AssociatedDeviceHintsResponse{}, nil
+}
+
+// UpdateAllocatableAssociatedDevices returns an empty response without recording anything.
+func (c CustomDevicePluginStub2) UpdateAllocatableAssociatedDevices(context.Context, *pluginapi.UpdateAllocatableAssociatedDevicesRequest) (*pluginapi.UpdateAllocatableAssociatedDevicesResponse, error) {
+	return &pluginapi.UpdateAllocatableAssociatedDevicesResponse{}, nil
+}
+
+// DefaultAccompanyResourceName returns the stub accompany resource name.
+func (c CustomDevicePluginStub2) DefaultAccompanyResourceName() string {
+	return "resource-plugin-stub"
+}
+
+// AllocateAssociatedDevice simply records an empty allocation for the requested
+// device and reports success unconditionally.
+func (c CustomDevicePluginStub2) AllocateAssociatedDevice(_ context.Context, resReq *pluginapi.ResourceRequest, deviceReq *pluginapi.DeviceRequest, _ string) (*pluginapi.AssociatedDeviceAllocationResponse, error) {
+	c.State.SetAllocationInfo(v1.ResourceName(deviceReq.DeviceName), resReq.PodUid, resReq.ContainerName, &state.AllocationInfo{}, false)
+	return &pluginapi.AssociatedDeviceAllocationResponse{}, nil
+}
diff --git a/pkg/agent/qrm-plugins/gpu/customdeviceplugin/gpu/gpu.go b/pkg/agent/qrm-plugins/gpu/customdeviceplugin/gpu/gpu.go
new file mode 100644
index 0000000000..a9ba9530a4
--- /dev/null
+++ b/pkg/agent/qrm-plugins/gpu/customdeviceplugin/gpu/gpu.go
@@ -0,0 +1,229 @@
+/*
+Copyright 2022 The Katalyst Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package gpu
+
+import (
+	"context"
+	"fmt"
+
+	v1 "k8s.io/api/core/v1"
+	pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
+
+	"github.com/kubewharf/katalyst-api/pkg/consts"
+	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate"
+	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/baseplugin"
+	gpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/consts"
+	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/customdeviceplugin"
+	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/state"
+	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate/manager"
+	qrmutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util"
+	"github.com/kubewharf/katalyst-core/pkg/util/general"
+	"github.com/kubewharf/katalyst-core/pkg/util/machine"
+)
+
+// GPUCustomDevicePluginName is the registration name of the gpu custom device plugin.
+const GPUCustomDevicePluginName = "gpu-custom-device-plugin"
+
+const (
+	// defaultAccompanyResourceName is the resource (gpu memory) whose allocation
+	// normally precedes gpu device allocation.
+	defaultAccompanyResourceName = string(consts.ResourceGPUMemory)
+)
+
+// GPUDevicePlugin allocates gpu devices as associated devices of the gpu memory resource.
+type GPUDevicePlugin struct {
+	*baseplugin.BasePlugin
+	deviceNames []string
+}
+
+// NewGPUDevicePlugin registers the gpu topology provider and default resource-state
+// generator on the base plugin and returns the gpu custom device plugin.
+func NewGPUDevicePlugin(base *baseplugin.BasePlugin) customdeviceplugin.CustomDevicePlugin {
+	gpuTopologyProvider := machine.NewDeviceTopologyProvider(base.Conf.GPUDeviceNames)
+	base.DeviceTopologyRegistry.RegisterDeviceTopologyProvider(gpuconsts.GPUDeviceType, gpuTopologyProvider)
+	base.DefaultResourceStateGeneratorRegistry.RegisterResourceStateGenerator(gpuconsts.GPUDeviceType,
+		state.NewGenericDefaultResourceStateGenerator(gpuconsts.GPUDeviceType, base.DeviceTopologyRegistry))
+	base.RegisterDeviceNameToType(base.Conf.GPUDeviceNames, gpuconsts.GPUDeviceType)
+
+	return &GPUDevicePlugin{
+		BasePlugin:  base,
+		deviceNames: base.Conf.GPUDeviceNames,
+	}
+}
+
+// DefaultAccompanyResourceName returns the gpu memory resource name.
+func (p *GPUDevicePlugin) DefaultAccompanyResourceName() string {
+	return defaultAccompanyResourceName
+}
+
+// DeviceNames returns the configured gpu device names.
+func (p *GPUDevicePlugin) DeviceNames() []string {
+	return p.deviceNames
+}
+
+// UpdateAllocatableAssociatedDevices records the allocatable gpu devices and their topology.
+func (p *GPUDevicePlugin) UpdateAllocatableAssociatedDevices(ctx context.Context, request *pluginapi.UpdateAllocatableAssociatedDevicesRequest) (*pluginapi.UpdateAllocatableAssociatedDevicesResponse, error) {
+	return p.UpdateAllocatableAssociatedDevicesByDeviceType(request, gpuconsts.GPUDeviceType)
+}
+
+// GetAssociatedDeviceTopologyHints returns an empty hints response.
+func (p *GPUDevicePlugin) GetAssociatedDeviceTopologyHints(context.Context, *pluginapi.AssociatedDeviceRequest) (*pluginapi.AssociatedDeviceHintsResponse, error) {
+	return &pluginapi.AssociatedDeviceHintsResponse{}, nil
+}
+
+// AllocateAssociatedDevice allocates gpu devices for the requesting container.
+// It reuses an existing gpu allocation when one is recorded, otherwise derives
+// the devices from the gpu memory allocation, falling back to the gpu allocate
+// strategy when no gpu memory allocation exists; the result is persisted in
+// state and propagated to the share-gpu manager.
+func (p *GPUDevicePlugin) AllocateAssociatedDevice(
+	ctx context.Context, resReq *pluginapi.ResourceRequest, deviceReq *pluginapi.DeviceRequest, _ string,
+) (*pluginapi.AssociatedDeviceAllocationResponse, error) {
+	qosLevel, err := qrmutil.GetKatalystQoSLevelFromResourceReq(p.Conf.QoSConfiguration, resReq, p.PodAnnotationKeptKeys, p.PodLabelKeptKeys)
+	if err != nil {
+		err = fmt.Errorf("GetKatalystQoSLevelFromResourceReq for pod: %s/%s, container: %s failed with error: %v",
+			resReq.PodNamespace, resReq.PodName, resReq.ContainerName, err)
+		general.Errorf("%s", err.Error())
+		return nil, err
+	}
+
+	general.InfoS("called",
+		"podNamespace", resReq.PodNamespace,
+		"podName", resReq.PodName,
+		"containerName", resReq.ContainerName,
+		"qosLevel", qosLevel,
+		"reqAnnotations", resReq.Annotations,
+		"resourceRequests", resReq.ResourceRequests,
+		"deviceName", deviceReq.DeviceName,
+		"resourceHint", resReq.Hint,
+		"deviceHint", deviceReq.Hint,
+		"availableDevices", deviceReq.AvailableDevices,
+		"reusableDevices", deviceReq.ReusableDevices,
+		"deviceRequest", deviceReq.DeviceRequest,
+	)
+
+	// Fast path: an earlier gpu allocation for this container is already recorded.
+	gpuAllocationInfo := p.State.GetAllocationInfo(gpuconsts.GPUDeviceType, resReq.PodUid, resReq.ContainerName)
+	if gpuAllocationInfo != nil {
+		if gpuAllocationInfo.TopologyAwareAllocations == nil {
+			return nil, fmt.Errorf("GPU topology aware allocation info is nil")
+		}
+		allocatedDevices := make([]string, 0, len(gpuAllocationInfo.TopologyAwareAllocations))
+		for gpuID := range gpuAllocationInfo.TopologyAwareAllocations {
+			allocatedDevices = append(allocatedDevices, gpuID)
+		}
+		return &pluginapi.AssociatedDeviceAllocationResponse{
+			AllocationResult: &pluginapi.AssociatedDeviceAllocation{
+				AllocatedDevices: allocatedDevices,
+			},
+		}, nil
+	}
+
+	// Fetch the gpu topology once up front; both allocation paths below need it
+	// (previously it was fetched twice on the strategy path).
+	gpuTopology, numaTopologyReady, err := p.DeviceTopologyRegistry.GetDeviceTopology(gpuconsts.GPUDeviceType)
+	if err != nil {
+		general.Warningf("failed to get gpu topology: %v", err)
+		return nil, fmt.Errorf("failed to get gpu topology: %w", err)
+	}
+	if !numaTopologyReady {
+		general.Warningf("numa topology is not ready")
+		return nil, fmt.Errorf("numa topology is not ready")
+	}
+
+	var allocatedDevices []string
+	memoryAllocationInfo := p.State.GetAllocationInfo(v1.ResourceName(defaultAccompanyResourceName), resReq.PodUid, resReq.ContainerName)
+	// GPU memory should have been allocated at this stage. When it has, gpu devices
+	// were chosen alongside it, so reuse those; otherwise run the allocate strategy.
+	if memoryAllocationInfo == nil || memoryAllocationInfo.TopologyAwareAllocations == nil {
+		general.InfoS("GPU memory allocation info is nil, invoking GPU allocate strategy",
+			"podNamespace", resReq.PodNamespace,
+			"podName", resReq.PodName,
+			"containerName", resReq.ContainerName)
+
+		// Use the strategy framework to allocate GPU devices
+		result, err := manager.AllocateGPUUsingStrategy(
+			resReq,
+			deviceReq,
+			gpuTopology,
+			p.Conf.GPUQRMPluginConfig,
+			p.Emitter,
+			p.MetaServer,
+			p.State.GetMachineState(),
+			qosLevel,
+		)
+		if err != nil {
+			return nil, fmt.Errorf("GPU allocation using strategy failed: %v", err)
+		}
+		if !result.Success {
+			return nil, fmt.Errorf("GPU allocation failed: %s", result.ErrorMessage)
+		}
+		allocatedDevices = result.AllocatedDevices
+	} else {
+		for gpuID := range memoryAllocationInfo.TopologyAwareAllocations {
+			allocatedDevices = append(allocatedDevices, gpuID)
+		}
+	}
+
+	// Save gpu device allocations in state.
+	numaNodes := machine.NewCPUSet()
+	gpuDeviceTopologyAwareAllocations := make(map[string]state.Allocation, len(allocatedDevices))
+	for _, deviceID := range allocatedDevices {
+		info, ok := gpuTopology.Devices[deviceID]
+		if !ok {
+			return nil, fmt.Errorf("failed to get gpu info for device: %s", deviceID)
+		}
+
+		gpuDeviceTopologyAwareAllocations[deviceID] = state.Allocation{
+			Quantity:  1,
+			NUMANodes: info.NumaNodes,
+		}
+		numaNodes.Add(info.NumaNodes...)
+	}
+
+	gpuDeviceAllocationInfo := &state.AllocationInfo{
+		AllocationMeta: commonstate.GenerateGenericContainerAllocationMeta(resReq, commonstate.EmptyOwnerPoolName, qosLevel),
+		AllocatedAllocation: state.Allocation{
+			Quantity:  float64(len(allocatedDevices)),
+			NUMANodes: numaNodes.ToSliceInt(),
+		},
+	}
+	gpuDeviceAllocationInfo.TopologyAwareAllocations = gpuDeviceTopologyAwareAllocations
+
+	// TODO: state can be updated using the actual resource name
+	p.State.SetAllocationInfo(gpuconsts.GPUDeviceType, resReq.PodUid, resReq.ContainerName, gpuDeviceAllocationInfo, false)
+	resourceState, err := p.GenerateResourceStateFromPodEntries(gpuconsts.GPUDeviceType, nil)
+	if err != nil {
+		return nil, fmt.Errorf("failed to generate gpu device state from pod entries: %v", err)
+	}
+	p.State.SetResourceState(gpuconsts.GPUDeviceType, resourceState, true)
+
+	general.InfoS("allocated gpu devices",
+		"podNamespace", resReq.PodNamespace,
+		"podName", resReq.PodName,
+		"containerName", resReq.ContainerName,
+		"qosLevel", qosLevel,
+		"allocatedDevices", allocatedDevices)
+
+	// call shareGPUManager to update GPU device state
+	p.BasePlugin.ShareGPUManager.Allocate(ctx, gpuDeviceAllocationInfo)
+
+	return &pluginapi.AssociatedDeviceAllocationResponse{
+		AllocationResult: &pluginapi.AssociatedDeviceAllocation{
+			AllocatedDevices: allocatedDevices,
+		},
+	}, nil
+}
diff --git a/pkg/agent/qrm-plugins/gpu/customdeviceplugin/gpu/gpu_test.go b/pkg/agent/qrm-plugins/gpu/customdeviceplugin/gpu/gpu_test.go
new file mode 100644
index 0000000000..621401afaf
--- /dev/null
+++ 
b/pkg/agent/qrm-plugins/gpu/customdeviceplugin/gpu/gpu_test.go @@ -0,0 +1,330 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package gpu + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/uuid" + pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + + katalyst_base "github.com/kubewharf/katalyst-core/cmd/base" + "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/baseplugin" + gpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/consts" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/state" + "github.com/kubewharf/katalyst-core/pkg/config" + "github.com/kubewharf/katalyst-core/pkg/metaserver" + "github.com/kubewharf/katalyst-core/pkg/metrics" + "github.com/kubewharf/katalyst-core/pkg/util/machine" +) + +func generateTestConfiguration(t *testing.T) *config.Configuration { + conf := config.NewConfiguration() + tmpDir := t.TempDir() + conf.QRMPluginSocketDirs = []string{tmpDir} + conf.CheckpointManagerDir = tmpDir + + return conf +} + +func generateTestGenericContext(t *testing.T, conf *config.Configuration) *agent.GenericContext { + genericCtx, err := katalyst_base.GenerateFakeGenericContext([]runtime.Object{}) + if err != nil { + t.Fatalf("unable to generate test generic context: %v", err) + } + + 
metaServer, err := metaserver.NewMetaServer(genericCtx.Client, metrics.DummyMetrics{}, conf) + if err != nil { + t.Fatalf("unable to generate test meta server: %v", err) + } + + agentCtx := &agent.GenericContext{ + GenericContext: genericCtx, + MetaServer: metaServer, + PluginManager: nil, + } + + agentCtx.MetaServer = metaServer + return agentCtx +} + +func makeTestBasePlugin(t *testing.T) *baseplugin.BasePlugin { + conf := generateTestConfiguration(t) + agentCtx := generateTestGenericContext(t, conf) + + tmpDir := t.TempDir() + conf.GenericQRMPluginConfiguration.StateFileDirectory = tmpDir + conf.GPUDeviceNames = []string{"test-gpu"} + + basePlugin, err := baseplugin.NewBasePlugin(agentCtx, conf, metrics.DummyMetrics{}) + assert.NoError(t, err) + + stateImpl, err := state.NewCheckpointState(conf.QRMPluginsConfiguration, tmpDir, "test", "test-policy", state.NewDefaultResourceStateGeneratorRegistry(), true, metrics.DummyMetrics{}) + assert.NoError(t, err) + + basePlugin.State = stateImpl + + return basePlugin +} + +func TestGPUDevicePlugin_UpdateAllocatableAssociatedDevices(t *testing.T) { + t.Parallel() + + basePlugin := makeTestBasePlugin(t) + devicePlugin := NewGPUDevicePlugin(basePlugin) + + // Update topology with associated devices + req := &pluginapi.UpdateAllocatableAssociatedDevicesRequest{ + DeviceName: "test-gpu", + Devices: []*pluginapi.AssociatedDevice{ + { + ID: "test-gpu-0", + Topology: &pluginapi.TopologyInfo{ + Nodes: []*pluginapi.NUMANode{ + { + ID: 0, + }, + }, + }, + }, + { + ID: "test-gpu-1", + Topology: &pluginapi.TopologyInfo{ + Nodes: []*pluginapi.NUMANode{ + { + ID: 1, + }, + }, + }, + }, + }, + } + + resp, err := devicePlugin.UpdateAllocatableAssociatedDevices(context.Background(), req) + assert.NoError(t, err) + assert.NotNil(t, resp) + + // Verify device topology is updated + gpuDevicePlugin := devicePlugin.(*GPUDevicePlugin) + deviceTopology, numaTopologyReady, err := 
gpuDevicePlugin.DeviceTopologyRegistry.GetDeviceTopology(gpuconsts.GPUDeviceType) + assert.NoError(t, err) + assert.True(t, numaTopologyReady) + assert.NotNil(t, deviceTopology) + + expectedDeviceTopology := &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "test-gpu-0": { + NumaNodes: []int{0}, + DeviceAffinity: make(map[machine.AffinityPriority]machine.DeviceIDs), + }, + "test-gpu-1": { + NumaNodes: []int{1}, + DeviceAffinity: make(map[machine.AffinityPriority]machine.DeviceIDs), + }, + }, + } + + assert.Equal(t, expectedDeviceTopology, deviceTopology) +} + +func TestGPUDevicePlugin_AllocateAssociatedDevice(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + podUID string + containerName string + allocationInfo *state.AllocationInfo + accompanyResourceAllocationInfo *state.AllocationInfo + deviceReq *pluginapi.DeviceRequest + deviceTopology *machine.DeviceTopology + expectedErr bool + expectedResp *pluginapi.AssociatedDeviceAllocationResponse + }{ + { + name: "Allocation already exists", + allocationInfo: &state.AllocationInfo{ + AllocatedAllocation: state.Allocation{ + Quantity: 2, + NUMANodes: []int{0, 1}, + }, + TopologyAwareAllocations: map[string]state.Allocation{ + "test-gpu-0": { + Quantity: 1, + NUMANodes: []int{0}, + }, + "test-gpu-1": { + Quantity: 1, + NUMANodes: []int{1}, + }, + }, + }, + podUID: string(uuid.NewUUID()), + containerName: "test-container", + deviceReq: &pluginapi.DeviceRequest{ + DeviceName: "test-gpu", + AvailableDevices: []string{"test-gpu-2", "test-gpu-3"}, + ReusableDevices: []string{"test-gpu-2", "test-gpu-3"}, + DeviceRequest: 2, + }, + expectedResp: &pluginapi.AssociatedDeviceAllocationResponse{ + AllocationResult: &pluginapi.AssociatedDeviceAllocation{ + AllocatedDevices: []string{"test-gpu-0", "test-gpu-1"}, + }, + }, + }, + { + name: "gpu memory allocation exists", + accompanyResourceAllocationInfo: &state.AllocationInfo{ + AllocatedAllocation: state.Allocation{ + Quantity: 4, + NUMANodes: 
[]int{0, 1}, + }, + TopologyAwareAllocations: map[string]state.Allocation{ + "test-gpu-0": { + Quantity: 2, + NUMANodes: []int{0}, + }, + "test-gpu-1": { + Quantity: 2, + NUMANodes: []int{1}, + }, + }, + }, + podUID: string(uuid.NewUUID()), + containerName: "test-container", + deviceReq: &pluginapi.DeviceRequest{ + DeviceName: "test-gpu", + AvailableDevices: []string{"test-gpu-2", "test-gpu-3"}, + ReusableDevices: []string{"test-gpu-2", "test-gpu-3"}, + DeviceRequest: 2, + }, + deviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "test-gpu-0": { + NumaNodes: []int{0}, + }, + "test-gpu-1": { + NumaNodes: []int{1}, + }, + "test-gpu-2": { + NumaNodes: []int{0}, + }, + "test-gpu-3": { + NumaNodes: []int{1}, + }, + }, + }, + expectedResp: &pluginapi.AssociatedDeviceAllocationResponse{ + AllocationResult: &pluginapi.AssociatedDeviceAllocation{ + AllocatedDevices: []string{"test-gpu-0", "test-gpu-1"}, + }, + }, + }, + { + name: "device topology does not exist", + accompanyResourceAllocationInfo: &state.AllocationInfo{ + AllocatedAllocation: state.Allocation{ + Quantity: 4, + NUMANodes: []int{0, 1}, + }, + TopologyAwareAllocations: map[string]state.Allocation{ + "test-gpu-0": { + Quantity: 2, + NUMANodes: []int{0}, + }, + "test-gpu-1": { + Quantity: 2, + NUMANodes: []int{1}, + }, + }, + }, + podUID: string(uuid.NewUUID()), + containerName: "test-container", + deviceReq: &pluginapi.DeviceRequest{ + DeviceName: "test-gpu", + AvailableDevices: []string{"test-gpu-2", "test-gpu-3"}, + ReusableDevices: []string{"test-gpu-2", "test-gpu-3"}, + DeviceRequest: 2, + }, + expectedErr: true, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + basePlugin := makeTestBasePlugin(t) + devicePlugin := NewGPUDevicePlugin(basePlugin) + + if tt.allocationInfo != nil { + basePlugin.State.SetAllocationInfo(gpuconsts.GPUDeviceType, tt.podUID, tt.containerName, tt.allocationInfo, false) + } + + if 
tt.accompanyResourceAllocationInfo != nil { + basePlugin.State.SetAllocationInfo(v1.ResourceName(defaultAccompanyResourceName), tt.podUID, tt.containerName, tt.accompanyResourceAllocationInfo, false) + } + + if tt.deviceTopology != nil { + err := basePlugin.DeviceTopologyRegistry.SetDeviceTopology(gpuconsts.GPUDeviceType, tt.deviceTopology) + assert.NoError(t, err) + } + + resourceReq := &pluginapi.ResourceRequest{ + PodUid: tt.podUID, + ContainerName: tt.containerName, + } + + resp, err := devicePlugin.AllocateAssociatedDevice(context.Background(), resourceReq, tt.deviceReq, "test") + if tt.expectedErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + evaluateAllocatedDevicesResult(t, tt.expectedResp, resp) + + // Verify state is updated + allocationInfo := basePlugin.State.GetAllocationInfo(gpuconsts.GPUDeviceType, tt.podUID, tt.containerName) + assert.NotNil(t, allocationInfo) + } + }) + } +} + +func evaluateAllocatedDevicesResult(t *testing.T, expectedResp, actualResp *pluginapi.AssociatedDeviceAllocationResponse) { + if expectedResp.AllocationResult == nil && actualResp.AllocationResult == nil { + return + } + + if expectedResp.AllocationResult != nil && actualResp.AllocationResult == nil { + t.Errorf("expected allocation result %v, but got nil", expectedResp.AllocationResult) + return + } + + if actualResp.AllocationResult != nil && expectedResp.AllocationResult == nil { + t.Errorf("expected nil allocation result, but got %v", actualResp.AllocationResult) + return + } + + assert.ElementsMatch(t, expectedResp.AllocationResult.AllocatedDevices, actualResp.AllocationResult.AllocatedDevices) +} diff --git a/pkg/agent/qrm-plugins/gpu/customdeviceplugin/interface.go b/pkg/agent/qrm-plugins/gpu/customdeviceplugin/interface.go new file mode 100644 index 0000000000..ea446e2df6 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/customdeviceplugin/interface.go @@ -0,0 +1,38 @@ +/* +Copyright 2022 The Katalyst Authors. 
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package customdeviceplugin
+
+import (
+	"context"
+
+	pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
+)
+
+// CustomDevicePlugin is implemented by device-specific plugins (e.g. gpu, rdma,
+// and the test stubs) that manage associated devices for the QRM gpu plugin.
+type CustomDevicePlugin interface {
+	// DeviceNames returns a list of all possible names for this device
+	DeviceNames() []string
+
+	// GetAssociatedDeviceTopologyHints returns topology hints for the requested associated device.
+	GetAssociatedDeviceTopologyHints(ctx context.Context, request *pluginapi.AssociatedDeviceRequest) (*pluginapi.AssociatedDeviceHintsResponse, error)
+
+	// UpdateAllocatableAssociatedDevices refreshes the allocatable devices and their topology.
+	UpdateAllocatableAssociatedDevices(ctx context.Context, request *pluginapi.UpdateAllocatableAssociatedDevicesRequest) (*pluginapi.UpdateAllocatableAssociatedDevicesResponse, error)
+
+	// DefaultAccompanyResourceName returns the resource whose allocation accompanies this
+	// device's allocation; implementations may return an empty string when none is required.
+	DefaultAccompanyResourceName() string
+
+	// AllocateAssociatedDevice performs the device allocation for the given resource
+	// request and device request.
+	AllocateAssociatedDevice(
+		ctx context.Context, resReq *pluginapi.ResourceRequest, deviceReq *pluginapi.DeviceRequest, accompanyResourceName string,
+	) (*pluginapi.AssociatedDeviceAllocationResponse, error)
+}
diff --git a/pkg/agent/qrm-plugins/gpu/customdeviceplugin/rdma/rdma.go b/pkg/agent/qrm-plugins/gpu/customdeviceplugin/rdma/rdma.go
new file mode 100644
index 0000000000..b04d9a014e
--- /dev/null
+++ b/pkg/agent/qrm-plugins/gpu/customdeviceplugin/rdma/rdma.go
@@ -0,0 +1,309 @@
+/*
+Copyright 2022 The Katalyst Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package rdma + +import ( + "context" + "fmt" + "math" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/sets" + pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/baseplugin" + gpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/consts" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/customdeviceplugin" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/state" + gpuutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/util" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util" + "github.com/kubewharf/katalyst-core/pkg/util/general" + "github.com/kubewharf/katalyst-core/pkg/util/machine" +) + +const RDMACustomDevicePluginName = "rdma-custom-device-plugin" + +type RDMADevicePlugin struct { + *baseplugin.BasePlugin + deviceNames []string +} + +func NewRDMADevicePlugin(base *baseplugin.BasePlugin) customdeviceplugin.CustomDevicePlugin { + rdmaTopologyProvider := machine.NewDeviceTopologyProvider(base.Conf.RDMADeviceNames) + base.DeviceTopologyRegistry.RegisterDeviceTopologyProvider(gpuconsts.RDMADeviceType, rdmaTopologyProvider) + base.DefaultResourceStateGeneratorRegistry.RegisterResourceStateGenerator(gpuconsts.RDMADeviceType, + state.NewGenericDefaultResourceStateGenerator(gpuconsts.RDMADeviceType, base.DeviceTopologyRegistry)) + base.RegisterDeviceNameToType(base.Conf.RDMADeviceNames, gpuconsts.RDMADeviceType) + + return 
&RDMADevicePlugin{ + BasePlugin: base, + deviceNames: base.Conf.RDMADeviceNames, + } +} + +func (p *RDMADevicePlugin) DefaultAccompanyResourceName() string { + return "" +} + +func (p *RDMADevicePlugin) DeviceNames() []string { + return p.deviceNames +} + +func (p *RDMADevicePlugin) UpdateAllocatableAssociatedDevices(ctx context.Context, request *pluginapi.UpdateAllocatableAssociatedDevicesRequest) (*pluginapi.UpdateAllocatableAssociatedDevicesResponse, error) { + return p.UpdateAllocatableAssociatedDevicesByDeviceType(request, gpuconsts.RDMADeviceType) +} + +func (p *RDMADevicePlugin) GetAssociatedDeviceTopologyHints(context.Context, *pluginapi.AssociatedDeviceRequest) (*pluginapi.AssociatedDeviceHintsResponse, error) { + return &pluginapi.AssociatedDeviceHintsResponse{}, nil +} + +// AllocateAssociatedDevice check if rdma is allocated to other containers, make sure they do not share rdma +func (p *RDMADevicePlugin) AllocateAssociatedDevice( + ctx context.Context, resReq *pluginapi.ResourceRequest, deviceReq *pluginapi.DeviceRequest, accompanyResourceName string, +) (*pluginapi.AssociatedDeviceAllocationResponse, error) { + qosLevel, err := util.GetKatalystQoSLevelFromResourceReq(p.Conf.QoSConfiguration, resReq, p.PodAnnotationKeptKeys, p.PodLabelKeptKeys) + if err != nil { + err = fmt.Errorf("GetKatalystQoSLevelFromResourceReq for pod: %s/%s, container: %s failed with error: %v", + resReq.PodNamespace, resReq.PodName, resReq.ContainerName, err) + general.Errorf("%s", err.Error()) + return nil, err + } + + general.InfoS("called", + "podNamespace", resReq.PodNamespace, + "podName", resReq.PodName, + "containerName", resReq.ContainerName, + "qosLevel", qosLevel, + "reqAnnotations", resReq.Annotations, + "resourceRequests", resReq.ResourceRequests, + "deviceName", deviceReq.DeviceName, + "resourceHint", resReq.Hint, + "deviceHint", deviceReq.Hint, + "availableDevices", deviceReq.AvailableDevices, + "reusableDevices", deviceReq.ReusableDevices, + "deviceRequest", 
 deviceReq.DeviceRequest,
+	)
+
+	// Check if there is state for the device name
+	rdmaAllocationInfo := p.State.GetAllocationInfo(gpuconsts.RDMADeviceType, resReq.PodUid, resReq.ContainerName)
+	if rdmaAllocationInfo != nil && rdmaAllocationInfo.TopologyAwareAllocations != nil {
+		allocatedDevices := make([]string, 0, len(rdmaAllocationInfo.TopologyAwareAllocations))
+		for rdmaID := range rdmaAllocationInfo.TopologyAwareAllocations {
+			allocatedDevices = append(allocatedDevices, rdmaID)
+		}
+		return &pluginapi.AssociatedDeviceAllocationResponse{
+			AllocationResult: &pluginapi.AssociatedDeviceAllocation{
+				AllocatedDevices: allocatedDevices,
+			},
+		}, nil
+	}
+
+	rdmaTopology, numaTopologyReady, err := p.DeviceTopologyRegistry.GetDeviceTopology(gpuconsts.RDMADeviceType)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get rdma device topology: %v", err)
+	}
+	if !numaTopologyReady {
+		return nil, fmt.Errorf("rdma device topology is not ready")
+	}
+
+	hintNodes, err := machine.NewCPUSetUint64(deviceReq.GetHint().GetNodes()...)
+ if err != nil { + general.Warningf("failed to get hint nodes: %v", err) + return nil, err + } + + var allocatedRdmaDevices []string + + // No accompany resource name + if accompanyResourceName == "" { + allocatedRdmaDevices, err = p.allocateWithNoAccompanyResource(deviceReq, rdmaTopology, hintNodes) + if err != nil { + return nil, fmt.Errorf("failed to allocate with no accompany resource: %v", err) + } + } else { + allocatedRdmaDevices, err = p.allocateWithAccompanyResource(deviceReq, resReq, accompanyResourceName) + if err != nil { + return nil, fmt.Errorf("failed to allocate with accompany resource: %v", err) + } + } + + // Modify rdma state + topologyAwareAllocations := make(map[string]state.Allocation) + for _, deviceID := range allocatedRdmaDevices { + info, ok := rdmaTopology.Devices[deviceID] + if !ok { + return nil, fmt.Errorf("failed to get rdma info for device %s", deviceID) + } + + topologyAwareAllocations[deviceID] = state.Allocation{ + Quantity: 1, + NUMANodes: info.GetNUMANodes(), + } + } + + allocationInfo := &state.AllocationInfo{ + AllocationMeta: commonstate.GenerateGenericContainerAllocationMeta(resReq, commonstate.EmptyOwnerPoolName, qosLevel), + AllocatedAllocation: state.Allocation{ + Quantity: 1, + NUMANodes: hintNodes.ToSliceInt(), + }, + } + + allocationInfo.TopologyAwareAllocations = topologyAwareAllocations + p.State.SetAllocationInfo(gpuconsts.RDMADeviceType, resReq.PodUid, resReq.ContainerName, allocationInfo, false) + resourceState, err := p.GenerateResourceStateFromPodEntries(gpuconsts.RDMADeviceType, nil) + if err != nil { + return nil, fmt.Errorf("failed to generate rdma device state from pod entries: %v", err) + } + + p.State.SetResourceState(gpuconsts.RDMADeviceType, resourceState, true) + + general.InfoS("allocated rdma devices", + "podNamespace", resReq.PodNamespace, + "podName", resReq.PodName, + "containerName", resReq.ContainerName, + "qosLevel", qosLevel, + "allocatedRdmaDevices", allocatedRdmaDevices) + + return 
&pluginapi.AssociatedDeviceAllocationResponse{
+		AllocationResult: &pluginapi.AssociatedDeviceAllocation{
+			AllocatedDevices: allocatedRdmaDevices,
+		},
+	}, nil
+}
+
+// allocateWithNoAccompanyResource allocates the RDMA devices on a best-effort basis,
+// making sure that they fit the hint nodes.
+func (p *RDMADevicePlugin) allocateWithNoAccompanyResource(
+	deviceReq *pluginapi.DeviceRequest, rdmaTopology *machine.DeviceTopology, hintNodes machine.CPUSet,
+) ([]string, error) {
+	reqQuantity := deviceReq.GetDeviceRequest()
+
+	machineState, ok := p.State.GetMachineState()[gpuconsts.RDMADeviceType]
+	if !ok {
+		return nil, fmt.Errorf("no machine state for resource %s", gpuconsts.RDMADeviceType)
+	}
+
+	allocatedDevices := sets.NewString()
+	allocateDevices := func(devices ...string) bool {
+		for _, device := range devices {
+			allocatedDevices.Insert(device)
+			if allocatedDevices.Len() >= int(reqQuantity) {
+				return true
+			}
+		}
+		return false
+	}
+
+	availableDevices := deviceReq.GetAvailableDevices()
+	reusableDevices := deviceReq.GetReusableDevices()
+
+	// allocate reusable devices first
+	allocated := allocateDevices(reusableDevices...)
+ if allocated { + return allocatedDevices.UnsortedList(), nil + } + + for _, device := range availableDevices { + if !gpuutil.IsNUMAAffinityDevice(device, rdmaTopology, hintNodes) { + continue + } + + if !machineState.IsRequestSatisfied(device, 1, 1) { + general.Infof("available numa affinity rdma %s is already allocated", device) + continue + } + + if allocateDevices(device) { + return allocatedDevices.UnsortedList(), nil + } + } + + return nil, fmt.Errorf("not enough available RDMAs found in rdmaTopology, number of needed RDMAs: %d, availableDevices len: %d, allocatedDevices len: %d", reqQuantity, len(availableDevices), len(allocatedDevices)) +} + +// allocateWithAccompanyResource allocates the rdma devices by first allocating the reusable devices, then allocating the +// available devices proportionally by ensuring NUMA affinity with the accompany resource +func (p *RDMADevicePlugin) allocateWithAccompanyResource( + deviceReq *pluginapi.DeviceRequest, resReq *pluginapi.ResourceRequest, accompanyResourceName string, +) ([]string, error) { + var err error + + // Find out the accompany devices that are allocated to the container and allocate RDMA devices that correspond to the numa nodes of accompany device + accompanyDeviceType, err := p.GetResourceTypeFromDeviceName(accompanyResourceName) + if err != nil { + return nil, fmt.Errorf("failed to get device type for accompany resource %s: %v", accompanyResourceName, err) + } + + // Allocate all the reusable devices first + allocatedDevices := sets.NewString(deviceReq.ReusableDevices...) 
+ + // Get ratio of accompany resource to target device + accompanyResourceToTargetDeviceRatio := p.State.GetMachineState().GetRatioOfAccompanyResourceToTargetResource(accompanyDeviceType, gpuconsts.RDMADeviceType) + + // Allocate target device according to ratio of accompany resource to target device + podResourceEntries := p.State.GetPodResourceEntries() + totalAllocated, accompanyResourceIds := podResourceEntries.GetTotalAllocatedResourceOfContainer(v1.ResourceName(accompanyDeviceType), resReq.PodUid, resReq.ContainerName) + + rdmaToBeAllocated := int(math.Ceil(float64(totalAllocated) * accompanyResourceToTargetDeviceRatio)) + + // For every gpu that is allocated to the container, find out the rdma devices that have affinity to the same + // numa nodes as the gpu and allocate them + accompanyResourceToRdmaAffinityMap, err := p.DeviceTopologyRegistry.GetDeviceNUMAAffinity(accompanyDeviceType, gpuconsts.RDMADeviceType) + if err != nil { + general.Warningf("failed to get gpu to rdma affinity map: %v", err) + return nil, err + } + + machineState := p.State.GetMachineState()[v1.ResourceName(gpuconsts.RDMADeviceType)] + + allocateDevices := func(devices ...string) bool { + for _, device := range devices { + if allocatedDevices.Len() >= rdmaToBeAllocated { + return true + } + allocatedDevices.Insert(device) + } + if allocatedDevices.Len() >= rdmaToBeAllocated { + return true + } + return false + } + + for accompanyResourceId := range accompanyResourceIds { + rdmaDevices, ok := accompanyResourceToRdmaAffinityMap[accompanyResourceId] + if !ok { + general.Warningf("failed to get rdma device with accompany device id: %s", accompanyResourceId) + continue + } + + // Iterate through the rdma devices and check if they are already allocated + for _, rdmaDevice := range rdmaDevices { + if !machineState.IsRequestSatisfied(rdmaDevice, 1, 1) { + continue + } + + if allocateDevices(rdmaDevice) { + return allocatedDevices.UnsortedList(), nil + } + } + } + + // Did not find enough 
available rdma devices to allocate, return the devices that are already allocated + return allocatedDevices.UnsortedList(), nil +} diff --git a/pkg/agent/qrm-plugins/gpu/customdeviceplugin/rdma/rdma_test.go b/pkg/agent/qrm-plugins/gpu/customdeviceplugin/rdma/rdma_test.go new file mode 100644 index 0000000000..37975a32cc --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/customdeviceplugin/rdma/rdma_test.go @@ -0,0 +1,563 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package rdma + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/uuid" + pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + + katalyst_base "github.com/kubewharf/katalyst-core/cmd/base" + "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/baseplugin" + gpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/consts" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/state" + "github.com/kubewharf/katalyst-core/pkg/config" + "github.com/kubewharf/katalyst-core/pkg/metaserver" + "github.com/kubewharf/katalyst-core/pkg/metrics" + "github.com/kubewharf/katalyst-core/pkg/util/machine" +) + +func generateTestConfiguration(t *testing.T) *config.Configuration { + conf := config.NewConfiguration() + tmpDir := t.TempDir() + conf.QRMPluginSocketDirs = 
[]string{tmpDir} + conf.CheckpointManagerDir = tmpDir + + return conf +} + +func generateTestGenericContext(t *testing.T, conf *config.Configuration) *agent.GenericContext { + genericCtx, err := katalyst_base.GenerateFakeGenericContext([]runtime.Object{}) + if err != nil { + t.Fatalf("unable to generate test generic context: %v", err) + } + + metaServer, err := metaserver.NewMetaServer(genericCtx.Client, metrics.DummyMetrics{}, conf) + if err != nil { + t.Fatalf("unable to generate test meta server: %v", err) + } + + agentCtx := &agent.GenericContext{ + GenericContext: genericCtx, + MetaServer: metaServer, + PluginManager: nil, + } + + agentCtx.MetaServer = metaServer + return agentCtx +} + +func makeTestBasePlugin(t *testing.T) *baseplugin.BasePlugin { + conf := generateTestConfiguration(t) + agentCtx := generateTestGenericContext(t, conf) + + tmpDir := t.TempDir() + conf.GenericQRMPluginConfiguration.StateFileDirectory = tmpDir + conf.RDMADeviceNames = []string{"test-rdma"} + + basePlugin, err := baseplugin.NewBasePlugin(agentCtx, conf, metrics.DummyMetrics{}) + assert.NoError(t, err) + + stateImpl, err := state.NewCheckpointState(conf.QRMPluginsConfiguration, tmpDir, "test", "test-policy", state.NewDefaultResourceStateGeneratorRegistry(), true, metrics.DummyMetrics{}) + assert.NoError(t, err) + + basePlugin.State = stateImpl + + // Register gpu device type and gpu device topology provider as it is an accompany resource for rdma + basePlugin.RegisterDeviceNameToType([]string{"test-gpu"}, gpuconsts.GPUDeviceType) + gpuTopologyProvider := machine.NewDeviceTopologyProvider([]string{"test-gpu"}) + basePlugin.DeviceTopologyRegistry.RegisterDeviceTopologyProvider(gpuconsts.GPUDeviceType, gpuTopologyProvider) + + return basePlugin +} + +func TestRDMADevicePlugin_UpdateAllocatableAssociatedDevices(t *testing.T) { + t.Parallel() + + basePlugin := makeTestBasePlugin(t) + devicePlugin := NewRDMADevicePlugin(basePlugin) + + // Update topology with associated devices + req := 
&pluginapi.UpdateAllocatableAssociatedDevicesRequest{ + DeviceName: "test-rdma", + Devices: []*pluginapi.AssociatedDevice{ + { + ID: "test-rdma-0", + Topology: &pluginapi.TopologyInfo{ + Nodes: []*pluginapi.NUMANode{ + { + ID: 0, + }, + }, + }, + }, + { + ID: "test-rdma-1", + Topology: &pluginapi.TopologyInfo{ + Nodes: []*pluginapi.NUMANode{ + { + ID: 1, + }, + }, + }, + }, + }, + } + + resp, err := devicePlugin.UpdateAllocatableAssociatedDevices(context.Background(), req) + assert.NoError(t, err) + assert.NotNil(t, resp) + + // Verify device topology is updated + gpuDevicePlugin := devicePlugin.(*RDMADevicePlugin) + deviceTopology, numaTopologyReady, err := gpuDevicePlugin.DeviceTopologyRegistry.GetDeviceTopology(gpuconsts.RDMADeviceType) + assert.NoError(t, err) + assert.True(t, numaTopologyReady) + assert.NotNil(t, deviceTopology) + + expectedDeviceTopology := &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "test-rdma-0": { + NumaNodes: []int{0}, + DeviceAffinity: make(map[machine.AffinityPriority]machine.DeviceIDs), + }, + "test-rdma-1": { + NumaNodes: []int{1}, + DeviceAffinity: make(map[machine.AffinityPriority]machine.DeviceIDs), + }, + }, + } + + assert.Equal(t, expectedDeviceTopology, deviceTopology) +} + +func TestRDMADevicePlugin_AllocateAssociatedDevices(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + podUID string + containerName string + allocationInfo *state.AllocationInfo + accompanyResourceAllocationInfo *state.AllocationInfo + accompanyResourceName string + deviceReq *pluginapi.DeviceRequest + deviceTopology *machine.DeviceTopology + accompanyDeviceTopology *machine.DeviceTopology + machineState state.AllocationResourcesMap + expectedErr bool + expectedResp *pluginapi.AssociatedDeviceAllocationResponse + }{ + { + name: "Allocation already exists", + allocationInfo: &state.AllocationInfo{ + AllocatedAllocation: state.Allocation{ + Quantity: 2, + NUMANodes: []int{0, 1}, + }, + TopologyAwareAllocations: 
map[string]state.Allocation{ + "test-rdma-0": { + Quantity: 1, + NUMANodes: []int{0}, + }, + "test-rdma-1": { + Quantity: 1, + NUMANodes: []int{1}, + }, + }, + }, + podUID: string(uuid.NewUUID()), + containerName: "test-container", + deviceReq: &pluginapi.DeviceRequest{ + DeviceName: "test-rdma", + AvailableDevices: []string{"test-rdma-2", "test-rdma-3"}, + ReusableDevices: []string{"test-rdma-2", "test-rdma-3"}, + DeviceRequest: 2, + }, + expectedResp: &pluginapi.AssociatedDeviceAllocationResponse{ + AllocationResult: &pluginapi.AssociatedDeviceAllocation{ + AllocatedDevices: []string{"test-rdma-0", "test-rdma-1"}, + }, + }, + }, + { + name: "No accompany resource allocates by best effort, allocate reusable devices first", + podUID: string(uuid.NewUUID()), + containerName: "test-container", + deviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "test-rdma-0": { + NumaNodes: []int{0}, + }, + "test-rdma-1": { + NumaNodes: []int{1}, + }, + "test-rdma-2": { + NumaNodes: []int{0}, + }, + "test-rdma-3": { + NumaNodes: []int{1}, + }, + }, + }, + machineState: state.AllocationResourcesMap{ + gpuconsts.RDMADeviceType: { + "test-rdma-0": {}, + "test-rdma-1": {}, + "test-rdma-2": {}, + "test-rdma-3": {}, + }, + }, + deviceReq: &pluginapi.DeviceRequest{ + DeviceName: "test-rdma", + ReusableDevices: []string{"test-rdma-2", "test-rdma-3"}, + AvailableDevices: []string{"test-rdma-0", "test-rdma-1", "test-rdma-2", "test-rdma-3"}, + DeviceRequest: 2, + }, + expectedResp: &pluginapi.AssociatedDeviceAllocationResponse{ + AllocationResult: &pluginapi.AssociatedDeviceAllocation{ + AllocatedDevices: []string{"test-rdma-2", "test-rdma-3"}, + }, + }, + }, + { + name: "No accompany resource allocates by best effort, no reusable devices, only allocate available devices with NUMA affinity", + podUID: string(uuid.NewUUID()), + containerName: "test-container", + deviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "test-rdma-0": 
{ + NumaNodes: []int{0}, + }, + "test-rdma-1": { + NumaNodes: []int{1}, + }, + "test-rdma-2": { + NumaNodes: []int{0}, + }, + "test-rdma-3": { + NumaNodes: []int{1}, + }, + }, + }, + machineState: state.AllocationResourcesMap{ + gpuconsts.RDMADeviceType: { + "test-rdma-0": {}, + "test-rdma-1": {}, + "test-rdma-2": {}, + "test-rdma-3": {}, + }, + }, + deviceReq: &pluginapi.DeviceRequest{ + DeviceName: "test-rdma", + ReusableDevices: nil, + AvailableDevices: []string{"test-rdma-0", "test-rdma-1", "test-rdma-2", "test-rdma-3"}, + DeviceRequest: 2, + Hint: &pluginapi.TopologyHint{ + Nodes: []uint64{0}, + }, + }, + expectedResp: &pluginapi.AssociatedDeviceAllocationResponse{ + AllocationResult: &pluginapi.AssociatedDeviceAllocation{ + AllocatedDevices: []string{"test-rdma-0", "test-rdma-2"}, + }, + }, + }, + { + name: "No accompany resource allocates by best effort, no reusable devices, skip devices that are already allocated", + podUID: string(uuid.NewUUID()), + containerName: "test-container", + deviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "test-rdma-0": { + NumaNodes: []int{0}, + }, + "test-rdma-1": { + NumaNodes: []int{1}, + }, + "test-rdma-2": { + NumaNodes: []int{0}, + }, + "test-rdma-3": { + NumaNodes: []int{1}, + }, + }, + }, + machineState: state.AllocationResourcesMap{ + gpuconsts.RDMADeviceType: { + "test-rdma-0": { + PodEntries: map[string]state.ContainerEntries{ + "pod-uid2": { + "test-container": { + AllocatedAllocation: state.Allocation{ + Quantity: 1, + }, + }, + }, + }, + }, + "test-rdma-1": { + PodEntries: map[string]state.ContainerEntries{ + "pod-uid3": { + "test-container": { + AllocatedAllocation: state.Allocation{ + Quantity: 1, + }, + }, + }, + }, + }, + "test-rdma-2": {}, + "test-rdma-3": {}, + }, + }, + deviceReq: &pluginapi.DeviceRequest{ + DeviceName: "test-rdma", + ReusableDevices: nil, + AvailableDevices: []string{"test-rdma-0", "test-rdma-1", "test-rdma-2", "test-rdma-3"}, + DeviceRequest: 2, + Hint: 
&pluginapi.TopologyHint{ + Nodes: []uint64{0, 1}, + }, + }, + expectedResp: &pluginapi.AssociatedDeviceAllocationResponse{ + AllocationResult: &pluginapi.AssociatedDeviceAllocation{ + AllocatedDevices: []string{"test-rdma-2", "test-rdma-3"}, + }, + }, + }, + { + name: "Accompany resource has been allocated", + podUID: "test-pod", + containerName: "test-container", + deviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "test-rdma-0": { + NumaNodes: []int{0}, + }, + "test-rdma-1": { + NumaNodes: []int{1}, + }, + "test-rdma-2": { + NumaNodes: []int{0}, + }, + "test-rdma-3": { + NumaNodes: []int{1}, + }, + }, + }, + // Ratio of 1 rdma device per 2 gpu devices + accompanyDeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "test-gpu-0": { + NumaNodes: []int{0}, + }, + "test-gpu-1": { + NumaNodes: []int{1}, + }, + "test-gpu-2": { + NumaNodes: []int{0}, + }, + "test-gpu-3": { + NumaNodes: []int{1}, + }, + "test-gpu-4": { + NumaNodes: []int{0}, + }, + "test-gpu-5": { + NumaNodes: []int{1}, + }, + "test-gpu-6": { + NumaNodes: []int{0}, + }, + "test-gpu-7": { + NumaNodes: []int{1}, + }, + }, + }, + accompanyResourceName: "test-gpu", + accompanyResourceAllocationInfo: &state.AllocationInfo{ + AllocatedAllocation: state.Allocation{ + Quantity: 4, + }, + TopologyAwareAllocations: map[string]state.Allocation{ + "test-gpu-0": { + Quantity: 1, + }, + "test-gpu-2": { + Quantity: 1, + }, + "test-gpu-4": { + Quantity: 1, + }, + "test-gpu-6": { + Quantity: 1, + }, + }, + }, + machineState: state.AllocationResourcesMap{ + gpuconsts.RDMADeviceType: { + "test-rdma-0": {}, + "test-rdma-1": {}, + "test-rdma-2": {}, + "test-rdma-3": {}, + }, + gpuconsts.GPUDeviceType: { + "test-gpu-0": { + PodEntries: map[string]state.ContainerEntries{ + "test-pod": { + "test-container": { + AllocatedAllocation: state.Allocation{ + Quantity: 1, + }, + }, + }, + }, + }, + "test-gpu-1": {}, + "test-gpu-2": { + PodEntries: 
map[string]state.ContainerEntries{ + "test-pod": { + "test-container": { + AllocatedAllocation: state.Allocation{ + Quantity: 1, + }, + }, + }, + }, + }, + "test-gpu-3": {}, + "test-gpu-4": { + PodEntries: map[string]state.ContainerEntries{ + "test-pod": { + "test-container": { + AllocatedAllocation: state.Allocation{ + Quantity: 1, + }, + }, + }, + }, + }, + "test-gpu-5": {}, + "test-gpu-6": { + PodEntries: map[string]state.ContainerEntries{ + "test-pod": { + "test-container": { + AllocatedAllocation: state.Allocation{ + Quantity: 1, + }, + }, + }, + }, + }, + "test-gpu-7": {}, + }, + }, + deviceReq: &pluginapi.DeviceRequest{ + DeviceName: "test-rdma", + ReusableDevices: nil, + AvailableDevices: []string{"test-rdma-0", "test-rdma-1", "test-rdma-2", "test-rdma-3"}, + DeviceRequest: 2, + Hint: &pluginapi.TopologyHint{ + Nodes: []uint64{0, 1}, + }, + }, + expectedResp: &pluginapi.AssociatedDeviceAllocationResponse{ + AllocationResult: &pluginapi.AssociatedDeviceAllocation{ + AllocatedDevices: []string{"test-rdma-0", "test-rdma-2"}, + }, + }, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + basePlugin := makeTestBasePlugin(t) + devicePlugin := NewRDMADevicePlugin(basePlugin) + + if tt.allocationInfo != nil { + basePlugin.State.SetAllocationInfo(gpuconsts.RDMADeviceType, tt.podUID, tt.containerName, tt.allocationInfo, false) + } + + if tt.accompanyResourceAllocationInfo != nil && tt.accompanyResourceName != "" { + accompanyResourceType, err := basePlugin.GetResourceTypeFromDeviceName(tt.accompanyResourceName) + assert.NoError(t, err) + basePlugin.State.SetAllocationInfo(v1.ResourceName(accompanyResourceType), tt.podUID, tt.containerName, tt.accompanyResourceAllocationInfo, false) + } + + if tt.deviceTopology != nil { + err := basePlugin.DeviceTopologyRegistry.SetDeviceTopology(gpuconsts.RDMADeviceType, tt.deviceTopology) + assert.NoError(t, err) + } + + if tt.accompanyResourceName != "" && 
tt.accompanyDeviceTopology != nil { + accompanyResourceType, err := basePlugin.GetResourceTypeFromDeviceName(tt.accompanyResourceName) + assert.NoError(t, err) + err = basePlugin.DeviceTopologyRegistry.SetDeviceTopology(accompanyResourceType, tt.accompanyDeviceTopology) + assert.NoError(t, err) + } + + if tt.machineState != nil { + basePlugin.State.SetMachineState(tt.machineState, false) + } + + resourceReq := &pluginapi.ResourceRequest{ + PodUid: tt.podUID, + ContainerName: tt.containerName, + } + + resp, err := devicePlugin.AllocateAssociatedDevice(context.Background(), resourceReq, tt.deviceReq, tt.accompanyResourceName) + if tt.expectedErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + evaluateAllocatedDevicesResult(t, tt.expectedResp, resp) + + // Verify state is updated + allocationInfo := basePlugin.State.GetAllocationInfo(gpuconsts.RDMADeviceType, tt.podUID, tt.containerName) + assert.NotNil(t, allocationInfo) + } + }) + } +} + +func evaluateAllocatedDevicesResult(t *testing.T, expectedResp, actualResp *pluginapi.AssociatedDeviceAllocationResponse) { + if expectedResp.AllocationResult == nil && actualResp.AllocationResult == nil { + return + } + + if expectedResp.AllocationResult != nil && actualResp.AllocationResult == nil { + t.Errorf("expected allocation result %v, but got nil", expectedResp.AllocationResult) + return + } + + if actualResp.AllocationResult != nil && expectedResp.AllocationResult == nil { + t.Errorf("expected nil allocation result, but got %v", actualResp.AllocationResult) + return + } + + assert.ElementsMatch(t, expectedResp.AllocationResult.AllocatedDevices, actualResp.AllocationResult.AllocatedDevices) +} diff --git a/pkg/agent/qrm-plugins/gpu/customdeviceplugin/registry/registry.go b/pkg/agent/qrm-plugins/gpu/customdeviceplugin/registry/registry.go new file mode 100644 index 0000000000..5a52bf7552 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/customdeviceplugin/registry/registry.go @@ -0,0 +1,41 @@ +/* +Copyright 2022 
The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package registry + +import ( + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/baseplugin" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/customdeviceplugin" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/customdeviceplugin/gpu" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/customdeviceplugin/rdma" +) + +type initFunc func(plugin *baseplugin.BasePlugin) customdeviceplugin.CustomDevicePlugin + +var customDevicePluginsMap = make(map[string]initFunc) + +func RegisterCustomDevicePlugin(deviceName string, initFunc initFunc) { + customDevicePluginsMap[deviceName] = initFunc +} + +func GetRegisteredCustomDevicePlugin() map[string]initFunc { + return customDevicePluginsMap +} + +func init() { + RegisterCustomDevicePlugin(gpu.GPUCustomDevicePluginName, gpu.NewGPUDevicePlugin) + RegisterCustomDevicePlugin(rdma.RDMACustomDevicePluginName, rdma.NewRDMADevicePlugin) +} diff --git a/pkg/agent/qrm-plugins/gpu/gpu.go b/pkg/agent/qrm-plugins/gpu/gpu.go new file mode 100644 index 0000000000..62010cd178 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/gpu.go @@ -0,0 +1,27 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package gpu + +import ( + "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent/qrm" + gpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/consts" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/staticpolicy" +) + +func init() { + qrm.RegisterGPUPolicyInitializer(gpuconsts.GPUResourcePluginPolicyNameStatic, staticpolicy.NewStaticPolicy) +} diff --git a/pkg/agent/qrm-plugins/gpu/resourceplugin/gpumemory/gpu_mem.go b/pkg/agent/qrm-plugins/gpu/resourceplugin/gpumemory/gpu_mem.go new file mode 100644 index 0000000000..eac2833070 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/resourceplugin/gpumemory/gpu_mem.go @@ -0,0 +1,642 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package gpumemory + +import ( + "context" + "fmt" + "math" + "sort" + "sync" + + deviceplugin "k8s.io/kubelet/pkg/apis/deviceplugin/v1alpha" + pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + + "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" + "github.com/kubewharf/katalyst-api/pkg/consts" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/baseplugin" + gpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/consts" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/resourceplugin" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/state" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate/manager" + gpuutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/util" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util" + "github.com/kubewharf/katalyst-core/pkg/metrics" + "github.com/kubewharf/katalyst-core/pkg/util/general" + "github.com/kubewharf/katalyst-core/pkg/util/machine" + "github.com/kubewharf/katalyst-core/pkg/util/metric" + qosutil "github.com/kubewharf/katalyst-core/pkg/util/qos" +) + +type GPUMemPlugin struct { + sync.Mutex + *baseplugin.BasePlugin +} + +func NewGPUMemPlugin(base *baseplugin.BasePlugin) resourceplugin.ResourcePlugin { + base.DefaultResourceStateGeneratorRegistry.RegisterResourceStateGenerator(string(consts.ResourceGPUMemory), + state.NewGenericDefaultResourceStateGenerator(gpuconsts.GPUDeviceType, base.DeviceTopologyRegistry)) + return &GPUMemPlugin{ + BasePlugin: base, + } +} + +func (p *GPUMemPlugin) ResourceName() string { + return string(consts.ResourceGPUMemory) +} + +func (p *GPUMemPlugin) GetTopologyHints(ctx context.Context, req *pluginapi.ResourceRequest) (resp *pluginapi.ResourceHintsResponse, err error) { + // if not numa binding, return nil hints to let kubelet choose numa node randomly + if 
!qosutil.AnnotationsIndicateNUMABinding(req.Annotations) { + return util.PackResourceHintsResponse(req, string(consts.ResourceGPUMemory), + map[string]*pluginapi.ListOfTopologyHints{ + string(consts.ResourceGPUMemory): nil, // indicates that there is no numa preference + }) + } + + qosLevel, err := util.GetKatalystQoSLevelFromResourceReq(p.Conf.QoSConfiguration, req, p.PodAnnotationKeptKeys, p.PodLabelKeptKeys) + if err != nil { + err = fmt.Errorf("GetKatalystQoSLevelFromResourceReq for pod: %s/%s, container: %s failed with error: %v", + req.PodNamespace, req.PodName, req.ContainerName, err) + general.Errorf("%s", err.Error()) + return nil, err + } + + _, gpuMemory, err := util.GetQuantityFromResourceRequests(req.ResourceRequests, p.ResourceName(), false) + if err != nil { + return nil, fmt.Errorf("getReqQuantityFromResourceReq failed with error: %v", err) + } + + general.InfoS("called", + "podNamespace", req.PodNamespace, + "podName", req.PodName, + "containerName", req.ContainerName, + "qosLevel", qosLevel, + "reqAnnotations", req.Annotations, + "gpuMemory", gpuMemory) + + p.Lock() + defer func() { + if err := p.State.StoreState(); err != nil { + general.ErrorS(err, "store state failed", "podName", req.PodName, "containerName", req.ContainerName) + } + p.Unlock() + if err != nil { + metricTags := []metrics.MetricTag{ + {Key: "error_message", Val: metric.MetricTagValueFormat(err)}, + } + _ = p.Emitter.StoreInt64(util.MetricNameGetTopologyHintsFailed, 1, metrics.MetricTypeNameRaw, metricTags...) + } + }() + + var hints map[string]*pluginapi.ListOfTopologyHints + machineState := p.State.GetMachineState()[consts.ResourceGPUMemory] + allocationInfo := p.State.GetAllocationInfo(consts.ResourceGPUMemory, req.PodUid, req.ContainerName) + + if allocationInfo != nil { + hints = regenerateGPUMemoryHints(allocationInfo, false) + + // regenerateHints failed. need to clear container record and re-calculate. 
+ if hints == nil { + podEntries := p.State.GetPodEntries(consts.ResourceGPUMemory) + delete(podEntries[req.PodUid], req.ContainerName) + if len(podEntries[req.PodUid]) == 0 { + delete(podEntries, req.PodUid) + } + + var err error + machineState, err = p.GenerateResourceStateFromPodEntries(string(consts.ResourceGPUMemory), podEntries) + if err != nil { + general.Errorf("pod: %s/%s, container: %s GenerateMachineStateFromPodEntries failed with error: %v", + req.PodNamespace, req.PodName, req.ContainerName, err) + return nil, fmt.Errorf("GenerateMachineStateFromPodEntries failed with error: %v", err) + } + } + } + + gpuCount, gpuNames, err := gpuutil.GetGPUCount(req, p.Conf.GPUDeviceNames) + if err != nil { + general.Errorf("getGPUCount failed from req %v with error: %v", req, err) + return nil, fmt.Errorf("getGPUCount failed with error: %v", err) + } + + general.Infof("gpuCount: %f, gpuNames: %v", gpuCount, gpuNames.List()) + + // otherwise, calculate hint for container without allocated memory + if hints == nil { + var calculateErr error + // calculate hint for container without allocated cpus + hints, calculateErr = p.calculateHints(gpuMemory, gpuCount, machineState, req) + if calculateErr != nil { + return nil, fmt.Errorf("calculateHints failed with error: %v", calculateErr) + } + } + + return util.PackResourceHintsResponse(req, p.ResourceName(), hints) +} + +func (p *GPUMemPlugin) calculateHints( + gpuMemory float64, gpuReq float64, machineState state.AllocationMap, req *pluginapi.ResourceRequest, +) (map[string]*pluginapi.ListOfTopologyHints, error) { + gpuTopology, numaTopologyReady, err := p.DeviceTopologyRegistry.GetDeviceTopology(gpuconsts.GPUDeviceType) + if err != nil { + return nil, err + } + + if !numaTopologyReady { + return nil, fmt.Errorf("numa topology is not ready") + } + + perGPUMemory := gpuMemory / gpuReq + general.Infof("gpuMemory: %f, gpuReq: %f, perGPUMemory: %f", gpuMemory, gpuReq, perGPUMemory) + + numaToAvailableGPUCount := 
make(map[int]float64) + numaToMostAllocatedGPUMemory := make(map[int]float64) + for gpuID, info := range gpuTopology.Devices { + if info.Health != deviceplugin.Healthy { + continue + } + + s := machineState[gpuID] + // todo: get allocated quantity according to qos level + allocated := s.GetQuantityAllocated() + if allocated+perGPUMemory <= float64(p.Conf.GPUMemoryAllocatablePerGPU.Value()) { + for _, numaNode := range info.GetNUMANodes() { + numaToAvailableGPUCount[numaNode] += 1 + numaToMostAllocatedGPUMemory[numaNode] = math.Max(allocated, numaToMostAllocatedGPUMemory[numaNode]) + } + } + } + + numaNodes := make([]int, 0, p.MetaServer.NumNUMANodes) + for numaNode := range p.MetaServer.NUMAToCPUs { + numaNodes = append(numaNodes, numaNode) + } + sort.Ints(numaNodes) + + minNUMAsCountNeeded, _, err := gpuutil.GetNUMANodesCountToFitGPUReq(gpuReq, p.MetaServer.CPUTopology, gpuTopology) + if err != nil { + return nil, err + } + + numaCountPerSocket, err := p.MetaServer.NUMAsPerSocket() + if err != nil { + return nil, fmt.Errorf("NUMAsPerSocket failed with error: %v", err) + } + + numaBound := len(numaNodes) + if numaBound > machine.LargeNUMAsPoint { + // [TODO]: to discuss refine minNUMAsCountNeeded+1 + numaBound = minNUMAsCountNeeded + 1 + } + + var availableNumaHints []*pluginapi.TopologyHint + machine.IterateBitMasks(numaNodes, numaBound, func(mask machine.BitMask) { + maskCount := mask.Count() + if maskCount < minNUMAsCountNeeded { + return + } + + maskBits := mask.GetBits() + numaCountNeeded := mask.Count() + + allAvailableGPUsCountInMask := float64(0) + for _, nodeID := range maskBits { + allAvailableGPUsCountInMask += numaToAvailableGPUCount[nodeID] + } + + if allAvailableGPUsCountInMask < gpuReq { + return + } + + crossSockets, err := machine.CheckNUMACrossSockets(maskBits, p.MetaServer.CPUTopology) + if err != nil { + return + } else if numaCountNeeded <= numaCountPerSocket && crossSockets { + return + } + + preferred := maskCount == minNUMAsCountNeeded + 
availableNumaHints = append(availableNumaHints, &pluginapi.TopologyHint{ + Nodes: machine.MaskToUInt64Array(mask), + Preferred: preferred, + }) + }) + + // prefer numa nodes with most allocated gpu memory + p.preferGPUMemoryMostAllocatedHints(availableNumaHints, numaToMostAllocatedGPUMemory) + + // NOTE: because grpc is inability to distinguish between an empty array and nil, + // we return an error instead of an empty array. + // we should resolve this issue if we need to manage multi-resource in one plugin. + if len(availableNumaHints) == 0 { + general.Warningf("got no available gpu memory hints for pod: %s/%s, container: %s", + req.PodNamespace, req.PodName, req.ContainerName) + return nil, gpuutil.ErrNoAvailableGPUMemoryHints + } + + return map[string]*pluginapi.ListOfTopologyHints{ + p.ResourceName(): { + Hints: availableNumaHints, + }, + }, nil +} + +func (p *GPUMemPlugin) preferGPUMemoryMostAllocatedHints( + hints []*pluginapi.TopologyHint, numaToMostAllocatedGPUMemory map[int]float64, +) { + hintGPUMemoryMostAllocated := make(map[int]float64) + for index, hint := range hints { + if !hint.Preferred { + continue + } + + gpuMemoryMostAllocated := float64(0) + for _, nodeID := range hint.Nodes { + gpuMemoryMostAllocated = math.Max(gpuMemoryMostAllocated, numaToMostAllocatedGPUMemory[int(nodeID)]) + } + hintGPUMemoryMostAllocated[index] = gpuMemoryMostAllocated + } + + mostAllocatedHintIndex := -1 + for index, hint := range hints { + if !hint.Preferred { + continue + } + + if mostAllocatedHintIndex == -1 || hintGPUMemoryMostAllocated[index] > hintGPUMemoryMostAllocated[mostAllocatedHintIndex] { + mostAllocatedHintIndex = index + } + } + + if mostAllocatedHintIndex < 0 { + return + } + + for index, hint := range hints { + if !hint.Preferred || mostAllocatedHintIndex == index { + continue + } + hint.Preferred = false + } +} + +func (p *GPUMemPlugin) GetTopologyAwareResources(ctx context.Context, podUID, containerName string) 
(*pluginapi.GetTopologyAwareResourcesResponse, error) { + general.InfofV(4, "called") + + allocationInfo := p.State.GetAllocationInfo(consts.ResourceGPUMemory, podUID, containerName) + if allocationInfo == nil { + return nil, nil + } + + topologyAwareQuantityList := make([]*pluginapi.TopologyAwareQuantity, 0, len(allocationInfo.TopologyAwareAllocations)) + for deviceID, alloc := range allocationInfo.TopologyAwareAllocations { + perNUMAAllocated := alloc.Quantity + if len(alloc.NUMANodes) > 0 { + perNUMAAllocated = alloc.Quantity / float64(len(alloc.NUMANodes)) + for _, nodeID := range alloc.NUMANodes { + if nodeID < 0 { + nodeID = 0 + } + topologyAwareQuantityList = append(topologyAwareQuantityList, &pluginapi.TopologyAwareQuantity{ + Node: uint64(nodeID), + ResourceValue: perNUMAAllocated, + Name: deviceID, + Type: string(v1alpha1.TopologyTypeGPU), + Annotations: map[string]string{ + consts.ResourceAnnotationKeyResourceIdentifier: "", + }, + }) + } + } else { + topologyAwareQuantityList = append(topologyAwareQuantityList, &pluginapi.TopologyAwareQuantity{ + ResourceValue: perNUMAAllocated, + Name: deviceID, + Type: string(v1alpha1.TopologyTypeGPU), + Annotations: map[string]string{ + consts.ResourceAnnotationKeyResourceIdentifier: "", + }, + }) + } + } + + resp := &pluginapi.GetTopologyAwareResourcesResponse{ + PodUid: podUID, + PodName: allocationInfo.PodName, + PodNamespace: allocationInfo.PodNamespace, + ContainerTopologyAwareResources: &pluginapi.ContainerTopologyAwareResources{ + ContainerName: containerName, + AllocatedResources: map[string]*pluginapi.TopologyAwareResource{ + p.ResourceName(): { + IsNodeResource: true, + IsScalarResource: true, + AggregatedQuantity: allocationInfo.AllocatedAllocation.Quantity, + OriginalAggregatedQuantity: allocationInfo.AllocatedAllocation.Quantity, + TopologyAwareQuantityList: topologyAwareQuantityList, + OriginalTopologyAwareQuantityList: topologyAwareQuantityList, + }, + }, + }, + } + + return resp, nil +} + +func (p 
*GPUMemPlugin) GetTopologyAwareAllocatableResources(ctx context.Context) (*gpuconsts.AllocatableResource, error) { + general.InfofV(4, "called") + + p.Lock() + defer p.Unlock() + + gpuTopology, numaTopologyReady, err := p.DeviceTopologyRegistry.GetDeviceTopology(gpuconsts.GPUDeviceType) + if err != nil { + return nil, err + } + + if !numaTopologyReady { + return nil, fmt.Errorf("numa topology is not ready") + } + + topologyAwareAllocatableQuantityList := make([]*pluginapi.TopologyAwareQuantity, 0, len(gpuTopology.Devices)) + topologyAwareCapacityQuantityList := make([]*pluginapi.TopologyAwareQuantity, 0, len(gpuTopology.Devices)) + var aggregatedAllocatableQuantity, aggregatedCapacityQuantity float64 + for deviceID, deviceInfo := range gpuTopology.Devices { + gpuMemoryAllocatablePerGPU := float64(p.Conf.GPUMemoryAllocatablePerGPU.Value()) + gpuMemoryCapacityPerGPU := float64(p.Conf.GPUMemoryAllocatablePerGPU.Value()) + if deviceInfo.Health != deviceplugin.Healthy || !p.ShareGPUManager.EnableShareGPU(deviceID) { + // if the device is not healthy or not enabled to share, then set allocatable to 0 + gpuMemoryAllocatablePerGPU = 0 + } + aggregatedAllocatableQuantity += gpuMemoryAllocatablePerGPU + aggregatedCapacityQuantity += gpuMemoryCapacityPerGPU + if len(deviceInfo.NumaNodes) > 0 { + gpuMemoryAllocatablePerGPU = gpuMemoryAllocatablePerGPU / float64(len(deviceInfo.NumaNodes)) + gpuMemoryCapacityPerGPU = gpuMemoryCapacityPerGPU / float64(len(deviceInfo.NumaNodes)) + for _, numaID := range deviceInfo.NumaNodes { + if numaID < 0 { + numaID = 0 + } + topologyAwareAllocatableQuantityList = append(topologyAwareAllocatableQuantityList, &pluginapi.TopologyAwareQuantity{ + ResourceValue: gpuMemoryAllocatablePerGPU, + Name: deviceID, + Node: uint64(numaID), + Type: string(v1alpha1.TopologyTypeGPU), + Annotations: map[string]string{ + consts.ResourceAnnotationKeyResourceIdentifier: "", + }, + }) + topologyAwareCapacityQuantityList = append(topologyAwareCapacityQuantityList, 
&pluginapi.TopologyAwareQuantity{ + ResourceValue: gpuMemoryCapacityPerGPU, + Name: deviceID, + Node: uint64(numaID), + Type: string(v1alpha1.TopologyTypeGPU), + Annotations: map[string]string{ + consts.ResourceAnnotationKeyResourceIdentifier: "", + }, + }) + } + } else { + // if deviceInfo.NumaNodes is empty, then it means the device is not NUMA aware + topologyAwareAllocatableQuantityList = append(topologyAwareAllocatableQuantityList, &pluginapi.TopologyAwareQuantity{ + ResourceValue: gpuMemoryAllocatablePerGPU, + Name: deviceID, + Type: string(v1alpha1.TopologyTypeGPU), + Annotations: map[string]string{ + consts.ResourceAnnotationKeyResourceIdentifier: "", + }, + }) + topologyAwareCapacityQuantityList = append(topologyAwareCapacityQuantityList, &pluginapi.TopologyAwareQuantity{ + ResourceValue: gpuMemoryCapacityPerGPU, + Name: deviceID, + Type: string(v1alpha1.TopologyTypeGPU), + Annotations: map[string]string{ + consts.ResourceAnnotationKeyResourceIdentifier: "", + }, + }) + } + } + + return &gpuconsts.AllocatableResource{ + ResourceName: p.ResourceName(), + AllocatableTopologyAwareResource: &pluginapi.AllocatableTopologyAwareResource{ + IsNodeResource: true, + IsScalarResource: true, + AggregatedAllocatableQuantity: aggregatedAllocatableQuantity, + TopologyAwareAllocatableQuantityList: topologyAwareAllocatableQuantityList, + AggregatedCapacityQuantity: aggregatedCapacityQuantity, + TopologyAwareCapacityQuantityList: topologyAwareCapacityQuantityList, + }, + }, nil +} + +func (p *GPUMemPlugin) Allocate( + ctx context.Context, resourceReq *pluginapi.ResourceRequest, deviceReq *pluginapi.DeviceRequest, +) (*pluginapi.ResourceAllocationResponse, error) { + quantity, exists := resourceReq.ResourceRequests[p.ResourceName()] + if !exists || quantity == 0 { + general.InfoS("No GPU memory annotation detected and no GPU memory requested, returning empty response", + "podNamespace", resourceReq.PodNamespace, + "podName", resourceReq.PodName, + "containerName", 
resourceReq.ContainerName) + return util.CreateEmptyAllocationResponse(resourceReq, p.ResourceName()), nil + } + + qosLevel, err := util.GetKatalystQoSLevelFromResourceReq(p.Conf.QoSConfiguration, resourceReq, p.PodAnnotationKeptKeys, p.PodLabelKeptKeys) + if err != nil { + err = fmt.Errorf("GetKatalystQoSLevelFromResourceReq for pod: %s/%s, container: %s failed with error: %v", + resourceReq.PodNamespace, resourceReq.PodName, resourceReq.ContainerName, err) + general.Errorf("%s", err.Error()) + return nil, err + } + + _, gpuMemory, err := util.GetQuantityFromResourceRequests(resourceReq.ResourceRequests, p.ResourceName(), false) + if err != nil { + return nil, fmt.Errorf("getReqQuantityFromResourceReq failed with error: %v", err) + } + + general.InfoS("called", + "podNamespace", resourceReq.PodNamespace, + "podName", resourceReq.PodName, + "containerName", resourceReq.ContainerName, + "qosLevel", qosLevel, + "reqAnnotations", resourceReq.Annotations, + "gpuMemory", gpuMemory, + "deviceReq", deviceReq.String()) + + p.Lock() + defer func() { + if err := p.State.StoreState(); err != nil { + general.ErrorS(err, "store state failed", "podName", resourceReq.PodName, "containerName", resourceReq.ContainerName) + } + p.Unlock() + if err != nil { + metricTags := []metrics.MetricTag{ + {Key: "error_message", Val: metric.MetricTagValueFormat(err)}, + } + _ = p.Emitter.StoreInt64(util.MetricNameAllocateFailed, 1, metrics.MetricTypeNameRaw, metricTags...) 
+ } + }() + + // currently, not to deal with init containers + if resourceReq.ContainerType == pluginapi.ContainerType_INIT { + return util.CreateEmptyAllocationResponse(resourceReq, p.ResourceName()), nil + } else if resourceReq.ContainerType == pluginapi.ContainerType_SIDECAR { + // not to deal with sidecars, and return a trivial allocationResult to avoid re-allocating + return p.PackAllocationResponse(resourceReq, &state.AllocationInfo{}, nil, p.ResourceName()) + } + + allocationInfo := p.State.GetAllocationInfo(consts.ResourceGPUMemory, resourceReq.PodUid, resourceReq.ContainerName) + if allocationInfo != nil { + resp, packErr := p.PackAllocationResponse(resourceReq, allocationInfo, nil, p.ResourceName()) + if packErr != nil { + general.Errorf("pod: %s/%s, container: %s packAllocationResponse failed with error: %v", + resourceReq.PodNamespace, resourceReq.PodName, resourceReq.ContainerName, packErr) + return nil, fmt.Errorf("packAllocationResponse failed with error: %w", packErr) + } + return resp, nil + } + + if deviceReq == nil { + general.InfoS("Nil device request, returning empty response", + "podNamespace", resourceReq.PodNamespace, + "podName", resourceReq.PodName, + "containerName", resourceReq.ContainerName) + // if deviceReq is nil, return empty response to re-allocate after device allocation + return util.CreateEmptyAllocationResponse(resourceReq, p.ResourceName()), nil + } + + general.Infof("deviceReq: %v", deviceReq.String()) + + // Get GPU topology + gpuTopology, numaTopologyReady, err := p.DeviceTopologyRegistry.GetDeviceTopology(gpuconsts.GPUDeviceType) + if err != nil { + general.Warningf("failed to get gpu topology: %v", err) + return nil, fmt.Errorf("failed to get gpu topology: %v", err) + } + + if !numaTopologyReady { + general.Warningf("numa topology is not ready") + return nil, fmt.Errorf("numa topology is not ready") + } + + // Use the strategy framework to allocate GPU memory + result, err := manager.AllocateGPUUsingStrategy( + 
resourceReq, + deviceReq, + gpuTopology, + p.Conf.GPUQRMPluginConfig, + p.Emitter, + p.MetaServer, + p.State.GetMachineState(), + qosLevel, + ) + if err != nil { + return nil, fmt.Errorf("GPU allocation using strategy failed: %v", err) + } + + if !result.Success { + return nil, fmt.Errorf("GPU allocation failed: %s", result.ErrorMessage) + } + + // get hint nodes from request + hintNodes, err := machine.NewCPUSetUint64(resourceReq.GetHint().GetNodes()...) + if err != nil { + general.Warningf("failed to get hint nodes: %v", err) + return nil, fmt.Errorf("failed to get hint nodes: %w", err) + } + + newAllocation := &state.AllocationInfo{ + AllocationMeta: commonstate.GenerateGenericContainerAllocationMeta(resourceReq, commonstate.EmptyOwnerPoolName, qosLevel), + AllocatedAllocation: state.Allocation{ + Quantity: gpuMemory, + NUMANodes: hintNodes.ToSliceInt(), + }, + TopologyAwareAllocations: make(map[string]state.Allocation), + } + + gpuMemoryPerGPU := gpuMemory / float64(deviceReq.DeviceRequest) + for _, deviceID := range result.AllocatedDevices { + info, ok := gpuTopology.Devices[deviceID] + if !ok { + return nil, fmt.Errorf("failed to get gpu info for device: %s", deviceID) + } + + newAllocation.TopologyAwareAllocations[deviceID] = state.Allocation{ + Quantity: gpuMemoryPerGPU, + NUMANodes: info.NumaNodes, + } + } + + // Set allocation info in state + p.State.SetAllocationInfo(consts.ResourceGPUMemory, resourceReq.PodUid, resourceReq.ContainerName, newAllocation, false) + + machineState, stateErr := p.GenerateResourceStateFromPodEntries(string(consts.ResourceGPUMemory), nil) + if stateErr != nil { + general.ErrorS(stateErr, "GenerateResourceStateFromPodEntries failed", + "podNamespace", resourceReq.PodNamespace, + "podName", resourceReq.PodName, + "containerName", resourceReq.ContainerName, + "gpuMemory", gpuMemory) + return nil, fmt.Errorf("GenerateResourceStateFromPodEntries failed with error: %v", stateErr) + } + + // update state cache + 
p.State.SetResourceState(consts.ResourceGPUMemory, machineState, true) + + return p.PackAllocationResponse(resourceReq, newAllocation, nil, p.ResourceName()) +} + +// regenerateGPUMemoryHints regenerates hints for container that'd already been allocated gpu memory, +// and regenerateHints will assemble hints based on already-existed AllocationInfo, +// without any calculation logics at all +func regenerateGPUMemoryHints( + allocationInfo *state.AllocationInfo, regenerate bool, +) map[string]*pluginapi.ListOfTopologyHints { + if allocationInfo == nil { + general.Errorf("RegenerateHints got nil allocationInfo") + return nil + } + + hints := map[string]*pluginapi.ListOfTopologyHints{} + if regenerate { + general.ErrorS(nil, "need to regenerate hints", + "podNamespace", allocationInfo.PodNamespace, + "podName", allocationInfo.PodName, + "podUID", allocationInfo.PodUid, + "containerName", allocationInfo.ContainerName) + + return nil + } + + allocatedNumaNodes := machine.NewCPUSet(allocationInfo.AllocatedAllocation.NUMANodes...) + + general.InfoS("regenerating machineInfo hints, gpu memory was already allocated to pod", + "podNamespace", allocationInfo.PodNamespace, + "podName", allocationInfo.PodName, + "containerName", allocationInfo.ContainerName, + "hint", allocatedNumaNodes) + hints[string(consts.ResourceGPUMemory)] = &pluginapi.ListOfTopologyHints{ + Hints: []*pluginapi.TopologyHint{ + { + Nodes: allocatedNumaNodes.ToSliceUInt64(), + Preferred: true, + }, + }, + } + return hints +} diff --git a/pkg/agent/qrm-plugins/gpu/resourceplugin/interface.go b/pkg/agent/qrm-plugins/gpu/resourceplugin/interface.go new file mode 100644 index 0000000000..f368b40939 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/resourceplugin/interface.go @@ -0,0 +1,40 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package resourceplugin + +import ( + "context" + + pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + + gpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/consts" +) + +// ResourcePlugin knows how to handle resource requests for a specific resource type. +type ResourcePlugin interface { + ResourceName() string + + GetTopologyHints(ctx context.Context, request *pluginapi.ResourceRequest) (*pluginapi.ResourceHintsResponse, error) + + GetTopologyAwareResources(ctx context.Context, podUID, containerName string) (*pluginapi.GetTopologyAwareResourcesResponse, error) + + GetTopologyAwareAllocatableResources(ctx context.Context) (*gpuconsts.AllocatableResource, error) + + Allocate( + ctx context.Context, resourceReq *pluginapi.ResourceRequest, deviceReq *pluginapi.DeviceRequest, + ) (*pluginapi.ResourceAllocationResponse, error) +} diff --git a/pkg/agent/qrm-plugins/gpu/resourceplugin/registry/registry.go b/pkg/agent/qrm-plugins/gpu/resourceplugin/registry/registry.go new file mode 100644 index 0000000000..a7cefbaf7e --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/resourceplugin/registry/registry.go @@ -0,0 +1,40 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package registry + +import ( + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/baseplugin" + gpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/consts" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/resourceplugin" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/resourceplugin/gpumemory" +) + +type InitFunc func(plugin *baseplugin.BasePlugin) resourceplugin.ResourcePlugin + +var resourcePluginsMap = make(map[string]InitFunc) + +func RegisterResourcePlugin(pluginName string, initFunc InitFunc) { + resourcePluginsMap[pluginName] = initFunc +} + +func GetRegisteredResourcePlugin() map[string]InitFunc { + return resourcePluginsMap +} + +func init() { + RegisterResourcePlugin(gpuconsts.GPUMemPluginName, gpumemory.NewGPUMemPlugin) +} diff --git a/pkg/agent/qrm-plugins/gpu/resourceplugin/resource_plugin_stub.go b/pkg/agent/qrm-plugins/gpu/resourceplugin/resource_plugin_stub.go new file mode 100644 index 0000000000..57b28089c6 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/resourceplugin/resource_plugin_stub.go @@ -0,0 +1,82 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package resourceplugin + +import ( + "context" + "fmt" + + v1 "k8s.io/api/core/v1" + pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/baseplugin" + gpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/consts" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/state" +) + +type ResourcePluginStub struct { + *baseplugin.BasePlugin +} + +func NewResourcePluginStub(base *baseplugin.BasePlugin) ResourcePlugin { + return &ResourcePluginStub{BasePlugin: base} +} + +func (r ResourcePluginStub) ResourceName() string { + return "resource-plugin-stub" +} + +func (r ResourcePluginStub) GetTopologyHints(context.Context, *pluginapi.ResourceRequest) (*pluginapi.ResourceHintsResponse, error) { + return &pluginapi.ResourceHintsResponse{}, nil +} + +func (r ResourcePluginStub) GetTopologyAwareResources(_ context.Context, podUID, containerName string) (*pluginapi.GetTopologyAwareResourcesResponse, error) { + // Simply returns a fixed response if the podUID and containerName is found in the state, otherwise return an error + allocationInfo := r.State.GetAllocationInfo(v1.ResourceName(r.ResourceName()), podUID, containerName) + if allocationInfo == nil { + return nil, fmt.Errorf("allocationInfo is nil") + } + + // Simply returns a fixed response + return &pluginapi.GetTopologyAwareResourcesResponse{ + ContainerTopologyAwareResources: &pluginapi.ContainerTopologyAwareResources{ + AllocatedResources: map[string]*pluginapi.TopologyAwareResource{ + r.ResourceName(): {}, + }, + }, + }, nil +} + +func (r ResourcePluginStub) GetTopologyAwareAllocatableResources(context.Context) (*gpuconsts.AllocatableResource, error) { + // Simply return a fixed response + return &gpuconsts.AllocatableResource{ + ResourceName: r.ResourceName(), + }, nil +} + +func (r ResourcePluginStub) 
Allocate(_ context.Context, resourceReq *pluginapi.ResourceRequest, deviceReq *pluginapi.DeviceRequest) (*pluginapi.ResourceAllocationResponse, error) { + // Simply save resource request and device request in state + if resourceReq.ResourceName == r.ResourceName() { + r.State.SetAllocationInfo(v1.ResourceName(r.ResourceName()), resourceReq.PodUid, resourceReq.ContainerName, &state.AllocationInfo{}, false) + } + + if deviceReq != nil { + r.State.SetAllocationInfo(v1.ResourceName(deviceReq.DeviceName), resourceReq.PodUid, resourceReq.ContainerName, &state.AllocationInfo{}, false) + } + + return &pluginapi.ResourceAllocationResponse{}, nil +} diff --git a/pkg/agent/qrm-plugins/gpu/state/checkpoint.go b/pkg/agent/qrm-plugins/gpu/state/checkpoint.go new file mode 100644 index 0000000000..fcd518ff97 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/state/checkpoint.go @@ -0,0 +1,62 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package state + +import ( + "encoding/json" + + "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" + "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum" +) + +var _ checkpointmanager.Checkpoint = &GPUPluginCheckpoint{} + +type GPUPluginCheckpoint struct { + PolicyName string `json:"policyName"` + MachineState AllocationResourcesMap `json:"machineState"` + PodResourceEntries PodResourceEntries `json:"pod_entries"` + Checksum checksum.Checksum `json:"checksum"` +} + +func NewGPUPluginCheckpoint() *GPUPluginCheckpoint { + return &GPUPluginCheckpoint{ + PodResourceEntries: make(PodResourceEntries), + MachineState: make(AllocationResourcesMap), + } +} + +// MarshalCheckpoint returns marshaled checkpoint +func (cp *GPUPluginCheckpoint) MarshalCheckpoint() ([]byte, error) { + // make sure checksum wasn't set before, so it doesn't affect output checksum + cp.Checksum = 0 + cp.Checksum = checksum.New(cp) + return json.Marshal(*cp) +} + +// UnmarshalCheckpoint tries to unmarshal passed bytes to checkpoint +func (cp *GPUPluginCheckpoint) UnmarshalCheckpoint(blob []byte) error { + return json.Unmarshal(blob, cp) +} + +// VerifyChecksum verifies that current checksum of checkpoint is valid +func (cp *GPUPluginCheckpoint) VerifyChecksum() error { + ck := cp.Checksum + cp.Checksum = 0 + err := ck.Verify(cp) + cp.Checksum = ck + return err +} diff --git a/pkg/agent/qrm-plugins/gpu/state/interface.go b/pkg/agent/qrm-plugins/gpu/state/interface.go new file mode 100644 index 0000000000..753a3efa6a --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/state/interface.go @@ -0,0 +1,60 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package state + +import ( + v1 "k8s.io/api/core/v1" +) + +// reader is used to get information from local states +type reader interface { + GetMachineState() AllocationResourcesMap + GetPodResourceEntries() PodResourceEntries + GetPodEntries(resourceName v1.ResourceName) PodEntries + GetAllocationInfo(resourceName v1.ResourceName, podUID, containerName string) *AllocationInfo +} + +// writer is used to store information into local states, +// and it also provides functionality to maintain the local files +type writer interface { + SetMachineState(allocationResourcesMap AllocationResourcesMap, persist bool) + SetResourceState(resourceName v1.ResourceName, allocationMap AllocationMap, persist bool) + SetPodResourceEntries(podResourceEntries PodResourceEntries, persist bool) + SetAllocationInfo( + resourceName v1.ResourceName, podUID, containerName string, allocationInfo *AllocationInfo, persist bool, + ) + + Delete(resourceName v1.ResourceName, podUID, containerName string, persist bool) + ClearState() + StoreState() error +} + +// DefaultResourceStateGenerator interface is used to generate default resource state for each resource +type DefaultResourceStateGenerator interface { + GenerateDefaultResourceState() (AllocationMap, error) +} + +// ReadonlyState interface only provides methods for tracking pod assignments +type ReadonlyState interface { + reader +} + +// State interface provides methods for tracking and setting pod assignments +type State interface { + writer + ReadonlyState +} diff --git a/pkg/agent/qrm-plugins/gpu/state/state.go 
b/pkg/agent/qrm-plugins/gpu/state/state.go new file mode 100644 index 0000000000..7833025b15 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/state/state.go @@ -0,0 +1,375 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package state + +import ( + "encoding/json" + "sync" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" + + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate" + "github.com/kubewharf/katalyst-core/pkg/util/general" +) + +type AllocationInfo struct { + commonstate.AllocationMeta `json:",inline"` + + AllocatedAllocation Allocation `json:"allocated_allocation"` + TopologyAwareAllocations map[string]Allocation `json:"topology_aware_allocations"` +} + +type Allocation struct { + // Quantity refers to the amount of device allocated + Quantity float64 `json:"quantity"` + NUMANodes []int `json:"numa_nodes"` +} + +func (a *Allocation) Clone() Allocation { + if a == nil { + return Allocation{} + } + + numaNodes := make([]int, len(a.NUMANodes)) + copy(numaNodes, a.NUMANodes) + return Allocation{ + Quantity: a.Quantity, + NUMANodes: numaNodes, + } +} + +type ( + ContainerEntries map[string]*AllocationInfo // Keyed by container name + PodEntries map[string]ContainerEntries // Keyed by pod UID + PodResourceEntries map[v1.ResourceName]PodEntries // Keyed by resource name +) + +type AllocationState struct { + PodEntries PodEntries `json:"pod_entries"` +} + +type AllocationMap 
map[string]*AllocationState // AllocationMap keyed by device name i.e. GPU-fef8089b-4820-abfc-e83e-94318197576e + +type AllocationResourcesMap map[v1.ResourceName]AllocationMap // AllocationResourcesMap keyed by resource name i.e. v1.ResourceName("nvidia.com/gpu") + +func (i *AllocationInfo) String() string { + if i == nil { + return "" + } + + contentBytes, err := json.Marshal(i) + if err != nil { + general.LoggerWithPrefix("AllocationInfo.String", general.LoggingPKGFull).Errorf("marshal AllocationInfo failed with error: %v", err) + return "" + } + return string(contentBytes) +} + +func (i *AllocationInfo) Clone() *AllocationInfo { + if i == nil { + return nil + } + + clone := &AllocationInfo{ + AllocationMeta: *i.AllocationMeta.Clone(), + AllocatedAllocation: i.AllocatedAllocation.Clone(), + } + + if i.TopologyAwareAllocations != nil { + clone.TopologyAwareAllocations = make(map[string]Allocation) + for k, v := range i.TopologyAwareAllocations { + clone.TopologyAwareAllocations[k] = v.Clone() + } + } + + return clone +} + +func (e PodEntries) String() string { + if e == nil { + return "" + } + + contentBytes, err := json.Marshal(e) + if err != nil { + general.LoggerWithPrefix("PodEntries.String", general.LoggingPKGFull).Errorf("marshal PodEntries failed with error: %v", err) + return "" + } + return string(contentBytes) +} + +func (e PodEntries) Clone() PodEntries { + clone := make(PodEntries) + for podUID, containerEntries := range e { + clone[podUID] = make(ContainerEntries) + for containerName, allocationInfo := range containerEntries { + clone[podUID][containerName] = allocationInfo.Clone() + } + } + return clone +} + +func (e PodEntries) GetAllocationInfo(uid string, name string) *AllocationInfo { + if e == nil { + return nil + } + + if containerEntries, ok := e[uid]; ok { + if allocationInfo, ok := containerEntries[name]; ok { + return allocationInfo.Clone() + } + } + return nil +} + +func (e PodEntries) SetAllocationInfo(podUID string, containerName 
string, allocationInfo *AllocationInfo) { + if e == nil { + return + } + + if _, ok := e[podUID]; !ok { + e[podUID] = make(ContainerEntries) + } + + e[podUID][containerName] = allocationInfo.Clone() +} + +func (pre PodResourceEntries) String() string { + if pre == nil { + return "" + } + + contentBytes, err := json.Marshal(pre) + if err != nil { + general.LoggerWithPrefix("PodResourceEntries.String", general.LoggingPKGFull).Errorf("[PodResourceEntries.String] marshal PodResourceEntries failed with error: %v", err) + return "" + } + return string(contentBytes) +} + +func (pre PodResourceEntries) Clone() PodResourceEntries { + if pre == nil { + return nil + } + + clone := make(PodResourceEntries) + for resourceName, podEntries := range pre { + clone[resourceName] = podEntries.Clone() + } + return clone +} + +func (pre PodResourceEntries) RemovePod(podUID string) { + if pre == nil { + return + } + + for _, podEntries := range pre { + delete(podEntries, podUID) + } +} + +// GetTotalAllocatedResourceOfContainer returns the total allocated resource quantity of a container together with +// the specific resource IDs that are allocated. 
+func (pre PodResourceEntries) GetTotalAllocatedResourceOfContainer( + resourceName v1.ResourceName, podUID, containerName string, +) (int, sets.String) { + if podEntries, ok := pre[resourceName]; ok { + if allocationInfo := podEntries.GetAllocationInfo(podUID, containerName); allocationInfo != nil { + totalAllocationQuantity := int(allocationInfo.AllocatedAllocation.Quantity) + allocationIDs := sets.NewString() + for id := range allocationInfo.TopologyAwareAllocations { + allocationIDs.Insert(id) + } + return totalAllocationQuantity, allocationIDs + } + } + return 0, nil +} + +func (as *AllocationState) String() string { + if as == nil { + return "" + } + + contentBytes, err := json.Marshal(as) + if err != nil { + general.LoggerWithPrefix("AllocationState.String", general.LoggingPKGFull).Errorf("[AllocationState.String]marshal AllocationState failed with error: %v", err) + return "" + } + return string(contentBytes) +} + +func (as *AllocationState) Clone() *AllocationState { + if as == nil { + return nil + } + + return &AllocationState{ + PodEntries: as.PodEntries.Clone(), + } +} + +func (as *AllocationState) SetAllocationInfo(podUID string, containerName string, allocationInfo *AllocationInfo) { + if as == nil { + return + } + + if as.PodEntries == nil { + as.PodEntries = make(PodEntries) + } + + if _, ok := as.PodEntries[podUID]; !ok { + as.PodEntries[podUID] = make(ContainerEntries) + } + + as.PodEntries[podUID][containerName] = allocationInfo.Clone() +} + +func (am AllocationMap) String() string { + if am == nil { + return "" + } + + contentBytes, err := json.Marshal(am) + if err != nil { + general.LoggerWithPrefix("AllocationMap.String", general.LoggingPKGFull).Errorf("[AllocationMap.String]marshal AllocationMap failed with error: %v", err) + return "" + } + return string(contentBytes) +} + +func (am AllocationMap) Clone() AllocationMap { + if am == nil { + return nil + } + + clone := make(AllocationMap) + for id, ns := range am { + clone[id] = ns.Clone() + } 
+ return clone +} + +func (arm AllocationResourcesMap) String() string { + if arm == nil { + return "" + } + + contentBytes, err := json.Marshal(arm) + if err != nil { + klog.Errorf("[AllocationResourcesMap.String] marshal AllocationResourcesMap failed with error: %v", err) + return "" + } + return string(contentBytes) +} + +func (arm AllocationResourcesMap) Clone() AllocationResourcesMap { + clone := make(AllocationResourcesMap) + for resourceName, am := range arm { + clone[resourceName] = am.Clone() + } + return clone +} + +// GetRatioOfAccompanyResourceToTargetResource returns the ratio of total accompany resource to total target resource. +// For example, if the total number of accompany resource is 4 and the total number of target resource is 2, +// the ratio is 2. +func (arm AllocationResourcesMap) GetRatioOfAccompanyResourceToTargetResource(accompanyResourceName, targetResourceName string) float64 { + // Only the device counts are needed, so read the maps directly instead of deep-cloning them. + accompanyResourceMap := arm[v1.ResourceName(accompanyResourceName)] + accompanyResourceNumber := accompanyResourceMap.getNumberDevices() + + targetResourceMap := arm[v1.ResourceName(targetResourceName)] + targetResourceNumber := targetResourceMap.getNumberDevices() + + if targetResourceNumber == 0 { + return 0 + } + + return float64(accompanyResourceNumber) / float64(targetResourceNumber) +} + +func (as *AllocationState) GetQuantityAllocated() float64 { + if as == nil { + return 0 + } + + quantityAllocated := float64(0) + for _, podEntries := range as.PodEntries { + for _, allocationInfo := range podEntries { + quantityAllocated += allocationInfo.AllocatedAllocation.Quantity + } + } + return quantityAllocated +} + +func (am AllocationMap) GetQuantityAllocated(id string) float64 { + if am == nil { + return 0 + } + + return am[id].GetQuantityAllocated() +} + +func (am AllocationMap) IsRequestSatisfied(id string, request float64, allocatable
float64) bool { + allocated := am.GetQuantityAllocated(id) + return allocatable-allocated >= request +} + +func (am AllocationMap) getNumberDevices() int { + if am == nil { + return 0 + } + + return len(am) +} + +type DefaultResourceStateGeneratorRegistry struct { + mutex sync.RWMutex + generators map[string]DefaultResourceStateGenerator +} + +func NewDefaultResourceStateGeneratorRegistry() *DefaultResourceStateGeneratorRegistry { + return &DefaultResourceStateGeneratorRegistry{ + generators: make(map[string]DefaultResourceStateGenerator), + } +} + +func (r *DefaultResourceStateGeneratorRegistry) RegisterResourceStateGenerator(resourceName string, generator DefaultResourceStateGenerator) { + r.mutex.Lock() + defer r.mutex.Unlock() + + r.generators[resourceName] = generator +} + +func (r *DefaultResourceStateGeneratorRegistry) GetGenerators() map[string]DefaultResourceStateGenerator { + r.mutex.RLock() + defer r.mutex.RUnlock() + + return r.generators +} + +func (r *DefaultResourceStateGeneratorRegistry) GetGenerator(resourceName string) (DefaultResourceStateGenerator, bool) { + r.mutex.RLock() + defer r.mutex.RUnlock() + + generator, ok := r.generators[resourceName] + return generator, ok +} diff --git a/pkg/agent/qrm-plugins/gpu/state/state_checkpoint.go b/pkg/agent/qrm-plugins/gpu/state/state_checkpoint.go new file mode 100644 index 0000000000..11b9207285 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/state/state_checkpoint.go @@ -0,0 +1,285 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package state + +import ( + "errors" + "fmt" + "path" + "reflect" + "sync" + "time" + + v1 "k8s.io/api/core/v1" + "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" + cmerrors "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors" + + "github.com/kubewharf/katalyst-core/pkg/config/agent/qrm" + "github.com/kubewharf/katalyst-core/pkg/metrics" + "github.com/kubewharf/katalyst-core/pkg/util/general" +) + +const ( + metricMetaCacheStoreStateDuration = "metacache_store_state_duration" +) + +var ( + _ State = &stateCheckpoint{} + generalLog = general.LoggerWithPrefix("gpu_plugin", general.LoggingPKGFull) +) + +// stateCheckpoint is an in-memory implementation of State; +// everytime we want to read or write states, those requests will always +// go to in-memory State, and then go to disk State, i.e. in write-back mode +type stateCheckpoint struct { + sync.RWMutex + cache State + policyName string + checkpointManager checkpointmanager.CheckpointManager + checkpointName string + // when we add new properties to checkpoint, + // it will cause checkpoint corruption and we should skip it + skipStateCorruption bool + emitter metrics.MetricEmitter +} + +func (s *stateCheckpoint) SetMachineState(allocationResourcesMap AllocationResourcesMap, persist bool) { + s.Lock() + defer s.Unlock() + + s.cache.SetMachineState(allocationResourcesMap, persist) + if persist { + err := s.storeState() + if err != nil { + generalLog.ErrorS(err, "store machineState to checkpoint error") + } + } +} + +func (s *stateCheckpoint) SetResourceState(resourceName v1.ResourceName, allocationMap AllocationMap, persist bool) { + s.Lock() + defer s.Unlock() + + s.cache.SetResourceState(resourceName, allocationMap, persist) + if persist { + err := s.storeState() + if err != nil { + generalLog.ErrorS(err, "store resource state to checkpoint error") + } + } +} + +func (s *stateCheckpoint) 
SetPodResourceEntries(podResourceEntries PodResourceEntries, persist bool) { + s.Lock() + defer s.Unlock() + + s.cache.SetPodResourceEntries(podResourceEntries, persist) + if persist { + err := s.storeState() + if err != nil { + generalLog.ErrorS(err, "store pod entries to checkpoint error", "err") + } + } +} + +func (s *stateCheckpoint) SetAllocationInfo( + resourceName v1.ResourceName, podUID, containerName string, allocationInfo *AllocationInfo, persist bool, +) { + s.Lock() + defer s.Unlock() + + s.cache.SetAllocationInfo(resourceName, podUID, containerName, allocationInfo, persist) + if persist { + err := s.storeState() + if err != nil { + generalLog.ErrorS(err, "store allocationInfo to checkpoint error") + } + } +} + +func (s *stateCheckpoint) Delete(resourceName v1.ResourceName, podUID, containerName string, persist bool) { + s.Lock() + defer s.Unlock() + + s.cache.Delete(resourceName, podUID, containerName, persist) + if persist { + err := s.storeState() + if err != nil { + generalLog.ErrorS(err, "store state after delete operation to checkpoint error") + } + } +} + +func (s *stateCheckpoint) ClearState() { + s.Lock() + defer s.Unlock() + + s.cache.ClearState() + err := s.storeState() + if err != nil { + generalLog.ErrorS(err, "store state after clear operation to checkpoint error") + } +} + +func (s *stateCheckpoint) StoreState() error { + s.Lock() + defer s.Unlock() + return s.storeState() +} + +func (s *stateCheckpoint) GetMachineState() AllocationResourcesMap { + s.RLock() + defer s.RUnlock() + + return s.cache.GetMachineState() +} + +func (s *stateCheckpoint) GetPodResourceEntries() PodResourceEntries { + s.RLock() + defer s.RUnlock() + + return s.cache.GetPodResourceEntries() +} + +func (s *stateCheckpoint) GetPodEntries(resourceName v1.ResourceName) PodEntries { + s.RLock() + defer s.RUnlock() + + return s.cache.GetPodEntries(resourceName) +} + +func (s *stateCheckpoint) GetAllocationInfo( + resourceName v1.ResourceName, podUID, containerName string, 
+) *AllocationInfo { + s.RLock() + defer s.RUnlock() + + return s.cache.GetAllocationInfo(resourceName, podUID, containerName) +} + +func (s *stateCheckpoint) storeState() error { + startTime := time.Now() + general.InfoS("called") + defer func() { + elapsed := time.Since(startTime) + general.InfoS("finished", "duration", elapsed) + _ = s.emitter.StoreFloat64(metricMetaCacheStoreStateDuration, float64(elapsed/time.Millisecond), metrics.MetricTypeNameRaw) + }() + checkpoint := NewGPUPluginCheckpoint() + checkpoint.PolicyName = s.policyName + checkpoint.MachineState = s.cache.GetMachineState() + checkpoint.PodResourceEntries = s.cache.GetPodResourceEntries() + + err := s.checkpointManager.CreateCheckpoint(s.checkpointName, checkpoint) + if err != nil { + generalLog.ErrorS(err, "could not save checkpoint") + return err + } + return nil +} + +func (s *stateCheckpoint) restoreState(defaultResourceStateGenerators *DefaultResourceStateGeneratorRegistry) error { + s.Lock() + defer s.Unlock() + var err error + var foundAndSkippedStateCorruption bool + + checkpoint := NewGPUPluginCheckpoint() + if err = s.checkpointManager.GetCheckpoint(s.checkpointName, checkpoint); err != nil { + if errors.Is(err, cmerrors.ErrCheckpointNotFound) { + return s.storeState() + } else if errors.Is(err, cmerrors.ErrCorruptCheckpoint) { + if !s.skipStateCorruption { + return err + } + + foundAndSkippedStateCorruption = true + generalLog.Infof("restore checkpoint failed with err: %s, but we skip it", err) + } else { + return err + } + } + + if s.policyName != checkpoint.PolicyName && !s.skipStateCorruption { + return fmt.Errorf("configured policy %q differs from state checkpoint policy %q", s.policyName, checkpoint.PolicyName) + } + + machineState, err := GenerateMachineStateFromPodEntries(checkpoint.PodResourceEntries, defaultResourceStateGenerators) + if err != nil { + return fmt.Errorf("GenerateMachineStateFromPodEntries failed with error: %v", err) + } + + s.cache.SetMachineState(machineState, 
false) + s.cache.SetPodResourceEntries(checkpoint.PodResourceEntries, false) + + if !reflect.DeepEqual(machineState, checkpoint.MachineState) { + generalLog.Warningf("machine state changed: "+ + "machineState: %s; checkpointMachineState: %s", + machineState.String(), checkpoint.MachineState.String()) + + err = s.storeState() + if err != nil { + return fmt.Errorf("storeState when machine state changed failed with error: %v", err) + } + } + + if foundAndSkippedStateCorruption { + generalLog.Infof("found and skipped state corruption, we shoud store to rectify the checksum") + + err = s.storeState() + if err != nil { + return fmt.Errorf("storeState failed with error: %v", err) + } + } + + generalLog.InfoS("state checkpoint: restored state from checkpoint") + + return nil +} + +func NewCheckpointState( + conf *qrm.QRMPluginsConfiguration, stateDir, checkpointName, policyName string, + defaultResourceStateGenerators *DefaultResourceStateGeneratorRegistry, + skipStateCorruption bool, emitter metrics.MetricEmitter, +) (State, error) { + checkpointManager, err := checkpointmanager.NewCheckpointManager(stateDir) + if err != nil { + return nil, fmt.Errorf("failed to initialize checkpoint manager: %v", err) + } + + defaultCache, err := NewGPUPluginState(conf, defaultResourceStateGenerators) + if err != nil { + return nil, fmt.Errorf("NewGPUPluginState failed with error: %v", err) + } + + sc := &stateCheckpoint{ + cache: defaultCache, + policyName: policyName, + checkpointManager: checkpointManager, + checkpointName: checkpointName, + skipStateCorruption: skipStateCorruption, + emitter: emitter, + } + + if err := sc.restoreState(defaultResourceStateGenerators); err != nil { + return nil, fmt.Errorf("could not restore state from checkpoint: %v, please drain this node and delete "+ + "the gpu plugin checkpoint file %q before restarting Kubelet", + err, path.Join(stateDir, checkpointName)) + } + + return sc, nil +} diff --git a/pkg/agent/qrm-plugins/gpu/state/state_mem.go 
b/pkg/agent/qrm-plugins/gpu/state/state_mem.go new file mode 100644 index 0000000000..89793c5a6b --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/state/state_mem.go @@ -0,0 +1,173 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package state + +import ( + "fmt" + "sync" + + v1 "k8s.io/api/core/v1" + + "github.com/kubewharf/katalyst-core/pkg/config/agent/qrm" +) + +// gpuPluginState is an in-memory implementation of State; +// everytime we want to read or write states, those requests will always +// go to in-memory State, and then go to disk State, i.e. 
in write-back mode +type gpuPluginState struct { + sync.RWMutex + + qrmConf *qrm.QRMPluginsConfiguration + defaultResourceStateGenerators *DefaultResourceStateGeneratorRegistry + + machineState AllocationResourcesMap + podResourceEntries PodResourceEntries +} + +func NewGPUPluginState( + conf *qrm.QRMPluginsConfiguration, + resourceStateGeneratorRegistry *DefaultResourceStateGeneratorRegistry, +) (State, error) { + generalLog.InfoS("initializing new gpu plugin in-memory state store") + + defaultMachineState, err := GenerateMachineState(resourceStateGeneratorRegistry) + if err != nil { + return nil, fmt.Errorf("GenerateMachineState failed with error: %w", err) + } + + return &gpuPluginState{ + qrmConf: conf, + machineState: defaultMachineState, + defaultResourceStateGenerators: resourceStateGeneratorRegistry, + podResourceEntries: make(PodResourceEntries), + }, nil +} + +func (s *gpuPluginState) SetMachineState(allocationResourcesMap AllocationResourcesMap, _ bool) { + s.Lock() + defer s.Unlock() + s.machineState = allocationResourcesMap.Clone() + generalLog.InfoS("updated gpu plugin machine state", + "GPUMap", allocationResourcesMap.String()) +} + +func (s *gpuPluginState) SetResourceState(resourceName v1.ResourceName, allocationMap AllocationMap, _ bool) { + s.Lock() + defer s.Unlock() + s.machineState[resourceName] = allocationMap.Clone() + generalLog.InfoS("updated gpu plugin resource state", + "resourceName", resourceName, + "allocationMap", allocationMap.String()) +} + +func (s *gpuPluginState) SetPodResourceEntries(podResourceEntries PodResourceEntries, _ bool) { + s.Lock() + defer s.Unlock() + s.podResourceEntries = podResourceEntries.Clone() +} + +func (s *gpuPluginState) SetAllocationInfo( + resourceName v1.ResourceName, podUID, containerName string, allocationInfo *AllocationInfo, _ bool, +) { + s.Lock() + defer s.Unlock() + + if _, ok := s.podResourceEntries[resourceName]; !ok { + s.podResourceEntries[resourceName] = make(PodEntries) + } + + if _, ok := 
s.podResourceEntries[resourceName][podUID]; !ok { + s.podResourceEntries[resourceName][podUID] = make(ContainerEntries) + } + + s.podResourceEntries[resourceName][podUID][containerName] = allocationInfo.Clone() + generalLog.InfoS("updated gpu plugin pod resource entries", + "podUID", podUID, + "containerName", containerName, + "allocationInfo", allocationInfo.String()) +} + +func (s *gpuPluginState) Delete(resourceName v1.ResourceName, podUID, containerName string, _ bool) { + s.Lock() + defer s.Unlock() + + if _, ok := s.podResourceEntries[resourceName]; !ok { + return + } + + if _, ok := s.podResourceEntries[resourceName][podUID]; !ok { + return + } + + delete(s.podResourceEntries[resourceName][podUID], containerName) + if len(s.podResourceEntries[resourceName][podUID]) == 0 { + delete(s.podResourceEntries[resourceName], podUID) + } + + generalLog.InfoS("deleted container entry", "podUID", podUID, "containerName", containerName) +} + +func (s *gpuPluginState) ClearState() { + s.Lock() + defer s.Unlock() + + machineState, err := GenerateMachineState(s.defaultResourceStateGenerators) + if err != nil { + generalLog.ErrorS(err, "failed to generate machine state") + } + s.machineState = machineState + s.podResourceEntries = make(PodResourceEntries) + + generalLog.InfoS("cleared state") +} + +func (s *gpuPluginState) StoreState() error { + // nothing to do + return nil +} + +func (s *gpuPluginState) GetMachineState() AllocationResourcesMap { + s.RLock() + defer s.RUnlock() + + return s.machineState.Clone() +} + +func (s *gpuPluginState) GetPodResourceEntries() PodResourceEntries { + s.RLock() + defer s.RUnlock() + + return s.podResourceEntries.Clone() +} + +func (s *gpuPluginState) GetPodEntries(resourceName v1.ResourceName) PodEntries { + s.RLock() + defer s.RUnlock() + + return s.podResourceEntries[resourceName].Clone() +} + +func (s *gpuPluginState) GetAllocationInfo(resourceName v1.ResourceName, podUID, containerName string) *AllocationInfo { + s.RLock() + defer 
s.RUnlock() + + if res, ok := s.podResourceEntries[resourceName][podUID][containerName]; ok { + return res.Clone() + } + + return nil +} diff --git a/pkg/agent/qrm-plugins/gpu/state/state_test.go b/pkg/agent/qrm-plugins/gpu/state/state_test.go new file mode 100644 index 0000000000..b595b3467b --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/state/state_test.go @@ -0,0 +1,199 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package state + +import ( + "testing" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/sets" +) + +func TestAllocationResourcesMap_GetRatioOfAccompanyResourceToTargetResource(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + accompanyResourceName string + targetResourceName string + arm AllocationResourcesMap + want float64 + }{ + { + name: "normal case", + accompanyResourceName: "accompanyResource", + targetResourceName: "targetResource", + arm: AllocationResourcesMap{ + v1.ResourceName("accompanyResource"): { + "accompany1": {}, + "accompany2": {}, + "accompany3": {}, + "accompany4": {}, + }, + v1.ResourceName("targetResource"): { + "target1": {}, + "target2": {}, + }, + }, + want: 2.0, + }, + { + name: "got a ratio that is a fraction", + accompanyResourceName: "accompanyResource", + targetResourceName: "targetResource", + arm: AllocationResourcesMap{ + v1.ResourceName("accompanyResource"): { + "accompany1": {}, + "accompany2": {}, + }, + 
v1.ResourceName("targetResource"): { + "target1": {}, + "target2": {}, + "target3": {}, + "target4": {}, + }, + }, + want: 0.5, + }, + { + name: "no devices for target resource", + accompanyResourceName: "accompanyResource", + targetResourceName: "targetResource", + arm: AllocationResourcesMap{ + v1.ResourceName("accompanyResource"): { + "accompany1": {}, + "accompany2": {}, + }, + }, + want: 0, + }, + { + name: "no devices for accompany resource and target resource", + accompanyResourceName: "accompanyResource", + targetResourceName: "targetResource", + arm: AllocationResourcesMap{}, + want: 0, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := tt.arm.GetRatioOfAccompanyResourceToTargetResource(tt.accompanyResourceName, tt.targetResourceName) + if got != tt.want { + t.Errorf("GetRatioOfAccompanyResourceToTargetResource() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestPodResourceEntries_GetTotalAllocatedResourceOfContainer(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + resourceName v1.ResourceName + podUID string + containerName string + pre PodResourceEntries + wantTotalAllocationQuantity int + wantAllocationIDs sets.String + }{ + { + name: "normal case", + resourceName: v1.ResourceName("testResource"), + podUID: "podUID", + containerName: "containerName", + pre: PodResourceEntries{ + v1.ResourceName("testResource"): { + "podUID2": { + "containerName": { + AllocatedAllocation: Allocation{ + Quantity: 2, + }, + TopologyAwareAllocations: map[string]Allocation{ + "test-1": { + Quantity: 1, + }, + "test-2": { + Quantity: 1, + }, + }, + }, + }, + "podUID": { + "containerName": { + AllocatedAllocation: Allocation{ + Quantity: 2, + }, + TopologyAwareAllocations: map[string]Allocation{ + "test-3": { + Quantity: 1, + }, + "test-4": { + Quantity: 1, + }, + }, + }, + }, + }, + }, + wantTotalAllocationQuantity: 2, + wantAllocationIDs: sets.NewString("test-3", "test-4"), + }, + { + 
name: "no allocation", + resourceName: v1.ResourceName("testResource"), + podUID: "podUID", + containerName: "containerName", + pre: PodResourceEntries{ + v1.ResourceName("testResource"): { + "podUID2": { + "containerName": { + AllocatedAllocation: Allocation{ + Quantity: 2, + }, + TopologyAwareAllocations: map[string]Allocation{ + "test-1": { + Quantity: 1, + }, + "test-2": { + Quantity: 1, + }, + }, + }, + }, + }, + }, + wantTotalAllocationQuantity: 0, + wantAllocationIDs: sets.NewString(), + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + gotTotalAllocationQuantity, gotAllocationIDs := tt.pre.GetTotalAllocatedResourceOfContainer(tt.resourceName, tt.podUID, tt.containerName) + assert.Equal(t, tt.wantTotalAllocationQuantity, gotTotalAllocationQuantity) + + assert.ElementsMatch(t, tt.wantAllocationIDs.UnsortedList(), gotAllocationIDs.UnsortedList()) + }) + } +} diff --git a/pkg/agent/qrm-plugins/gpu/state/util.go b/pkg/agent/qrm-plugins/gpu/state/util.go new file mode 100644 index 0000000000..8863220cdb --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/state/util.go @@ -0,0 +1,140 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package state + +import ( + "fmt" + + v1 "k8s.io/api/core/v1" + + "github.com/kubewharf/katalyst-core/pkg/util/machine" +) + +// GenerateMachineState returns an empty AllocationResourcesMap for all resource names. 
+func GenerateMachineState( + defaultMachineStateGenerators *DefaultResourceStateGeneratorRegistry, +) (AllocationResourcesMap, error) { + if defaultMachineStateGenerators == nil { + return nil, fmt.Errorf("cannot generate machine state from nil defaultMachineStateGenerators") + } + + allocationResourcesMap := make(AllocationResourcesMap) + for resourceName, generator := range defaultMachineStateGenerators.GetGenerators() { + allocationMap, err := generator.GenerateDefaultResourceState() + if err != nil { + return nil, fmt.Errorf("GenerateDefaultResourceState for resource %s failed with error: %v", resourceName, err) + } + allocationResourcesMap[v1.ResourceName(resourceName)] = allocationMap + } + + return allocationResourcesMap, nil +} + +// GenerateMachineStateFromPodEntries returns AllocationResourcesMap for allocated resources based on +// resource name along with existed pod resource entries. +func GenerateMachineStateFromPodEntries( + podResourceEntries PodResourceEntries, + defaultMachineStateGenerators *DefaultResourceStateGeneratorRegistry, +) (AllocationResourcesMap, error) { + if defaultMachineStateGenerators == nil { + return nil, fmt.Errorf("cannot generate machine state from nil resourceStateGeneratorRegistry") + } + + machineState := make(AllocationResourcesMap) + for resourceName, podEntries := range podResourceEntries { + generator, ok := defaultMachineStateGenerators.GetGenerator(string(resourceName)) + if !ok { + return nil, fmt.Errorf("GetGenerator for resource %s failed", resourceName) + } + + allocationMap, err := GenerateResourceStateFromPodEntries(podEntries, generator) + if err != nil { + return nil, fmt.Errorf("GenerateResourceStateFromPodEntries for resource %s failed with error: %v", resourceName, err) + } + machineState[resourceName] = allocationMap + } + + return machineState, nil +} + +// GenerateResourceStateFromPodEntries returns an AllocationMap of a certain resource based on pod entries +func GenerateResourceStateFromPodEntries( + 
podEntries PodEntries, + generator DefaultResourceStateGenerator, +) (AllocationMap, error) { + machineState, err := generator.GenerateDefaultResourceState() + if err != nil { + return nil, fmt.Errorf("GenerateDefaultResourceState failed with error: %v", err) + } + + for deviceID, allocationState := range machineState { + for podUID, containerEntries := range podEntries { + for containerName, allocationInfo := range containerEntries { + if containerName != "" && allocationInfo != nil { + allocation, ok := allocationInfo.TopologyAwareAllocations[deviceID] + if !ok { + continue + } + alloc := allocationInfo.Clone() + alloc.AllocatedAllocation = allocation.Clone() + alloc.TopologyAwareAllocations = map[string]Allocation{deviceID: allocation} + allocationState.SetAllocationInfo(podUID, containerName, alloc) + } + } + } + machineState[deviceID] = allocationState + } + + return machineState, nil +} + +type genericDefaultResourceStateGenerator struct { + resourceName string + topologyRegistry *machine.DeviceTopologyRegistry +} + +func NewGenericDefaultResourceStateGenerator( + resourceName string, + topologyRegistry *machine.DeviceTopologyRegistry, +) DefaultResourceStateGenerator { + return &genericDefaultResourceStateGenerator{resourceName: resourceName, topologyRegistry: topologyRegistry} +} + +// GenerateDefaultResourceState return a default resource state by topology +func (g *genericDefaultResourceStateGenerator) GenerateDefaultResourceState() (AllocationMap, error) { + if g == nil { + return nil, fmt.Errorf("nil DefaultResourceStateGenerator") + } + + if g.topologyRegistry == nil { + return nil, fmt.Errorf("topology provider registry must not be nil") + } + + topology, _, err := g.topologyRegistry.GetDeviceTopology(g.resourceName) + if err != nil { + return nil, fmt.Errorf("topology provider registry failed with error: %v", err) + } + + resourceState := make(AllocationMap) + for deviceID := range topology.Devices { + resourceState[deviceID] = &AllocationState{ + 
PodEntries: make(PodEntries), + } + } + + return resourceState, nil +} diff --git a/pkg/agent/qrm-plugins/gpu/staticpolicy/policy.go b/pkg/agent/qrm-plugins/gpu/staticpolicy/policy.go new file mode 100644 index 0000000000..1a91438820 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/staticpolicy/policy.go @@ -0,0 +1,757 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package staticpolicy + +import ( + "context" + "fmt" + "sync" + "time" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/apimachinery/pkg/util/wait" + pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + + "github.com/kubewharf/katalyst-api/pkg/consts" + "github.com/kubewharf/katalyst-api/pkg/plugins/skeleton" + "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent" + appqrm "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent/qrm" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/baseplugin" + gpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/consts" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/customdeviceplugin" + devicepluginregistry "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/customdeviceplugin/registry" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/resourceplugin" + resourcepluginregistry "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/resourceplugin/registry" + 
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/state" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util" + "github.com/kubewharf/katalyst-core/pkg/agent/utilcomponent/periodicalhandler" + "github.com/kubewharf/katalyst-core/pkg/config" + dynamicconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic" + "github.com/kubewharf/katalyst-core/pkg/metaserver" + "github.com/kubewharf/katalyst-core/pkg/metrics" + "github.com/kubewharf/katalyst-core/pkg/util/general" +) + +// StaticPolicy is the static gpu policy +type StaticPolicy struct { + sync.RWMutex + pluginapi.UnimplementedResourcePluginServer + *baseplugin.BasePlugin + + name string + stopCh chan struct{} + started bool + + emitter metrics.MetricEmitter + + residualHitMap map[string]int64 + + associatedDeviceNames sets.String + resourcePlugins map[string]resourceplugin.ResourcePlugin + customDevicePlugins map[string]customdeviceplugin.CustomDevicePlugin +} + +// NewStaticPolicy returns a static gpu policy +func NewStaticPolicy( + agentCtx *agent.GenericContext, conf *config.Configuration, + _ interface{}, agentName string, +) (bool, agent.Component, error) { + wrappedEmitter := agentCtx.EmitterPool.GetDefaultMetricsEmitter().WithTags(agentName, metrics.MetricTag{ + Key: util.QRMPluginPolicyTagName, + Val: gpuconsts.GPUResourcePluginPolicyNameStatic, + }) + + basePlugin, err := baseplugin.NewBasePlugin(agentCtx, conf, wrappedEmitter) + if err != nil { + return false, agent.ComponentStub{}, fmt.Errorf("failed to create base plugin: %w", err) + } + + policyImplement := &StaticPolicy{ + emitter: wrappedEmitter, + stopCh: make(chan struct{}), + name: fmt.Sprintf("%s_%s", agentName, gpuconsts.GPUResourcePluginPolicyNameStatic), + residualHitMap: make(map[string]int64), + BasePlugin: basePlugin, + resourcePlugins: make(map[string]resourceplugin.ResourcePlugin), + associatedDeviceNames: sets.NewString(), + customDevicePlugins: make(map[string]customdeviceplugin.CustomDevicePlugin), 
+ } + + if err = policyImplement.registerDefaultResourcePlugins(); err != nil { + return false, agent.ComponentStub{}, fmt.Errorf("failed to register resource plugins: %w", err) + } + if err = policyImplement.registerDefaultCustomDevicePlugins(); err != nil { + return false, agent.ComponentStub{}, fmt.Errorf("failed to register custom device plugins: %w", err) + } + + // init state must be done after resource plugins and custom device plugins are registered + err = policyImplement.InitState() + if err != nil { + return false, agent.ComponentStub{}, fmt.Errorf("failed to init state: %w", err) + } + + pluginWrapper, err := skeleton.NewRegistrationPluginWrapper(policyImplement, conf.QRMPluginSocketDirs, + func(key string, value int64) { + _ = wrappedEmitter.StoreInt64(key, value, metrics.MetricTypeNameRaw) + }) + if err != nil { + return false, agent.ComponentStub{}, fmt.Errorf("static policy new plugin wrapper failed with error: %v", err) + } + + return true, &agent.PluginWrapper{GenericPlugin: pluginWrapper}, nil +} + +var _ skeleton.QRMPlugin = (*StaticPolicy)(nil) + +// Start starts this plugin +func (p *StaticPolicy) Start() (err error) { + general.Infof("called") + + p.Lock() + defer func() { + if !p.started { + if err == nil { + p.started = true + } else { + close(p.stopCh) + } + } + p.Unlock() + }() + + if p.started { + general.Infof("already started") + return nil + } + + p.stopCh = make(chan struct{}) + + go wait.Until(func() { + _ = p.emitter.StoreInt64(util.MetricNameHeartBeat, 1, metrics.MetricTypeNameRaw) + }, time.Second*30, p.stopCh) + + err = periodicalhandler.RegisterPeriodicalHandlerWithHealthz(gpuconsts.ClearResidualState, general.HealthzCheckStateNotReady, + appqrm.QRMGPUPluginPeriodicalHandlerGroupName, p.clearResidualState, gpuconsts.StateCheckPeriod, gpuconsts.StateCheckTolerationTimes) + if err != nil { + general.Errorf("start %v failed, err: %v", gpuconsts.ClearResidualState, err) + } + + err = p.BasePlugin.Run(p.stopCh) + if err != nil { + 
return fmt.Errorf("gpu plugin run failed with error: %w", err) + } + + go wait.Until(func() { + periodicalhandler.ReadyToStartHandlersByGroup(appqrm.QRMGPUPluginPeriodicalHandlerGroupName) + }, 5*time.Second, p.stopCh) + + return nil +} + +// Stop stops this plugin +func (p *StaticPolicy) Stop() error { + p.Lock() + defer func() { + p.started = false + p.Unlock() + general.Infof("stopped") + }() + + if !p.started { + general.Warningf("already stopped") + return nil + } + + close(p.stopCh) + + return nil +} + +// Name returns the name of this plugin +func (p *StaticPolicy) Name() string { + return p.name +} + +// ResourceName returns resource names managed by this plugin +func (p *StaticPolicy) ResourceName() string { + return string(consts.ResourceGPUMemory) +} + +// GetTopologyHints returns hints of corresponding resources +func (p *StaticPolicy) GetTopologyHints( + ctx context.Context, + req *pluginapi.ResourceRequest, +) (resp *pluginapi.ResourceHintsResponse, err error) { + general.InfofV(4, "called") + if req == nil { + return nil, fmt.Errorf("GetTopologyHints got nil req") + } + + p.RLock() + defer p.RUnlock() + + resourcePlugin := p.getResourcePlugin(req.ResourceName) + if resourcePlugin == nil { + return nil, fmt.Errorf("failed to find resource plugin by name %s", req.ResourceName) + } + return resourcePlugin.GetTopologyHints(ctx, req) +} + +// GetPodTopologyHints returns hints of corresponding resources +func (p *StaticPolicy) GetPodTopologyHints( + _ context.Context, + req *pluginapi.PodResourceRequest, +) (resp *pluginapi.PodResourceHintsResponse, err error) { + return nil, util.ErrNotImplemented +} + +func (p *StaticPolicy) RemovePod( + ctx context.Context, + req *pluginapi.RemovePodRequest, +) (*pluginapi.RemovePodResponse, error) { + if req == nil { + return nil, fmt.Errorf("RemovePod got nil req") + } + + p.Lock() + defer p.Unlock() + + // For every resource plugin and custom resource plugin, remove pod from their state + if err := 
p.removePod(req.PodUid); err != nil { + general.ErrorS(err, "remove pod failed with error", "podUID", req.PodUid) + return nil, err + } + + return &pluginapi.RemovePodResponse{}, nil +} + +// GetResourcesAllocation returns allocation results of corresponding resources +func (p *StaticPolicy) GetResourcesAllocation( + _ context.Context, + _ *pluginapi.GetResourcesAllocationRequest, +) (*pluginapi.GetResourcesAllocationResponse, error) { + general.InfofV(4, "called") + return &pluginapi.GetResourcesAllocationResponse{}, nil +} + +// GetTopologyAwareResources returns allocation results of corresponding resources as topology aware format +func (p *StaticPolicy) GetTopologyAwareResources( + ctx context.Context, + req *pluginapi.GetTopologyAwareResourcesRequest, +) (*pluginapi.GetTopologyAwareResourcesResponse, error) { + general.InfofV(4, "called") + if req == nil { + return nil, fmt.Errorf("GetTopologyAwareResources got nil req") + } + + p.RLock() + defer p.RUnlock() + + // Get topology aware resources for all resource plugins + allocatedResourcesList := make([]*pluginapi.GetTopologyAwareResourcesResponse, 0) + for _, resourcePlugin := range p.resourcePlugins { + allocatedResource, err := resourcePlugin.GetTopologyAwareResources(ctx, req.PodUid, req.ContainerName) + if err != nil { + general.Errorf("failed to get topology aware resources for plugin %s: %v", resourcePlugin.ResourceName(), err) + continue + } + + if allocatedResource == nil { + continue + } + allocatedResourcesList = append(allocatedResourcesList, allocatedResource) + } + + // Merge the respective response into one response + resp, err := p.mergeTopologyAwareResourcesResponse(req.PodUid, req.ContainerName, allocatedResourcesList) + if err != nil { + return nil, fmt.Errorf("failed to merge topology aware resources: %w", err) + } + + return resp, nil +} + +// mergeTopologyAwareResourcesResponse takes the separate topology aware resources response from the different sub-plugins and +// merge them into one 
response. +func (p *StaticPolicy) mergeTopologyAwareResourcesResponse( + podUID, containerName string, respList []*pluginapi.GetTopologyAwareResourcesResponse, +) (*pluginapi.GetTopologyAwareResourcesResponse, error) { + result := &pluginapi.GetTopologyAwareResourcesResponse{ + PodUid: podUID, + ContainerTopologyAwareResources: &pluginapi.ContainerTopologyAwareResources{ + ContainerName: containerName, + }, + } + + allocatedResources := make(map[string]*pluginapi.TopologyAwareResource) + for _, resp := range respList { + if resp == nil { + continue + } + + if result.PodName != "" && result.PodName != resp.PodName { + general.Errorf("pod name %s not match, expect %s", resp.PodName, result.PodName) + return nil, fmt.Errorf("pod name %s not match, expect %s", resp.PodName, result.PodName) + } + + if result.PodNamespace != "" && result.PodNamespace != resp.PodNamespace { + general.Errorf("pod namespace %s not match, expect %s", resp.PodNamespace, result.PodNamespace) + return nil, fmt.Errorf("pod namespace %s not match, expect %s", resp.PodNamespace, result.PodNamespace) + } + + if result.PodName == "" { + result.PodName = resp.PodName + } + if result.PodNamespace == "" { + result.PodNamespace = resp.PodNamespace + } + + if resp.ContainerTopologyAwareResources == nil { + general.Errorf("container topology aware resources is nil for pod %s/%s, namespace %s", podUID, containerName, result.PodNamespace) + return nil, fmt.Errorf("container topology aware resources is nil for pod %s/%s, namespace %s", podUID, containerName, result.PodNamespace) + } + + for resourceName, resource := range resp.ContainerTopologyAwareResources.AllocatedResources { + allocatedResources[resourceName] = resource + } + } + + result.ContainerTopologyAwareResources.AllocatedResources = allocatedResources + return result, nil +} + +// GetTopologyAwareAllocatableResources returns corresponding allocatable resources as topology aware format +func (p *StaticPolicy) GetTopologyAwareAllocatableResources( 
 + ctx context.Context, + req *pluginapi.GetTopologyAwareAllocatableResourcesRequest, +) (*pluginapi.GetTopologyAwareAllocatableResourcesResponse, error) { + general.InfofV(4, "called") + if req == nil { + return nil, fmt.Errorf("GetTopologyAwareAllocatableResources got nil req") + } + + p.RLock() + defer p.RUnlock() + + // Get topology aware allocatable resources for all resource plugins + allocatableResources := make(map[string]*pluginapi.AllocatableTopologyAwareResource) + for _, resourcePlugin := range p.resourcePlugins { + allocatableResource, err := resourcePlugin.GetTopologyAwareAllocatableResources(ctx) + if err != nil { + general.Errorf("failed to get topology aware allocatable resources for plugin %s: %v", resourcePlugin.ResourceName(), err) + continue + } + + if allocatableResource == nil { + continue + } + + allocatableResources[allocatableResource.ResourceName] = allocatableResource.AllocatableTopologyAwareResource + } + + return &pluginapi.GetTopologyAwareAllocatableResourcesResponse{ + AllocatableResources: allocatableResources, + }, nil +} + +// GetResourcePluginOptions returns options to be communicated with Resource Manager +func (p *StaticPolicy) GetResourcePluginOptions( + context.Context, + *pluginapi.Empty, +) (*pluginapi.ResourcePluginOptions, error) { + return &pluginapi.ResourcePluginOptions{ + PreStartRequired: false, + WithTopologyAlignment: true, + NeedReconcile: false, + AssociatedDevices: p.associatedDeviceNames.List(), + }, nil +} + +// Allocate is called during pod admit so that the resource +// plugin can allocate corresponding resource for the container +// according to resource request +func (p *StaticPolicy) Allocate( + ctx context.Context, + req *pluginapi.ResourceRequest, +) (resp *pluginapi.ResourceAllocationResponse, err error) { + if req == nil { + return nil, fmt.Errorf("Allocate got nil req") + } + + p.Lock() + defer func() { + if err != nil { + _ = p.removeContainer(req.PodUid, req.ContainerName, 
v1.ResourceName(req.ResourceName)) + } + p.Unlock() + }() + + resourcePlugin := p.getResourcePlugin(req.ResourceName) + if resourcePlugin == nil { + return nil, fmt.Errorf("failed to get resource plugin by name %s", req.ResourceName) + } + + resp, err = resourcePlugin.Allocate(ctx, req, nil) + return resp, err +} + +// AllocateForPod is called during pod admit so that the resource +// plugin can allocate corresponding resource for the pod +// according to resource request +func (p *StaticPolicy) AllocateForPod( + _ context.Context, + req *pluginapi.PodResourceRequest, +) (resp *pluginapi.PodResourceAllocationResponse, err error) { + return nil, util.ErrNotImplemented +} + +// PreStartContainer is called, if indicated by resource plugin during registration phase, +// before each container start. Resource plugin can run resource specific operations +// such as resetting the resource before making resources available to the container +func (p *StaticPolicy) PreStartContainer( + context.Context, + *pluginapi.PreStartContainerRequest, +) (*pluginapi.PreStartContainerResponse, error) { + return &pluginapi.PreStartContainerResponse{}, nil +} + +func (p *StaticPolicy) removePod(podUID string) error { + return p.removeWithUpdate(podUID, func(podResourceEntries state.PodResourceEntries) bool { + found := false + for _, podEntries := range podResourceEntries { + if podEntries[podUID] != nil { + found = true + } + delete(podEntries, podUID) + } + return found + }) +} + +// removeContainer removes container entry given the specific podUID, container name and resource name. 
+func (p *StaticPolicy) removeContainer(podUID, containerName string, resourceName v1.ResourceName) error { + return p.removeWithUpdate(podUID, func(podResourceEntries state.PodResourceEntries) bool { + found := false + for resource, podEntries := range podResourceEntries { + if resourceName != resource { + continue + } + + if _, ok := podEntries[podUID]; !ok { + continue + } + + if _, ok := podEntries[podUID][containerName]; ok { + found = true + } + delete(podEntries[podUID], containerName) + } + return found + }) +} + +func (p *StaticPolicy) removeWithUpdate( + podUID string, removeFn func(podResourceEntries state.PodResourceEntries) bool, +) error { + podResourceEntries := p.State.GetPodResourceEntries() + + found := removeFn(podResourceEntries) + if !found { + return nil + } + + machineState, err := p.GenerateMachineStateFromPodEntries(podResourceEntries) + if err != nil { + general.Errorf("pod: %s, GenerateMachineStateFromPodEntries failed with error: %v", podUID, err) + return fmt.Errorf("calculate machineState by updated pod entries failed with error: %v", err) + } + + p.State.SetPodResourceEntries(podResourceEntries, false) + p.State.SetMachineState(machineState, false) + + if err := p.State.StoreState(); err != nil { + general.Errorf("store state failed with error: %v", err) + return err + } + + return nil +} + +// clearResidualState is used to clean residual pods in local state +func (p *StaticPolicy) clearResidualState( + _ *config.Configuration, + _ interface{}, + _ *dynamicconfig.DynamicAgentConfiguration, + _ metrics.MetricEmitter, + _ *metaserver.MetaServer, +) { + general.Infof("exec") + var ( + err error + podList []*v1.Pod + ) + residualSet := make(map[string]bool) + + defer func() { + _ = general.UpdateHealthzStateByError(gpuconsts.ClearResidualState, err) + }() + + if p.MetaServer == nil { + general.Errorf("nil metaServer") + return + } + + ctx := context.Background() + podList, err = p.MetaServer.GetPodList(ctx, nil) + if err != nil { + 
general.Errorf("get pod list failed: %v", err) + return + } + + podSet := sets.NewString() + for _, pod := range podList { + podSet.Insert(fmt.Sprintf("%v", pod.UID)) + } + + p.Lock() + defer p.Unlock() + + podResourceEntries := p.State.GetPodResourceEntries() + for _, podEntries := range podResourceEntries { + for podUID := range podEntries { + if !podSet.Has(podUID) { + residualSet[podUID] = true + p.residualHitMap[podUID] += 1 + general.Infof("found pod: %s with state but doesn't show up in pod watcher, hit count: %d", podUID, p.residualHitMap[podUID]) + } + } + } + + podsToDelete := sets.NewString() + for podUID, hitCount := range p.residualHitMap { + if !residualSet[podUID] { + general.Infof("already found pod: %s in pod watcher or its state is cleared, delete it from residualHitMap", podUID) + delete(p.residualHitMap, podUID) + continue + } + + if time.Duration(hitCount)*gpuconsts.StateCheckPeriod >= gpuconsts.MaxResidualTime { + podsToDelete.Insert(podUID) + } + } + + if podsToDelete.Len() > 0 { + for { + podUID, found := podsToDelete.PopAny() + if !found { + break + } + + general.Infof("clear residual pod: %s in state", podUID) + podResourceEntries.RemovePod(podUID) + } + + machineState, err := p.GenerateMachineStateFromPodEntries(podResourceEntries) + if err != nil { + general.Errorf("GenerateMachineStateFromPodEntries failed with error: %v", err) + return + } + + p.State.SetPodResourceEntries(podResourceEntries, false) + p.State.SetMachineState(machineState, false) + + err = p.State.StoreState() + if err != nil { + general.Errorf("store state failed: %v", err) + return + } + } +} + +func (p *StaticPolicy) UpdateAllocatableAssociatedDevices( + ctx context.Context, request *pluginapi.UpdateAllocatableAssociatedDevicesRequest, +) (*pluginapi.UpdateAllocatableAssociatedDevicesResponse, error) { + if request == nil || len(request.Devices) == 0 { + return nil, fmt.Errorf("request is nil") + } + + customDevicePlugin := p.getCustomDevicePlugin(request.DeviceName) 
+ if customDevicePlugin == nil { + return nil, fmt.Errorf("no custom device plugin found for device %s", request.DeviceName) + } + + return customDevicePlugin.UpdateAllocatableAssociatedDevices(ctx, request) +} + +func (*StaticPolicy) GetAssociatedDeviceTopologyHints( + _ context.Context, _ *pluginapi.AssociatedDeviceRequest, +) (*pluginapi.AssociatedDeviceHintsResponse, error) { + return &pluginapi.AssociatedDeviceHintsResponse{}, nil +} + +// AllocateAssociatedDevice allocates a device in this sequence: +// 1. Find the resource plugin that corresponds to the accompanyResourceName and allocate +// 2. Find the custom device plugin that corresponds to the deviceName and allocate +func (p *StaticPolicy) AllocateAssociatedDevice( + ctx context.Context, req *pluginapi.AssociatedDeviceRequest, +) (resp *pluginapi.AssociatedDeviceAllocationResponse, respErr error) { + var isAccompanyResourcePlugin bool + var isAccompanyCustomDevicePlugin bool + if req == nil || req.ResourceRequest == nil || req.DeviceRequest == nil { + return nil, fmt.Errorf("req is nil") + } + + p.Lock() + defer func() { + // Reset state for accompany resource and target resource if there is an error + if respErr != nil { + if isAccompanyResourcePlugin { + _ = p.removeContainer(req.ResourceRequest.PodUid, req.ResourceRequest.ContainerName, v1.ResourceName(req.AccompanyResourceName)) + } + if isAccompanyCustomDevicePlugin { + accompanyDeviceType, _ := p.GetResourceTypeFromDeviceName(req.AccompanyResourceName) + if accompanyDeviceType != "" { + _ = p.removeContainer(req.ResourceRequest.PodUid, req.ResourceRequest.ContainerName, v1.ResourceName(accompanyDeviceType)) + } + } + deviceType, _ := p.GetResourceTypeFromDeviceName(req.DeviceName) + if deviceType != "" { + _ = p.removeContainer(req.ResourceRequest.PodUid, req.ResourceRequest.ContainerName, v1.ResourceName(deviceType)) + } + } + p.Unlock() + }() + + // Find the target device that we want to allocate for + var targetDeviceReq 
*pluginapi.DeviceRequest + for _, deviceRequest := range req.DeviceRequest { + if deviceRequest.DeviceName == req.DeviceName { + targetDeviceReq = deviceRequest + } + } + + if targetDeviceReq == nil { + return nil, fmt.Errorf("no target device plugin found for target device %s", req.DeviceName) + } + + // Allocate accompany resource + // Check if accompany resource maps to a resource plugin; if it does, allocate it first + accompanyResourcePlugin := p.getResourcePlugin(req.AccompanyResourceName) + if accompanyResourcePlugin != nil { + _, err := accompanyResourcePlugin.Allocate(ctx, req.ResourceRequest, targetDeviceReq) + if err != nil { + return nil, fmt.Errorf("allocate accompany resource %s failed with error: %v", req.AccompanyResourceName, err) + } + isAccompanyResourcePlugin = true + } else { + // Accompany resource maps to a custom device plugin; allocate for it + accompanyCustomDevicePlugin := p.getCustomDevicePlugin(req.AccompanyResourceName) + if accompanyCustomDevicePlugin != nil { + // Get device request for accompany device + var accompanyDeviceReq *pluginapi.DeviceRequest + for _, deviceRequest := range req.DeviceRequest { + if deviceRequest.DeviceName == req.AccompanyResourceName { + accompanyDeviceReq = deviceRequest + } + } + + if accompanyDeviceReq == nil { + return nil, fmt.Errorf("nil accompany device request") + } + + _, err := p.allocateAssociatedDevice(ctx, accompanyCustomDevicePlugin, req.ResourceRequest, accompanyDeviceReq, "") + if err != nil { + return nil, fmt.Errorf("AllocateAssociatedDevice accompany resource %s failed with error: %v", req.AccompanyResourceName, err) + } + isAccompanyCustomDevicePlugin = true + } + } + + // Allocate target custom device + targetCustomDevicePlugin := p.getCustomDevicePlugin(req.DeviceName) + if targetCustomDevicePlugin == nil { + return nil, fmt.Errorf("no custom device plugin found for target device %s", req.DeviceName) + } + + return p.allocateAssociatedDevice(ctx, targetCustomDevicePlugin, 
req.ResourceRequest, targetDeviceReq, req.AccompanyResourceName) +} + +func (p *StaticPolicy) allocateAssociatedDevice( + ctx context.Context, devicePlugin customdeviceplugin.CustomDevicePlugin, + resReq *pluginapi.ResourceRequest, deviceReq *pluginapi.DeviceRequest, accompanyResourceName string, +) (*pluginapi.AssociatedDeviceAllocationResponse, error) { + defaultAccompanyResourceName := devicePlugin.DefaultAccompanyResourceName() + if defaultAccompanyResourceName != "" && accompanyResourceName != defaultAccompanyResourceName { + accompanyResourcePlugin := p.getResourcePlugin(defaultAccompanyResourceName) + if accompanyResourcePlugin != nil { + _, err := accompanyResourcePlugin.Allocate(ctx, resReq, deviceReq) + if err != nil { + _ = p.removeContainer(resReq.PodUid, resReq.ContainerName, v1.ResourceName(defaultAccompanyResourceName)) + return nil, fmt.Errorf("allocate accompany resource %s failed with error: %v", defaultAccompanyResourceName, err) + } + } + } + + return devicePlugin.AllocateAssociatedDevice(ctx, resReq, deviceReq, accompanyResourceName) +} + +func (p *StaticPolicy) registerDefaultResourcePlugins() error { + allInitFuncs := resourcepluginregistry.GetRegisteredResourcePlugin() + for _, initFunc := range allInitFuncs { + resourcePlugin := initFunc(p.BasePlugin) + p.resourcePlugins[resourcePlugin.ResourceName()] = resourcePlugin + general.Infof("Registered resource plugin: %s", resourcePlugin.ResourceName()) + } + return nil +} + +func (p *StaticPolicy) registerDefaultCustomDevicePlugins() error { + allInitFuncs := devicepluginregistry.GetRegisteredCustomDevicePlugin() + for name := range allInitFuncs { + initFunc := allInitFuncs[name] + customDevicePlugin := initFunc(p.BasePlugin) + deviceNames := customDevicePlugin.DeviceNames() + for _, deviceName := range deviceNames { + p.customDevicePlugins[deviceName] = customDevicePlugin + p.associatedDeviceNames.Insert(deviceName) + } + } + return nil +} + +func (p *StaticPolicy) 
getResourcePlugin(resourceName string) resourceplugin.ResourcePlugin { + resourcePlugin := p.resourcePlugins[resourceName] + return resourcePlugin +} + +func (p *StaticPolicy) getCustomDevicePlugin(deviceName string) customdeviceplugin.CustomDevicePlugin { + customDevicePlugin := p.customDevicePlugins[deviceName] + return customDevicePlugin +} + +func (p *StaticPolicy) RegisterResourcePlugin(resourcePlugin resourceplugin.ResourcePlugin) { + p.resourcePlugins[resourcePlugin.ResourceName()] = resourcePlugin +} + +func (p *StaticPolicy) RegisterCustomDevicePlugin(plugin customdeviceplugin.CustomDevicePlugin) { + deviceNames := plugin.DeviceNames() + for _, deviceName := range deviceNames { + p.customDevicePlugins[deviceName] = plugin + p.associatedDeviceNames.Insert(deviceName) + } +} diff --git a/pkg/agent/qrm-plugins/gpu/staticpolicy/policy_test.go b/pkg/agent/qrm-plugins/gpu/staticpolicy/policy_test.go new file mode 100644 index 0000000000..2f8a90930d --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/staticpolicy/policy_test.go @@ -0,0 +1,683 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package staticpolicy + +import ( + "context" + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/apimachinery/pkg/util/uuid" + pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + + katalyst_base "github.com/kubewharf/katalyst-core/cmd/base" + "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/baseplugin" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/customdeviceplugin" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/resourceplugin" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/state" + "github.com/kubewharf/katalyst-core/pkg/config" + "github.com/kubewharf/katalyst-core/pkg/metaserver" + "github.com/kubewharf/katalyst-core/pkg/metrics" + "github.com/kubewharf/katalyst-core/pkg/util/machine" +) + +const ( + testResourcePluginName = "resource-plugin-stub" + testCustomDevicePluginName = "custom-device-plugin-stub" + testCustomDevicePluginName2 = "custom-device-plugin-stub-2" +) + +func generateTestConfiguration(t *testing.T) *config.Configuration { + conf := config.NewConfiguration() + tmpDir := t.TempDir() + conf.QRMPluginSocketDirs = []string{tmpDir} + conf.CheckpointManagerDir = tmpDir + + return conf +} + +func generateTestGenericContext(t *testing.T, conf *config.Configuration) *agent.GenericContext { + genericCtx, err := katalyst_base.GenerateFakeGenericContext([]runtime.Object{}) + if err != nil { + t.Fatalf("unable to generate test generic context: %v", err) + } + + metaServer, err := metaserver.NewMetaServer(genericCtx.Client, metrics.DummyMetrics{}, conf) + if err != nil { + t.Fatalf("unable to generate test meta server: %v", err) + } + + agentCtx := &agent.GenericContext{ + GenericContext: genericCtx, + MetaServer: metaServer, + PluginManager: nil, + } + + agentCtx.MetaServer = metaServer + return agentCtx +} + +func 
TestNewStaticPolicy(t *testing.T) { + t.Parallel() + + conf := generateTestConfiguration(t) + agentCtx := generateTestGenericContext(t, conf) + + tmpDir := t.TempDir() + conf.GenericQRMPluginConfiguration.StateFileDirectory = tmpDir + + _, policy, err := NewStaticPolicy(agentCtx, conf, nil, "test") + assert.NoError(t, err) + assert.NotNil(t, policy) +} + +func makeTestStaticPolicy(t *testing.T) *StaticPolicy { + conf := generateTestConfiguration(t) + agentCtx := generateTestGenericContext(t, conf) + + tmpDir := t.TempDir() + conf.GenericQRMPluginConfiguration.StateFileDirectory = tmpDir + + stateImpl, err := state.NewCheckpointState(conf.QRMPluginsConfiguration, tmpDir, "test", "test-policy", state.NewDefaultResourceStateGeneratorRegistry(), true, metrics.DummyMetrics{}) + assert.NoError(t, err) + + deviceTopologyRegistry := machine.NewDeviceTopologyRegistry() + + basePlugin := &baseplugin.BasePlugin{ + Conf: conf, + Emitter: metrics.DummyMetrics{}, + MetaServer: agentCtx.MetaServer, + AgentCtx: agentCtx, + PodAnnotationKeptKeys: []string{}, + PodLabelKeptKeys: []string{}, + State: stateImpl, + DeviceTopologyRegistry: deviceTopologyRegistry, + DefaultResourceStateGeneratorRegistry: state.NewDefaultResourceStateGeneratorRegistry(), + } + + staticPolicy := &StaticPolicy{ + BasePlugin: basePlugin, + resourcePlugins: make(map[string]resourceplugin.ResourcePlugin), + customDevicePlugins: make(map[string]customdeviceplugin.CustomDevicePlugin), + associatedDeviceNames: sets.NewString(), + } + + err = staticPolicy.registerDefaultResourcePlugins() + assert.NoError(t, err) + + err = staticPolicy.registerDefaultCustomDevicePlugins() + assert.NoError(t, err) + + return staticPolicy +} + +func TestStaticPolicy_Allocate(t *testing.T) { + t.Parallel() + + policy := makeTestStaticPolicy(t) + + // Register stubbed resource plugin + policy.RegisterResourcePlugin(resourceplugin.NewResourcePluginStub(policy.BasePlugin)) + + testName := "test" + podUID := string(uuid.NewUUID()) + + req 
:= &pluginapi.ResourceRequest{ + PodUid: podUID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: testResourcePluginName, + ResourceRequests: map[string]float64{ + testResourcePluginName: 2, + }, + Labels: map[string]string{}, + Annotations: map[string]string{}, + } + + _, err := policy.Allocate(context.Background(), req) + assert.NoError(t, err) + + // Check state + stateImpl := policy.State + allocationInfo := stateImpl.GetAllocationInfo(testResourcePluginName, podUID, testName) + fmt.Println(allocationInfo) + assert.NotNil(t, allocationInfo) + + // Allocating to an invalid resource plugin returns error + invalidReq := &pluginapi.ResourceRequest{ + PodUid: string(uuid.NewUUID()), + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: "invalid-plugin", + ResourceRequests: map[string]float64{ + "invalid-plugin": 2, + }, + Labels: map[string]string{}, + Annotations: map[string]string{}, + } + + _, err = policy.Allocate(context.Background(), invalidReq) + assert.Error(t, err) +} + +func TestStaticPolicy_RemovePod(t *testing.T) { + t.Parallel() + policy := makeTestStaticPolicy(t) + + // Register stubbed resource plugin + policy.RegisterResourcePlugin(resourceplugin.NewResourcePluginStub(policy.BasePlugin)) + + deviceTopologyProviderStub := machine.NewDeviceTopologyProviderStub() + + testDeviceTopology := &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "test-1": {}, + }, + } + err := deviceTopologyProviderStub.SetDeviceTopology(testDeviceTopology) + assert.NoError(t, err) + + policy.DeviceTopologyRegistry.RegisterDeviceTopologyProvider(testResourcePluginName, deviceTopologyProviderStub) + + policy.DefaultResourceStateGeneratorRegistry.RegisterResourceStateGenerator(testResourcePluginName, + 
state.NewGenericDefaultResourceStateGenerator(testResourcePluginName, policy.DeviceTopologyRegistry)) + + testName := "test" + podUID := string(uuid.NewUUID()) + + req := &pluginapi.ResourceRequest{ + PodUid: podUID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: testResourcePluginName, + ResourceRequests: map[string]float64{ + testResourcePluginName: 2, + }, + Labels: map[string]string{}, + Annotations: map[string]string{}, + } + + _, err = policy.Allocate(context.Background(), req) + assert.NoError(t, err) + + // Remove pod + _, err = policy.RemovePod(context.Background(), &pluginapi.RemovePodRequest{ + PodUid: podUID, + }) + assert.NoError(t, err) + + // Check state + stateImpl := policy.State + allocationInfo := stateImpl.GetAllocationInfo(testResourcePluginName, podUID, testName) + assert.Nil(t, allocationInfo) +} + +func TestStaticPolicy_GetTopologyHints(t *testing.T) { + t.Parallel() + + policy := makeTestStaticPolicy(t) + + policy.RegisterResourcePlugin(resourceplugin.NewResourcePluginStub(policy.BasePlugin)) + + testName := "test" + podUID := string(uuid.NewUUID()) + + req := &pluginapi.ResourceRequest{ + PodUid: podUID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: testResourcePluginName, + ResourceRequests: map[string]float64{ + testResourcePluginName: 2, + }, + Labels: map[string]string{}, + Annotations: map[string]string{}, + } + + resp, err := policy.GetTopologyHints(context.Background(), req) + assert.NoError(t, err) + assert.NotNil(t, resp) + + // Getting topology hints from an invalid resource plugin returns error + invalidReq := &pluginapi.ResourceRequest{ + PodUid: podUID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: 
"invalid-plugin", + ResourceRequests: map[string]float64{ + "invalid-plugin": 2, + }, + Labels: map[string]string{}, + Annotations: map[string]string{}, + } + + _, err = policy.GetTopologyHints(context.Background(), invalidReq) + assert.Error(t, err) +} + +func TestStaticPolicy_GetTopologyAwareResources(t *testing.T) { + t.Parallel() + + policy := makeTestStaticPolicy(t) + + policy.RegisterResourcePlugin(resourceplugin.NewResourcePluginStub(policy.BasePlugin)) + testName := "test" + podUID := string(uuid.NewUUID()) + + // Allocate first + req := &pluginapi.ResourceRequest{ + PodUid: podUID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: testResourcePluginName, + ResourceRequests: map[string]float64{ + testResourcePluginName: 2, + }, + Labels: map[string]string{}, + Annotations: map[string]string{}, + } + + _, err := policy.Allocate(context.Background(), req) + assert.NoError(t, err) + + // Get topology aware resources + getTopologyAwareResourcesReq := &pluginapi.GetTopologyAwareResourcesRequest{ + PodUid: podUID, + ContainerName: testName, + } + + resp, err := policy.GetTopologyAwareResources(context.Background(), getTopologyAwareResourcesReq) + assert.NoError(t, err) + assert.NotNil(t, resp) + + // Invalid request does not return error + invalidReq := &pluginapi.GetTopologyAwareResourcesRequest{ + PodUid: podUID, + ContainerName: "invalid-container", + } + + resp, err = policy.GetTopologyAwareResources(context.Background(), invalidReq) + assert.NoError(t, err) + assert.NotNil(t, resp) +} + +func TestStaticPolicy_mergeTopologyAwareResourcesResponse(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + podUID string + containerName string + respList []*pluginapi.GetTopologyAwareResourcesResponse + expectedResp *pluginapi.GetTopologyAwareResourcesResponse + expectedErr bool + }{ + { + name: "test merging of response", + podUID: "test-pod", + 
containerName: "test-container", + respList: []*pluginapi.GetTopologyAwareResourcesResponse{ + { + PodUid: "test-pod", + PodName: "test-pod-name", + PodNamespace: "test-pod-namespace", + ContainerTopologyAwareResources: &pluginapi.ContainerTopologyAwareResources{ + ContainerName: "test-container", + AllocatedResources: map[string]*pluginapi.TopologyAwareResource{ + "test-resource-1": {}, + }, + }, + }, + { + PodUid: "test-pod", + PodName: "test-pod-name", + PodNamespace: "test-pod-namespace", + ContainerTopologyAwareResources: &pluginapi.ContainerTopologyAwareResources{ + ContainerName: "test-container", + AllocatedResources: map[string]*pluginapi.TopologyAwareResource{ + "test-resource-2": {}, + }, + }, + }, + }, + expectedResp: &pluginapi.GetTopologyAwareResourcesResponse{ + PodUid: "test-pod", + PodName: "test-pod-name", + PodNamespace: "test-pod-namespace", + ContainerTopologyAwareResources: &pluginapi.ContainerTopologyAwareResources{ + ContainerName: "test-container", + AllocatedResources: map[string]*pluginapi.TopologyAwareResource{ + "test-resource-1": {}, + "test-resource-2": {}, + }, + }, + }, + }, + { + name: "pod name is not the same, return an error", + podUID: "test-pod", + containerName: "test-container", + respList: []*pluginapi.GetTopologyAwareResourcesResponse{ + { + PodUid: "test-pod", + PodName: "test-pod-name", + PodNamespace: "test-pod-namespace", + ContainerTopologyAwareResources: &pluginapi.ContainerTopologyAwareResources{ + ContainerName: "test-container", + AllocatedResources: map[string]*pluginapi.TopologyAwareResource{ + "test-resource-1": {}, + }, + }, + }, + { + PodUid: "test-pod", + PodName: "test-pod-name-2", + PodNamespace: "test-pod-namespace", + ContainerTopologyAwareResources: &pluginapi.ContainerTopologyAwareResources{ + ContainerName: "test-container", + AllocatedResources: map[string]*pluginapi.TopologyAwareResource{ + "test-resource-2": {}, + }, + }, + }, + }, + expectedErr: true, + }, + { + name: "pod namespace is not the 
same, return an error", + podUID: "test-pod", + containerName: "test-container", + respList: []*pluginapi.GetTopologyAwareResourcesResponse{ + { + PodUid: "test-pod", + PodName: "test-pod-name", + PodNamespace: "test-pod-namespace", + ContainerTopologyAwareResources: &pluginapi.ContainerTopologyAwareResources{ + ContainerName: "test-container", + AllocatedResources: map[string]*pluginapi.TopologyAwareResource{ + "test-resource-1": {}, + }, + }, + }, + { + PodUid: "test-pod", + PodName: "test-pod-name", + PodNamespace: "test-pod-namespace-2", + ContainerTopologyAwareResources: &pluginapi.ContainerTopologyAwareResources{ + ContainerName: "test-container", + AllocatedResources: map[string]*pluginapi.TopologyAwareResource{ + "test-resource-2": {}, + }, + }, + }, + }, + expectedErr: true, + }, + { + name: "container topology aware resources is nil, return an error", + podUID: "test-pod", + containerName: "test-container", + respList: []*pluginapi.GetTopologyAwareResourcesResponse{ + { + PodUid: "test-pod", + PodName: "test-pod-name", + PodNamespace: "test-pod-namespace", + ContainerTopologyAwareResources: &pluginapi.ContainerTopologyAwareResources{ + ContainerName: "test-container", + AllocatedResources: map[string]*pluginapi.TopologyAwareResource{ + "test-resource-1": {}, + }, + }, + }, + { + PodUid: "test-pod", + PodName: "test-pod-name", + PodNamespace: "test-pod-namespace", + ContainerTopologyAwareResources: nil, + }, + }, + expectedErr: true, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + policy := makeTestStaticPolicy(t) + + resp, err := policy.mergeTopologyAwareResourcesResponse(tt.podUID, tt.containerName, tt.respList) + if tt.expectedErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expectedResp, resp) + } + }) + } +} + +func TestStaticPolicy_GetTopologyAwareAllocatableResources(t *testing.T) { + t.Parallel() + + policy := makeTestStaticPolicy(t) + + 
policy.RegisterResourcePlugin(resourceplugin.NewResourcePluginStub(policy.BasePlugin)) + testName := "test" + podUID := string(uuid.NewUUID()) + + // Allocate first + req := &pluginapi.ResourceRequest{ + PodUid: podUID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: testResourcePluginName, + ResourceRequests: map[string]float64{ + testResourcePluginName: 2, + }, + Labels: map[string]string{}, + Annotations: map[string]string{}, + } + + _, err := policy.Allocate(context.Background(), req) + assert.NoError(t, err) + + getTopologyAwareAllocatableResourcesReq := &pluginapi.GetTopologyAwareAllocatableResourcesRequest{} + resp, err := policy.GetTopologyAwareAllocatableResources(context.Background(), getTopologyAwareAllocatableResourcesReq) + assert.NoError(t, err) + assert.NotNil(t, resp) +} + +func TestStaticPolicy_UpdateAllocatableAssociatedDevices(t *testing.T) { + t.Parallel() + + policy := makeTestStaticPolicy(t) + + policy.RegisterResourcePlugin(resourceplugin.NewResourcePluginStub(policy.BasePlugin)) + policy.RegisterCustomDevicePlugin(customdeviceplugin.NewCustomDevicePluginStub(policy.BasePlugin)) + + req := &pluginapi.UpdateAllocatableAssociatedDevicesRequest{ + DeviceName: testCustomDevicePluginName, + Devices: []*pluginapi.AssociatedDevice{ + { + ID: "test-device-1", + }, + }, + } + + resp, err := policy.UpdateAllocatableAssociatedDevices(context.Background(), req) + assert.NoError(t, err) + assert.NotNil(t, resp) + + // Test error handling for no device request + noDeviceRequest := &pluginapi.UpdateAllocatableAssociatedDevicesRequest{ + DeviceName: testCustomDevicePluginName, + } + + resp, err = policy.UpdateAllocatableAssociatedDevices(context.Background(), noDeviceRequest) + assert.Error(t, err) + assert.Nil(t, resp) + + // Test error handling for non-existent custom device plugin + invalidReq := 
&pluginapi.UpdateAllocatableAssociatedDevicesRequest{ + DeviceName: "non-existent-device", + } + + resp, err = policy.UpdateAllocatableAssociatedDevices(context.Background(), invalidReq) + assert.Error(t, err) + assert.Nil(t, resp) +} + +func TestStaticPolicy_AllocateAssociatedDevices(t *testing.T) { + t.Parallel() + + policy := makeTestStaticPolicy(t) + policy.RegisterResourcePlugin(resourceplugin.NewResourcePluginStub(policy.BasePlugin)) + policy.RegisterCustomDevicePlugin(customdeviceplugin.NewCustomDevicePluginStub(policy.BasePlugin)) + policy.RegisterCustomDevicePlugin(customdeviceplugin.NewCustomDevicePluginStub2(policy.BasePlugin)) + + podUID := string(uuid.NewUUID()) + + testName := "test" + + req := &pluginapi.AssociatedDeviceRequest{ + ResourceRequest: &pluginapi.ResourceRequest{ + PodUid: podUID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ContainerIndex: 0, + ResourceName: testResourcePluginName, + ResourceRequests: map[string]float64{ + testResourcePluginName: 2, + }, + Labels: map[string]string{}, + Annotations: map[string]string{}, + }, + DeviceRequest: []*pluginapi.DeviceRequest{ + { + DeviceName: testCustomDevicePluginName, + }, + }, + DeviceName: testCustomDevicePluginName, + AccompanyResourceName: testResourcePluginName, + } + + resp, err := policy.AllocateAssociatedDevice(context.Background(), req) + + assert.NoError(t, err) + assert.NotNil(t, resp) + + // Verify in state + stateImpl := policy.State + allocationInfo := stateImpl.GetAllocationInfo(testCustomDevicePluginName, podUID, testName) + assert.NotNil(t, allocationInfo) + + podUID = string(uuid.NewUUID()) + // Error handling if there is no target device + noTargetDeviceReq := &pluginapi.AssociatedDeviceRequest{ + ResourceRequest: &pluginapi.ResourceRequest{ + PodUid: podUID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: 
testResourcePluginName, + ResourceRequests: map[string]float64{ + testResourcePluginName: 2, + }, + Labels: map[string]string{}, + Annotations: map[string]string{}, + }, + DeviceRequest: []*pluginapi.DeviceRequest{ + { + DeviceName: testCustomDevicePluginName, + }, + }, + DeviceName: "invalid-device", + AccompanyResourceName: testResourcePluginName, + } + + resp, err = policy.AllocateAssociatedDevice(context.Background(), noTargetDeviceReq) + + assert.Error(t, err) + assert.Nil(t, resp) + + // Accompany resource is another custom device plugin + req = &pluginapi.AssociatedDeviceRequest{ + ResourceRequest: &pluginapi.ResourceRequest{ + PodUid: podUID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN, + ResourceName: testResourcePluginName, + ResourceRequests: map[string]float64{ + testResourcePluginName: 2, + }, + Labels: map[string]string{}, + Annotations: map[string]string{}, + }, + DeviceRequest: []*pluginapi.DeviceRequest{ + { + DeviceName: testCustomDevicePluginName, + }, + { + DeviceName: testCustomDevicePluginName2, + }, + }, + DeviceName: testCustomDevicePluginName, + AccompanyResourceName: testCustomDevicePluginName2, + } + + resp, err = policy.AllocateAssociatedDevice(context.Background(), req) + assert.NoError(t, err) + assert.NotNil(t, resp) + + // Verify state + allocationInfo = stateImpl.GetAllocationInfo(testCustomDevicePluginName, podUID, testName) + assert.NotNil(t, allocationInfo) + + allocationInfo = stateImpl.GetAllocationInfo(testCustomDevicePluginName2, podUID, testName) + assert.NotNil(t, allocationInfo) +} diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/manager/defaults.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/manager/defaults.go new file mode 100644 index 0000000000..5202c9b862 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/manager/defaults.go @@ -0,0 +1,70 @@ +/* +Copyright 2022 The Katalyst Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package manager + +import ( + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/canonical" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/deviceaffinity" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/gpu_memory" + "github.com/kubewharf/katalyst-core/pkg/util/general" +) + +// registerDefaultFilterStrategies register filtering strategies
+func registerDefaultFilterStrategies(manager *StrategyManager) { + if err := manager.RegisterFilteringStrategy(gpu_memory.NewGPUMemoryStrategy()); err != nil { + general.Errorf("Failed to register filtering strategy: %v", err) + } + + if err := manager.RegisterFilteringStrategy(canonical.NewCanonicalStrategy()); err != nil { + general.Errorf("Failed to register filtering strategy: %v", err) + } +} + +// registerDefaultSortingStrategies register sorting strategies +func registerDefaultSortingStrategies(manager *StrategyManager) { + if err := manager.RegisterSortingStrategy(gpu_memory.NewGPUMemoryStrategy()); err != nil { + general.Errorf("Failed to register sorting strategy: %v", err) + } +} + +// registerDefaultBindingStrategies register binding strategies +func registerDefaultBindingStrategies(manager *StrategyManager) { + if err := manager.RegisterBindingStrategy(canonical.NewCanonicalStrategy()); err != nil { + general.Errorf("Failed to register binding strategy: %v", 
err) + } + + if err := manager.RegisterBindingStrategy(deviceaffinity.NewDeviceAffinityStrategy()); err != nil { + general.Errorf("Failed to register binding strategy: %v", err) + } +} + +// registerDefaultAllocationStrategies register allocation strategies +func registerDefaultAllocationStrategies(manager *StrategyManager) { + if err := manager.RegisterGenericAllocationStrategy(allocationStrategyNameDefault, + []string{canonical.StrategyNameCanonical, gpu_memory.StrategyNameGPUMemory}, + gpu_memory.StrategyNameGPUMemory, canonical.StrategyNameCanonical); err != nil { + general.Errorf("Failed to register gpu-memory-default strategy: %v", err) + } +} + +// registerDefaultStrategies registers the default strategies +func registerDefaultStrategies(manager *StrategyManager) { + registerDefaultFilterStrategies(manager) + registerDefaultSortingStrategies(manager) + registerDefaultBindingStrategies(manager) + registerDefaultAllocationStrategies(manager) +} diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/manager/helper.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/manager/helper.go new file mode 100644 index 0000000000..fe78af5de7 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/manager/helper.go @@ -0,0 +1,68 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package manager + +import ( + "fmt" + + pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/state" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate" + "github.com/kubewharf/katalyst-core/pkg/config/agent/qrm" + "github.com/kubewharf/katalyst-core/pkg/metaserver" + "github.com/kubewharf/katalyst-core/pkg/metrics" + "github.com/kubewharf/katalyst-core/pkg/util/machine" +) + +// AllocateGPUUsingStrategy performs GPU allocation using the strategy framework +func AllocateGPUUsingStrategy( + resourceReq *pluginapi.ResourceRequest, + deviceReq *pluginapi.DeviceRequest, + gpuTopology *machine.DeviceTopology, + gpuConfig *qrm.GPUQRMPluginConfig, + emitter metrics.MetricEmitter, + metaServer *metaserver.MetaServer, + machineState state.AllocationResourcesMap, + qosLevel string, +) (*allocate.AllocationResult, error) { + // Get hint nodes + hintNodes, err := machine.NewCPUSetUint64(deviceReq.GetHint().GetNodes()...) + if err != nil { + return &allocate.AllocationResult{ + Success: false, + ErrorMessage: fmt.Sprintf("failed to get hint nodes: %v", err), + }, err + } + + // Create allocation context + ctx := &allocate.AllocationContext{ + ResourceReq: resourceReq, + DeviceReq: deviceReq, + DeviceTopology: gpuTopology, + GPUQRMPluginConfig: gpuConfig, + Emitter: emitter, + MetaServer: metaServer, + MachineState: machineState, + QoSLevel: qosLevel, + HintNodes: hintNodes, + } + + // Get the global strategy manager and perform allocation + manager := GetGlobalStrategyManager() + return manager.AllocateUsingStrategy(ctx) +} diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/manager/manager.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/manager/manager.go new file mode 100644 index 0000000000..f77b4d1fa6 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/manager/manager.go @@ -0,0 +1,193 @@ +/* +Copyright 2022 The Katalyst Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package manager + +import ( + "fmt" + "sync" + + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate/registry" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/allocation" + "github.com/kubewharf/katalyst-core/pkg/util/general" +) + +const ( + allocationStrategyNameDefault = "default" +) + +// StrategyManager manages the selection of allocation strategies based on resource names +type StrategyManager struct { + *registry.StrategyRegistry + + // Mapping from resource name to strategy name + resourceToStrategy map[string]string + + // Default strategy to use when no specific strategy is configured + defaultStrategy string + + // Mutex for thread-safe access + mutex sync.RWMutex +} + +// NewStrategyManager creates a new strategy manager +func NewStrategyManager() *StrategyManager { + return &StrategyManager{ + StrategyRegistry: registry.NewStrategyRegistry(), + resourceToStrategy: make(map[string]string), + defaultStrategy: allocationStrategyNameDefault, + } +} + +// RegisterStrategyForResource registers a strategy for a specific resource name +func (m *StrategyManager) RegisterStrategyForResource(resourceName, strategyName string) error { + m.mutex.Lock() + defer m.mutex.Unlock() + + // Check if the strategy exists + _, err := m.GetAllocationStrategy(strategyName) + if err != nil { + 
return fmt.Errorf("strategy %s not found: %v", strategyName, err) + } + + m.resourceToStrategy[resourceName] = strategyName + general.Infof("Registered strategy %s for resource %s", strategyName, resourceName) + return nil +} + +func (m *StrategyManager) GetDefaultStrategy() (allocate.AllocationStrategy, error) { + m.mutex.RLock() + defer m.mutex.RUnlock() + + return m.GetAllocationStrategy(m.defaultStrategy) +} + +// SetDefaultStrategy sets the default strategy to use when no specific strategy is configured +func (m *StrategyManager) SetDefaultStrategy(strategyName string) error { + m.mutex.Lock() + defer m.mutex.Unlock() + + // Check if the strategy exists + _, err := m.GetAllocationStrategy(strategyName) + if err != nil { + return fmt.Errorf("strategy %s not found: %v", strategyName, err) + } + + m.defaultStrategy = strategyName + general.Infof("Set default strategy to %s", strategyName) + return nil +} + +// GetStrategyForResource returns the strategy name for a given resource +func (m *StrategyManager) GetStrategyForResource(resourceName string) string { + m.mutex.RLock() + defer m.mutex.RUnlock() + + if strategyName, exists := m.resourceToStrategy[resourceName]; exists { + return strategyName + } + + return m.defaultStrategy +} + +// getAllocationStrategyForResource returns the allocation strategy for a given resource +func (m *StrategyManager) getAllocationStrategyForResource(customAllocationStrategy map[string]string, resourceName string) (allocate.AllocationStrategy, error) { + var strategyName string + if customStrategy, exists := customAllocationStrategy[resourceName]; exists { + strategyName = customStrategy + } else { + strategyName = m.GetStrategyForResource(resourceName) + } + + return m.GetAllocationStrategy(strategyName) +} + +// AllocateUsingStrategy performs allocation using the appropriate strategy for the resource +func (m *StrategyManager) AllocateUsingStrategy(ctx *allocate.AllocationContext) (*allocate.AllocationResult, error) { + // 
Determine the device name + resourceName := ctx.DeviceReq.DeviceName + customAllocationStrategy := ctx.GPUQRMPluginConfig.CustomAllocationStrategy + + // Get the strategy for this resource + strategy, err := m.getAllocationStrategyForResource(customAllocationStrategy, resourceName) + if err != nil { + return &allocate.AllocationResult{ + Success: false, + ErrorMessage: fmt.Sprintf("failed to get strategy for resource %s: %v", resourceName, err), + }, fmt.Errorf("failed to get strategy for resource %s: %v", resourceName, err) + } + + general.InfoS("Using strategy for allocation", + "resourceName", resourceName, + "strategyName", strategy.Name(), + "podNamespace", ctx.ResourceReq.PodNamespace, + "podName", ctx.ResourceReq.PodName, + "containerName", ctx.ResourceReq.ContainerName) + + // Perform allocation using the strategy + return strategy.Allocate(ctx) +} + +// RegisterGenericAllocationStrategy registers a complete generic allocation strategy with the given name +func (m *StrategyManager) RegisterGenericAllocationStrategy( + name string, filteringNames []string, sortingName, bindingName string, +) error { + var filteringList []allocate.FilteringStrategy + for _, fn := range filteringNames { + filtering, err := m.StrategyRegistry.GetFilteringStrategy(fn) + if err != nil { + return fmt.Errorf("filtering strategy %s not found: %v", fn, err) + } + filteringList = append(filteringList, filtering) + } + + sorting, err := m.StrategyRegistry.GetSortingStrategy(sortingName) + if err != nil { + return fmt.Errorf("sorting strategy %s not found: %v", sortingName, err) + } + + binding, err := m.StrategyRegistry.GetBindingStrategy(bindingName) + if err != nil { + return fmt.Errorf("binding strategy %s not found: %v", bindingName, err) + } + + err = m.StrategyRegistry.RegisterAllocationStrategy(allocation.NewGenericAllocationStrategy(name, m.StrategyRegistry, filteringList, sorting, binding)) + if err != nil { + return fmt.Errorf("register allocation strategy %s failed: %v", 
name, err) + } + + general.Infof("Registered allocation strategy: %s (filtering: %s, sorting: %s, binding: %s)", + name, filteringNames, sortingName, bindingName) + return nil +} + +// Global strategy manager instance +var ( + globalStrategyManager *StrategyManager + once sync.Once +) + +// GetGlobalStrategyManager returns the global strategy manager instance +func GetGlobalStrategyManager() *StrategyManager { + once.Do(func() { + globalStrategyManager = NewStrategyManager() + + // Register default strategies + registerDefaultStrategies(globalStrategyManager) + }) + return globalStrategyManager +} diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/manager/manager_test.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/manager/manager_test.go new file mode 100644 index 0000000000..5a9a075249 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/manager/manager_test.go @@ -0,0 +1,61 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package manager + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/canonical" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/gpu_memory" +) + +func TestStrategyManager(t *testing.T) { + t.Parallel() + + manager := NewStrategyManager() + + registerDefaultStrategies(manager) + + // Test default strategy + assert.Equal(t, "default", manager.defaultStrategy) + + // Test setting default strategy + err := manager.SetDefaultStrategy("test-allocation") + assert.Error(t, err) // Should fail because test-allocation is not registered yet + + // Test registering strategy for resource + err = manager.RegisterStrategyForResource("test-resource", "test-allocation") + assert.Error(t, err) // Should fail because test-allocation is not registered yet + + err = manager.RegisterGenericAllocationStrategy("test-allocation", []string{gpu_memory.StrategyNameGPUMemory}, + gpu_memory.StrategyNameGPUMemory, canonical.StrategyNameCanonical) + assert.NoError(t, err) + + // Now test registering strategy for resource + err = manager.RegisterStrategyForResource("test-resource", "test-allocation") + assert.NoError(t, err) + + // Test getting strategy for resource + strategyName := manager.GetStrategyForResource("test-resource") + assert.Equal(t, "test-allocation", strategyName) + + // Test getting default strategy for non-existent resource + strategyName = manager.GetStrategyForResource("non-existent-resource") + assert.Equal(t, "default", strategyName) +} diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/registry/registry.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/registry/registry.go new file mode 100644 index 0000000000..48b58014f9 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/registry/registry.go @@ -0,0 +1,151 @@ +/* +Copyright 2022 The Katalyst Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package registry + +import ( + "fmt" + "sync" + + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate" + "github.com/kubewharf/katalyst-core/pkg/util/general" +) + +type StrategyRegistry struct { + filteringStrategies map[string]allocate.FilteringStrategy + sortingStrategies map[string]allocate.SortingStrategy + bindingStrategies map[string]allocate.BindingStrategy + allocationStrategies map[string]allocate.AllocationStrategy + // Mutex for thread-safe access to registries + registryMutex sync.RWMutex +} + +// NewStrategyRegistry creates a new instance of StrategyRegistry +func NewStrategyRegistry() *StrategyRegistry { + return &StrategyRegistry{ + filteringStrategies: make(map[string]allocate.FilteringStrategy), + sortingStrategies: make(map[string]allocate.SortingStrategy), + bindingStrategies: make(map[string]allocate.BindingStrategy), + allocationStrategies: make(map[string]allocate.AllocationStrategy), + } +} + +// RegisterFilteringStrategy registers a filtering strategy with the given name +func (r *StrategyRegistry) RegisterFilteringStrategy(strategy allocate.FilteringStrategy) error { + r.registryMutex.Lock() + defer r.registryMutex.Unlock() + + if _, exists := r.filteringStrategies[strategy.Name()]; exists { + return fmt.Errorf("filtering strategy with name %s already registered", strategy.Name()) + } + + r.filteringStrategies[strategy.Name()] = strategy + general.Infof("Registered filtering 
strategy: %s", strategy.Name()) + return nil +} + +// RegisterSortingStrategy registers a sorting strategy with the given name +func (r *StrategyRegistry) RegisterSortingStrategy(strategy allocate.SortingStrategy) error { + r.registryMutex.Lock() + defer r.registryMutex.Unlock() + + if _, exists := r.sortingStrategies[strategy.Name()]; exists { + return fmt.Errorf("sorting strategy with name %s already registered", strategy.Name()) + } + + r.sortingStrategies[strategy.Name()] = strategy + general.Infof("Registered sorting strategy: %s", strategy.Name()) + return nil +} + +// RegisterBindingStrategy registers a binding strategy with the given name +func (r *StrategyRegistry) RegisterBindingStrategy(strategy allocate.BindingStrategy) error { + r.registryMutex.Lock() + defer r.registryMutex.Unlock() + + if _, exists := r.bindingStrategies[strategy.Name()]; exists { + return fmt.Errorf("binding strategy with name %s already registered", strategy.Name()) + } + + r.bindingStrategies[strategy.Name()] = strategy + general.Infof("Registered binding strategy: %s", strategy.Name()) + return nil +} + +func (r *StrategyRegistry) RegisterAllocationStrategy(strategy allocate.AllocationStrategy) error { + r.registryMutex.Lock() + defer r.registryMutex.Unlock() + + if _, exists := r.allocationStrategies[strategy.Name()]; exists { + return fmt.Errorf("allocation strategy with name %s already registered", strategy.Name()) + } + + r.allocationStrategies[strategy.Name()] = strategy + general.Infof("Registered allocation strategy: %s", strategy.Name()) + return nil +} + +// GetFilteringStrategy returns the filtering strategy with the given name +func (r *StrategyRegistry) GetFilteringStrategy(name string) (allocate.FilteringStrategy, error) { + r.registryMutex.RLock() + defer r.registryMutex.RUnlock() + + strategy, exists := r.filteringStrategies[name] + if !exists { + return nil, fmt.Errorf("filtering strategy %s not found", name) + } + + return strategy, nil +} + +// 
GetSortingStrategy returns the sorting strategy with the given name +func (r *StrategyRegistry) GetSortingStrategy(name string) (allocate.SortingStrategy, error) { + r.registryMutex.RLock() + defer r.registryMutex.RUnlock() + + strategy, exists := r.sortingStrategies[name] + if !exists { + return nil, fmt.Errorf("sorting strategy %s not found", name) + } + + return strategy, nil +} + +// GetBindingStrategy returns the binding strategy with the given name +func (r *StrategyRegistry) GetBindingStrategy(name string) (allocate.BindingStrategy, error) { + r.registryMutex.RLock() + defer r.registryMutex.RUnlock() + + strategy, exists := r.bindingStrategies[name] + if !exists { + return nil, fmt.Errorf("binding strategy %s not found", name) + } + + return strategy, nil +} + +// GetAllocationStrategy returns the allocation strategy with the given name +func (r *StrategyRegistry) GetAllocationStrategy(name string) (allocate.AllocationStrategy, error) { + r.registryMutex.RLock() + defer r.registryMutex.RUnlock() + + strategy, exists := r.allocationStrategies[name] + if !exists { + return nil, fmt.Errorf("allocation strategy %s not found", name) + } + + return strategy, nil +} diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/registry/registry_test.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/registry/registry_test.go new file mode 100644 index 0000000000..d013c6a985 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/registry/registry_test.go @@ -0,0 +1,97 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package registry + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate" +) + +type dummyStrategy struct { + name string +} + +func (s *dummyStrategy) Name() string { + return s.name +} + +func (s *dummyStrategy) Filter(_ *allocate.AllocationContext, allAvailableDevices []string) ([]string, error) { + return allAvailableDevices, nil +} + +func (s *dummyStrategy) Sort(_ *allocate.AllocationContext, allAvailableDevices []string) ([]string, error) { + return allAvailableDevices, nil +} + +func (s *dummyStrategy) Bind(_ *allocate.AllocationContext, _ []string) (*allocate.AllocationResult, error) { + return &allocate.AllocationResult{}, nil +} + +func (s *dummyStrategy) Allocate(_ *allocate.AllocationContext) (*allocate.AllocationResult, error) { + return &allocate.AllocationResult{}, nil +} + +func TestStrategyRegistry(t *testing.T) { + t.Parallel() + + registry := NewStrategyRegistry() + // Test filtering strategy registration + filteringStrategy := &dummyStrategy{name: "test-filtering"} + err := registry.RegisterFilteringStrategy(filteringStrategy) + assert.NoError(t, err) + + // Test duplicate registration + err = registry.RegisterFilteringStrategy(filteringStrategy) + assert.Error(t, err) + + // Test filtering strategy retrieval + retrievedStrategy, err := registry.GetFilteringStrategy("test-filtering") + assert.NoError(t, err) + assert.Equal(t, "test-filtering", retrievedStrategy.Name()) + + // Test non-existent strategy + _, err = registry.GetFilteringStrategy("non-existent") + assert.Error(t, err) + + // Test sorting strategy registration + sortingStrategy := &dummyStrategy{name: "test-sorting"} + err = registry.RegisterSortingStrategy(sortingStrategy) + assert.NoError(t, err) + + // Test strategy retrieval + retrievedSortingStrategy, err := 
registry.GetSortingStrategy("test-sorting") + assert.NoError(t, err) + assert.Equal(t, "test-sorting", retrievedSortingStrategy.Name()) + + // Test binding strategy registration + bindingStrategy := &dummyStrategy{name: "test-binding"} + err = registry.RegisterBindingStrategy(bindingStrategy) + assert.NoError(t, err) + + // Test allocation strategy registration + allocatingStrategy := &dummyStrategy{name: "test-allocation"} + err = registry.RegisterAllocationStrategy(allocatingStrategy) + assert.NoError(t, err) + + // Test allocation strategy retrieval + allocationStrategy, err := registry.GetAllocationStrategy("test-allocation") + assert.NoError(t, err) + assert.Equal(t, "test-allocation", allocationStrategy.Name()) +} diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/allocation/generic.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/allocation/generic.go new file mode 100644 index 0000000000..f5c55eaf2d --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/allocation/generic.go @@ -0,0 +1,176 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package allocation + +import ( + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate/registry" + "github.com/kubewharf/katalyst-core/pkg/util/general" +) + +// GenericAllocationStrategy combines filtering, sorting, and binding strategies +type GenericAllocationStrategy struct { + name string + registry *registry.StrategyRegistry + filteringStrategies []allocate.FilteringStrategy + sortingStrategy allocate.SortingStrategy + bindingStrategy allocate.BindingStrategy +} + +// NewGenericAllocationStrategy creates a new allocation strategy with the given components +func NewGenericAllocationStrategy(name string, + registry *registry.StrategyRegistry, + filtering []allocate.FilteringStrategy, + sorting allocate.SortingStrategy, + binding allocate.BindingStrategy, +) *GenericAllocationStrategy { + return &GenericAllocationStrategy{ + name: name, + registry: registry, + filteringStrategies: filtering, + sortingStrategy: sorting, + bindingStrategy: binding, + } +} + +var _ allocate.AllocationStrategy = &GenericAllocationStrategy{} + +func (s *GenericAllocationStrategy) Name() string { + return s.name +} + +func (s *GenericAllocationStrategy) Clone(name string) *GenericAllocationStrategy { + filteringStrategies := make([]allocate.FilteringStrategy, len(s.filteringStrategies)) + copy(filteringStrategies, s.filteringStrategies) + return &GenericAllocationStrategy{ + name: name, + registry: s.registry, + filteringStrategies: filteringStrategies, + sortingStrategy: s.sortingStrategy, + bindingStrategy: s.bindingStrategy, + } +} + +// Allocate performs the allocation using the combined strategies +func (s *GenericAllocationStrategy) Allocate(ctx *allocate.AllocationContext) (*allocate.AllocationResult, error) { + var err error + resourceName := ctx.DeviceReq.DeviceName + allAvailableDevices := append(ctx.DeviceReq.ReusableDevices, ctx.DeviceReq.AvailableDevices...) 
+ // Apply filtering strategy + for _, fs := range s.getFilteringStrategies(ctx, resourceName) { + allAvailableDevices, err = fs.Filter(ctx, allAvailableDevices) + if err != nil { + return &allocate.AllocationResult{ + Success: false, + ErrorMessage: err.Error(), + }, err + } + } + + // Apply sorting strategy + sortedDevices, err := s.getSortingStrategy(ctx, resourceName).Sort(ctx, allAvailableDevices) + if err != nil { + return &allocate.AllocationResult{ + Success: false, + ErrorMessage: err.Error(), + }, err + } + + // Apply binding strategy + result, err := s.getBindingStrategy(ctx, resourceName).Bind(ctx, sortedDevices) + if err != nil { + return &allocate.AllocationResult{ + Success: false, + ErrorMessage: err.Error(), + }, err + } + + return result, nil +} + +// GetFilteringStrategy returns the filtering strategy +func (s *GenericAllocationStrategy) GetFilteringStrategy() []allocate.FilteringStrategy { + return s.filteringStrategies +} + +// SetFilteringStrategy sets the filtering strategy +func (s *GenericAllocationStrategy) SetFilteringStrategy(filteringStrategies []allocate.FilteringStrategy) { + s.filteringStrategies = filteringStrategies +} + +// GetSortingStrategy returns the sorting strategy +func (s *GenericAllocationStrategy) GetSortingStrategy() allocate.SortingStrategy { + return s.sortingStrategy +} + +// SetSortingStrategy sets the sorting strategy +func (s *GenericAllocationStrategy) SetSortingStrategy(sortingStrategy allocate.SortingStrategy) { + s.sortingStrategy = sortingStrategy +} + +// GetBindingStrategy returns the binding strategy +func (s *GenericAllocationStrategy) GetBindingStrategy() allocate.BindingStrategy { + return s.bindingStrategy +} + +// SetBindingStrategy sets the binding strategy +func (s *GenericAllocationStrategy) SetBindingStrategy(bindingStrategy allocate.BindingStrategy) { + s.bindingStrategy = bindingStrategy +} + +func (s *GenericAllocationStrategy) getFilteringStrategies(ctx *allocate.AllocationContext, 
resourceName string) []allocate.FilteringStrategy { + if strategyNames, ok := ctx.GPUQRMPluginConfig.CustomFilteringStrategies[resourceName]; ok { + filteringStrategies := make([]allocate.FilteringStrategy, len(strategyNames)) + for _, fs := range strategyNames { + fs, err := s.registry.GetFilteringStrategy(fs) + if err != nil { + general.Errorf("failed to get filtering strategy %s: %v", fs, err) + continue + } + filteringStrategies = append(filteringStrategies, fs) + } + return filteringStrategies + } else { + return s.filteringStrategies + } +} + +func (s *GenericAllocationStrategy) getSortingStrategy(ctx *allocate.AllocationContext, resourceName string) allocate.SortingStrategy { + if strategyName, ok := ctx.GPUQRMPluginConfig.CustomSortingStrategy[resourceName]; ok { + sortingStrategy, err := s.registry.GetSortingStrategy(strategyName) + if err != nil { + general.Errorf("failed to get sorting strategy %s: %v", strategyName, err) + sortingStrategy = s.sortingStrategy + } + return sortingStrategy + } else { + return s.sortingStrategy + } +} + +func (s *GenericAllocationStrategy) getBindingStrategy(ctx *allocate.AllocationContext, resourceName string) allocate.BindingStrategy { + if strategyName, ok := ctx.GPUQRMPluginConfig.CustomBindingStrategy[resourceName]; ok { + bindingStrategy, err := s.registry.GetBindingStrategy(strategyName) + if err != nil { + general.Errorf("failed to get binding strategy %s: %v", strategyName, err) + bindingStrategy = s.bindingStrategy + } + return bindingStrategy + } else { + return s.bindingStrategy + } +} diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/canonical/bind.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/canonical/bind.go new file mode 100644 index 0000000000..fb3c8c65e7 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/canonical/bind.go @@ -0,0 +1,80 @@ +/* +Copyright 2022 The Katalyst Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package canonical + +import ( + "fmt" + + "k8s.io/apimachinery/pkg/util/sets" + + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies" + "github.com/kubewharf/katalyst-core/pkg/util/general" +) + +// Bind binds the sorted GPU devices to the allocation context +// It creates allocation info for the selected devices +func (s *CanonicalStrategy) Bind( + ctx *allocate.AllocationContext, sortedDevices []string, +) (*allocate.AllocationResult, error) { + valid, errMsg := strategies.IsBindingContextValid(ctx, sortedDevices) + if !valid { + return &allocate.AllocationResult{ + Success: false, + ErrorMessage: errMsg, + }, fmt.Errorf(errMsg) + } + + devicesToAllocate := int(ctx.DeviceReq.DeviceRequest) + allocatedDevices := sets.NewString() + allocateDevices := func(devices ...string) bool { + for _, device := range devices { + allocatedDevices.Insert(device) + if devicesToAllocate == allocatedDevices.Len() { + return true + } + } + return false + } + + // First try to bind reusable devices + if allocateDevices(ctx.DeviceReq.ReusableDevices...) { + return &allocate.AllocationResult{ + AllocatedDevices: allocatedDevices.UnsortedList(), + Success: true, + }, nil + } + + // Then try to bind devices from sorted list + if allocateDevices(sortedDevices...) 
{ + general.InfoS("Successfully bound devices", + "podNamespace", ctx.ResourceReq.PodNamespace, + "podName", ctx.ResourceReq.PodName, + "containerName", ctx.ResourceReq.ContainerName, + "allocatedDevices", allocatedDevices.List()) + + return &allocate.AllocationResult{ + AllocatedDevices: allocatedDevices.UnsortedList(), + Success: true, + }, nil + } + + return &allocate.AllocationResult{ + Success: false, + ErrorMessage: fmt.Sprintf("not enough devices: need %d, have %d", devicesToAllocate, len(sortedDevices)), + }, fmt.Errorf("not enough devices: need %d, have %d", devicesToAllocate, len(sortedDevices)) +} diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/canonical/bind_test.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/canonical/bind_test.go new file mode 100644 index 0000000000..6093729181 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/canonical/bind_test.go @@ -0,0 +1,143 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package canonical + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate" + "github.com/kubewharf/katalyst-core/pkg/util/machine" +) + +func TestCanonicalStrategy_Bind(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + ctx *allocate.AllocationContext + sortedDevices []string + expectedResult *allocate.AllocationResult + expectedErr bool + }{ + { + name: "device topology is nil", + ctx: &allocate.AllocationContext{ + ResourceReq: &v1alpha1.ResourceRequest{ + PodNamespace: "default", + PodName: "podName", + ContainerName: "containerName", + }, + DeviceReq: &v1alpha1.DeviceRequest{ + DeviceRequest: 2, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2"}, + expectedErr: true, + }, + { + name: "device request is greater than available devices", + ctx: &allocate.AllocationContext{ + ResourceReq: &v1alpha1.ResourceRequest{ + PodNamespace: "default", + PodName: "podName", + ContainerName: "containerName", + }, + DeviceReq: &v1alpha1.DeviceRequest{ + DeviceRequest: 4, + }, + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": {}, + "gpu-2": {}, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2"}, + expectedErr: true, + }, + { + name: "bind all the reusable devices first", + ctx: &allocate.AllocationContext{ + ResourceReq: &v1alpha1.ResourceRequest{ + PodNamespace: "default", + PodName: "podName", + ContainerName: "containerName", + }, + DeviceReq: &v1alpha1.DeviceRequest{ + DeviceRequest: 2, + ReusableDevices: []string{"gpu-1", "gpu-2"}, + }, + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": {}, + "gpu-2": {}, + "gpu-3": {}, + "gpu-4": {}, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, + expectedResult: &allocate.AllocationResult{ + AllocatedDevices: []string{"gpu-1", "gpu-2"}, + 
}, + }, + { + name: "bind all the reusable devices first and then the available devices", + ctx: &allocate.AllocationContext{ + ResourceReq: &v1alpha1.ResourceRequest{ + PodNamespace: "default", + PodName: "podName", + ContainerName: "containerName", + }, + DeviceReq: &v1alpha1.DeviceRequest{ + DeviceRequest: 4, + ReusableDevices: []string{"gpu-1", "gpu-2"}, + }, + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": {}, + "gpu-2": {}, + "gpu-3": {}, + "gpu-4": {}, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, + expectedResult: &allocate.AllocationResult{ + AllocatedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, + }, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + canonicalStrategy := NewCanonicalStrategy() + result, err := canonicalStrategy.Bind(tt.ctx, tt.sortedDevices) + if tt.expectedErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.ElementsMatch(t, tt.expectedResult.AllocatedDevices, result.AllocatedDevices) + } + }) + } +} diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/canonical/canonical.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/canonical/canonical.go new file mode 100644 index 0000000000..a63094487c --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/canonical/canonical.go @@ -0,0 +1,43 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package canonical + +import ( + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate" +) + +const ( + StrategyNameCanonical = "canonical" +) + +// CanonicalStrategy binds GPU devices to the allocation context +type CanonicalStrategy struct{} + +// NewCanonicalStrategy creates a new default binding strategy +func NewCanonicalStrategy() *CanonicalStrategy { + return &CanonicalStrategy{} +} + +var ( + _ allocate.BindingStrategy = &CanonicalStrategy{} + _ allocate.FilteringStrategy = &CanonicalStrategy{} +) + +// Name returns the name of the binding strategy +func (s *CanonicalStrategy) Name() string { + return StrategyNameCanonical +} diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/canonical/filter.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/canonical/filter.go new file mode 100644 index 0000000000..7782527ae5 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/canonical/filter.go @@ -0,0 +1,42 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package canonical + +import ( + "k8s.io/apimachinery/pkg/util/sets" + + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate" + gpuutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/util" +) + +// Filter filters the available devices based on whether they are already occupied. +// The assumption is that each device can only be allocated to one container at most. +// It only returns devices that are not occupied yet. +func (s *CanonicalStrategy) Filter( + ctx *allocate.AllocationContext, allAvailableDevices []string, +) ([]string, error) { + filteredDevices := sets.NewString() + for _, device := range allAvailableDevices { + if !ctx.HintNodes.IsEmpty() && !gpuutil.IsNUMAAffinityDevice(device, ctx.DeviceTopology, ctx.HintNodes) { + continue + } + + filteredDevices.Insert(device) + } + + return filteredDevices.UnsortedList(), nil +} diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/canonical/filter_test.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/canonical/filter_test.go new file mode 100644 index 0000000000..b53e40c3f3 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/canonical/filter_test.go @@ -0,0 +1,85 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package canonical + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate" + "github.com/kubewharf/katalyst-core/pkg/util/machine" +) + +func TestCanonicalStrategy_Filter(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + ctx *allocate.AllocationContext + availableDevices []string + expectedFilteredDevices []string + }{ + { + name: "empty hint nodes does not filter", + ctx: &allocate.AllocationContext{ + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + NumaNodes: []int{0, 1}, + }, + "gpu-2": { + NumaNodes: []int{2, 3}, + }, + }, + }, + }, + availableDevices: []string{"gpu-1", "gpu-2"}, + expectedFilteredDevices: []string{"gpu-1", "gpu-2"}, + }, + { + name: "filtered devices by hint nodes", + ctx: &allocate.AllocationContext{ + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + NumaNodes: []int{0, 1}, + }, + "gpu-2": { + NumaNodes: []int{2, 3}, + }, + }, + }, + HintNodes: machine.NewCPUSet(0, 1, 2), + }, + availableDevices: []string{"gpu-1", "gpu-2"}, + expectedFilteredDevices: []string{"gpu-1"}, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + canonicalStrategy := NewCanonicalStrategy() + actualFilteredDevices, err := canonicalStrategy.Filter(tt.ctx, tt.availableDevices) + assert.NoError(t, err) + assert.ElementsMatch(t, tt.expectedFilteredDevices, actualFilteredDevices, "filtered devices are not equal") + }) + } +} diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/deviceaffinity/bind.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/deviceaffinity/bind.go new file mode 100644 index 0000000000..d71b992235 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/deviceaffinity/bind.go @@ -0,0 +1,388 @@ +/* +Copyright 2022 The Katalyst Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package deviceaffinity + +import ( + "fmt" + "sort" + + "github.com/google/uuid" + "k8s.io/apimachinery/pkg/util/sets" + + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies" + "github.com/kubewharf/katalyst-core/pkg/util/general" + "github.com/kubewharf/katalyst-core/pkg/util/machine" +) + +// affinityGroup is a group of devices that have affinity to each other. +// It is uniquely identified by an id. +type affinityGroup struct { + id string + unallocatedDevices sets.String +} + +// Bind binds the sorted devices to the allocation context by searching for the devices that have affinity to each other. +func (s *DeviceAffinityStrategy) Bind( + ctx *allocate.AllocationContext, sortedDevices []string, +) (*allocate.AllocationResult, error) { + general.InfoS("device affinity strategy binding called", + "available devices", sortedDevices) + + valid, errMsg := strategies.IsBindingContextValid(ctx, sortedDevices) + if !valid { + return &allocate.AllocationResult{ + Success: false, + ErrorMessage: errMsg, + }, fmt.Errorf(errMsg) + } + + devicesToAllocate := int(ctx.DeviceReq.DeviceRequest) + reusableDevicesSet := sets.NewString(ctx.DeviceReq.ReusableDevices...) + + // All devices that are passed into the strategy are unallocated devices + unallocatedDevicesSet := sets.NewString(sortedDevices...) 
+ + // Get a map of affinity groups that is grouped by priority + affinityMap := ctx.DeviceTopology.GroupDeviceAffinity() + + // Get affinity groups organized by priority level + affinityGroupsMap := s.getAffinityGroupsByPriority(affinityMap, unallocatedDevicesSet) + + // Allocate reusable devices first + allocatedDevices, err := s.allocateCandidateDevices(affinityGroupsMap, + reusableDevicesSet.Intersection(unallocatedDevicesSet), devicesToAllocate, sets.NewString()) + if err != nil { + return &allocate.AllocationResult{ + Success: false, + ErrorMessage: fmt.Sprintf("failed to allocate reusable devices: %v", err), + }, fmt.Errorf("failed to allocate reusable devices: %v", err) + } + + if len(allocatedDevices) == devicesToAllocate { + return &allocate.AllocationResult{ + Success: true, + AllocatedDevices: allocatedDevices.UnsortedList(), + }, nil + } + + // Next, allocate left available devices + availableDevices := unallocatedDevicesSet.Difference(allocatedDevices) + allocatedDevices, err = s.allocateCandidateDevices(affinityGroupsMap, + availableDevices, devicesToAllocate, allocatedDevices) + if err != nil { + return &allocate.AllocationResult{ + Success: false, + ErrorMessage: fmt.Sprintf("failed to allocate available devices with affinity: %v", err), + }, fmt.Errorf("failed to allocate available devices with affinity: %v", err) + } + + // Return result once we have allocated all the devices + if len(allocatedDevices) == devicesToAllocate { + return &allocate.AllocationResult{ + Success: true, + AllocatedDevices: allocatedDevices.UnsortedList(), + }, nil + } + + return &allocate.AllocationResult{ + Success: false, + ErrorMessage: fmt.Sprintf("not enough devices to allocate: need %d, have %d", devicesToAllocate, len(allocatedDevices)), + }, fmt.Errorf("not enough devices to allocate: need %d, have %d", devicesToAllocate, len(allocatedDevices)) +} + +// getAffinityGroupsByPriority forms a map of affinityGroup by priority. 
+func (s *DeviceAffinityStrategy) getAffinityGroupsByPriority( + affinityMap map[machine.AffinityPriority][]machine.DeviceIDs, unallocatedDevicesSet sets.String, +) map[machine.AffinityPriority][]affinityGroup { + affinityGroupsMap := make(map[machine.AffinityPriority][]affinityGroup) + for priority, affinityDevices := range affinityMap { + affinityGroupsMap[priority] = s.getAffinityGroups(affinityDevices, unallocatedDevicesSet) + } + + return affinityGroupsMap +} + +// getAffinityGroups forms a list of affinityGroup with unallocated devices. +func (s *DeviceAffinityStrategy) getAffinityGroups( + affinityDevices []machine.DeviceIDs, unallocatedDevicesSet sets.String, +) []affinityGroup { + affinityGroups := make([]affinityGroup, 0, len(affinityDevices)) + + // Calculate the number of unallocated devices for each affinity group + for _, devices := range affinityDevices { + unallocatedDevices := sets.NewString() + for _, device := range devices { + if unallocatedDevicesSet.Has(device) { + unallocatedDevices.Insert(device) + } + } + affinityGroups = append(affinityGroups, affinityGroup{ + unallocatedDevices: unallocatedDevices, + id: uuid.NewString(), + }) + } + + return affinityGroups +} + +// allocateCandidateDevices optimally allocates GPU devices based on affinity priorities. +// This method implements a sophisticated allocation strategy that: +// 1. Prioritizes device groups with higher affinity levels +// 2. Minimizes fragmentation by selecting devices with strong mutual affinity +// 3. 
Balances between fulfilling exact requirements and maintaining optimal groupings +// +// Parameters: +// - affinityGroupsMap: Mapping of affinity priorities to device groups with those priorities +// - candidateDevicesSet: Set of available devices that can be allocated +// - devicesToAllocate: Total number of devices that need to be allocated +// - allocatedDevices: Set of devices that have already been allocated in previous iterations +// +// Returns: +// - sets.String: The complete set of allocated devices after this allocation round +// - error: Any error encountered during the allocation process +func (s *DeviceAffinityStrategy) allocateCandidateDevices( + affinityGroupsMap map[machine.AffinityPriority][]affinityGroup, + candidateDevicesSet sets.String, + devicesToAllocate int, + allocatedDevices sets.String, +) (sets.String, error) { + // Early termination conditions + if len(allocatedDevices) == devicesToAllocate || len(candidateDevicesSet) == 0 { + return allocatedDevices, nil + } + + // Calculate remaining devices needed + remainingDevicesToAllocate := devicesToAllocate - len(allocatedDevices) + + // Fast path: If we need all remaining candidates, allocate them all + if remainingDevicesToAllocate >= len(candidateDevicesSet) { + allocatedDevices = allocatedDevices.Union(candidateDevicesSet) + return allocatedDevices, nil + } + + // Process affinity groups from highest to lowest priority + for priority := 0; priority < len(affinityGroupsMap); priority++ { + affinityPriority := machine.AffinityPriority(priority) + affinityGroups, exists := affinityGroupsMap[affinityPriority] + if !exists || len(affinityGroups) == 0 { + continue + } + + // Prepare group information for evaluation + groupInfos := s.prepareGroupInfos(affinityGroups, candidateDevicesSet, allocatedDevices) + if len(groupInfos) == 0 { + continue + } + + // Sort groups by allocation suitability + s.sortGroupsByPriority(groupInfos, remainingDevicesToAllocate) + + // Try to allocate from the best 
matching groups + if result, fullyAllocated := s.tryAllocateFromGroups( + groupInfos, remainingDevicesToAllocate, allocatedDevices, devicesToAllocate, + ); fullyAllocated { + return result, nil + } + + // For the lowest priority, use more flexible allocation strategies + if priority == len(affinityGroupsMap)-1 { + return s.handleLowestPriorityAllocation( + groupInfos, affinityGroupsMap, candidateDevicesSet, + devicesToAllocate, allocatedDevices, remainingDevicesToAllocate, + ) + } + } + + return allocatedDevices, nil +} + +// prepareGroupInfos processes affinity groups and extracts relevant allocation information. +// This helper method filters out groups with no candidate devices and calculates +// the intersection between group devices and available candidates. +func (s *DeviceAffinityStrategy) prepareGroupInfos( + affinityGroups []affinityGroup, + candidateDevicesSet sets.String, + allocatedDevices sets.String, +) []groupInfo { + groupInfos := make([]groupInfo, 0, len(affinityGroups)) + + for _, group := range affinityGroups { + // Find devices in this group that are also candidates + candidates := group.unallocatedDevices.Intersection(candidateDevicesSet) + if candidates.Len() == 0 { + continue // Skip groups with no matching candidates + } + + // Calculate unallocated and allocated device sets for this group + unallocated := group.unallocatedDevices.Difference(allocatedDevices) + if unallocated.Len() == 0 { + continue // Skip groups where all devices are already allocated + } + + allocated := group.unallocatedDevices.Intersection(allocatedDevices) + + groupInfos = append(groupInfos, groupInfo{ + group: group, + candidates: candidates, + allocated: allocated, + unallocated: unallocated, + }) + } + + return groupInfos +} + +// sortGroupsByPriority sorts affinity groups based on allocation suitability. +// The sorting criteria are: +// 1. Proximity to the exact number of devices needed (closer is better) +// 2. 
Total unallocated devices (smaller is better to minimize fragmentation) +// 3. Already allocated devices (larger is better to maintain consistency) +func (s *DeviceAffinityStrategy) sortGroupsByPriority( + groupInfos []groupInfo, + remainingDevicesToAllocate int, +) { + sort.Slice(groupInfos, func(i, j int) bool { + // Calculate absolute difference from needed devices + diffI := abs(groupInfos[i].candidates.Len() - remainingDevicesToAllocate) + diffJ := abs(groupInfos[j].candidates.Len() - remainingDevicesToAllocate) + + // Prefer groups closer to the exact number needed + if diffI != diffJ { + return diffI < diffJ + } + + // Prefer groups with fewer unallocated devices to reduce fragmentation + if groupInfos[i].unallocated.Len() != groupInfos[j].unallocated.Len() { + return groupInfos[i].unallocated.Len() < groupInfos[j].unallocated.Len() + } + + // Prefer groups with more already allocated devices for consistency + if groupInfos[i].allocated.Len() != groupInfos[j].allocated.Len() { + return groupInfos[i].allocated.Len() > groupInfos[j].allocated.Len() + } + + return groupInfos[i].group.id < groupInfos[j].group.id + }) +} + +// tryAllocateFromGroups attempts to allocate devices from the prioritized groups. +// It first tries to find an exact match, then falls back to partial allocations. 
+func (s *DeviceAffinityStrategy) tryAllocateFromGroups( + groupInfos []groupInfo, + remainingDevicesToAllocate int, + allocatedDevices sets.String, + devicesToAllocate int, +) (sets.String, bool) { + // Try to find groups that can exactly satisfy the remaining requirement + for _, group := range groupInfos { + // Check if this group can satisfy the exact remaining requirement and + // ensure affinity allocation if there are already allocated devices + if remainingDevicesToAllocate <= group.candidates.Len() && + !(allocatedDevices.Len() > 0 && group.allocated.Len() <= 0) { + + // Add all candidate devices from this group + for _, device := range group.candidates.List() { + allocatedDevices.Insert(device) + if len(allocatedDevices) == devicesToAllocate { + return allocatedDevices, true // Fully allocated + } + } + return allocatedDevices, true + } + } + + return allocatedDevices, false // Not fully allocated +} + +// handleLowestPriorityAllocation implements flexible allocation strategies for the lowest priority. +// This method is more permissive in its allocation strategy to ensure device requirements are met. 
func (s *DeviceAffinityStrategy) handleLowestPriorityAllocation(
	groupInfos []groupInfo,
	affinityGroupsMap map[machine.AffinityPriority][]affinityGroup,
	candidateDevicesSet sets.String,
	devicesToAllocate int,
	allocatedDevices sets.String,
	remainingDevicesToAllocate int,
) (sets.String, error) {
	// First try to allocate an entire group that fits within the remaining
	// requirement, and (when something is already allocated) only a group that
	// shares affinity with the already-allocated devices.
	for _, group := range groupInfos {
		if remainingDevicesToAllocate >= group.candidates.Len() &&
			!(allocatedDevices.Len() > 0 && group.allocated.Len() <= 0) {

			// Allocate all candidate devices from this group.
			allocatedDevices = allocatedDevices.Union(group.candidates)

			// Recursively allocate the remainder from the candidates left over
			// after removing this group's devices.
			return s.allocateCandidateDevices(
				affinityGroupsMap,
				candidateDevicesSet.Difference(group.candidates),
				devicesToAllocate,
				allocatedDevices,
			)
		}
	}

	// If no exact matches, try partial allocations from larger groups.
	// NOTE(review): both branches below return, so only the first (best-sorted)
	// group is ever examined by this loop — confirm that is intentional rather
	// than a missing fall-through to later groups.
	for _, group := range groupInfos {
		// Check if this group can contribute to the remaining requirement.
		if remainingDevicesToAllocate >= group.candidates.Len() {
			// Group fits entirely: take all of it and recurse for the rest.
			allocatedDevices = allocatedDevices.Union(group.candidates)

			return s.allocateCandidateDevices(
				affinityGroupsMap,
				candidateDevicesSet.Difference(group.candidates),
				devicesToAllocate,
				allocatedDevices,
			)
		} else {
			// Group is larger than needed: recursively pick a best-affinity
			// subset of this group's candidates, seeded with the devices of the
			// group that are already allocated.
			devices, err := s.allocateCandidateDevices(
				affinityGroupsMap,
				group.candidates,
				remainingDevicesToAllocate,
				group.allocated,
			)
			if err != nil {
				return nil, err
			}

			return allocatedDevices.Union(devices), nil
		}
	}

	return allocatedDevices, nil
}

// abs returns the absolute value of an integer.
func abs(x int) int {
	if x < 0 {
		return -x
	}
	return x
}

// groupInfo contains pre-calculated information about an affinity group
// to optimize the allocation process by avoiding repeated set calculations.
type groupInfo struct {
	group       affinityGroup // The affinity group this info was derived from
	candidates  sets.String   // Devices in this group that are also candidates
	allocated   sets.String   // Devices in this group that are already allocated
	unallocated sets.String   // Devices in this group that are not yet allocated
}
diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/deviceaffinity/bind_test.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/deviceaffinity/bind_test.go
new file mode 100644
index 0000000000..2dcec69535
--- /dev/null
+++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/deviceaffinity/bind_test.go
@@ -0,0 +1,2179 @@
/*
Copyright 2022 The Katalyst Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
+*/ + +package deviceaffinity + +import ( + "reflect" + "sort" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + v1 "k8s.io/api/core/v1" + pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/state" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate" + "github.com/kubewharf/katalyst-core/pkg/util/machine" +) + +func TestBind_NumberOfDevicesAllocated(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + ctx *allocate.AllocationContext + sortedDevices []string + expectedResult *allocate.AllocationResult + expectedErr bool + isRandom bool + expectedResultSize int + }{ + { + name: "able to allocate 1 device in affinity group of size 2", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: nil, + DeviceRequest: 1, + }, + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-3"}, + }, + }, + "gpu-5": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-6"}, + }, + }, + "gpu-6": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5"}, + }, + }, + "gpu-7": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-8"}, + }, + }, + "gpu-8": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-7"}, + }, + }, + }, + }, + }, + 
sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + isRandom: true, + expectedResultSize: 1, + }, + { + name: "able to allocate 2 devices in affinity group of size 2", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: nil, + DeviceRequest: 2, + }, + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-3"}, + }, + }, + "gpu-5": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-6"}, + }, + }, + "gpu-6": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5"}, + }, + }, + "gpu-7": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-8"}, + }, + }, + "gpu-8": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-7"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + isRandom: true, + expectedResultSize: 2, + }, + { + name: "able to allocate 3 devices in affinity size of group 2", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: nil, + DeviceRequest: 3, + }, + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: 
map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-3"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, + isRandom: true, + expectedResultSize: 3, + }, + { + name: "able to allocate 4 devices in affinity size of group 2", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: nil, + DeviceRequest: 4, + }, + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-3"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, + expectedResult: &allocate.AllocationResult{ + AllocatedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, + Success: true, + }, + }, + { + name: "able to allocate 2 devices in affinity size of group 4", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: nil, + DeviceRequest: 2, + }, + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: 
map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2", "gpu-3", "gpu-4"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-3", "gpu-4"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-2", "gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-2", "gpu-3"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, + isRandom: true, + expectedResultSize: 2, + }, + { + name: "allocate all reusable devices first", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: []string{"gpu-1", "gpu-2"}, + DeviceRequest: 2, + }, + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-3"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, + expectedResult: &allocate.AllocationResult{ + AllocatedDevices: []string{"gpu-1", "gpu-2"}, + Success: true, + }, + }, + { + name: "allocate the reusable devices with the best affinity to each other first", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: []string{"gpu-1", "gpu-3", "gpu-4"}, + DeviceRequest: 2, // should allocate gpu-3 and 
gpu-4 + }, + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-3"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-3", "gpu-4"}, + expectedResult: &allocate.AllocationResult{ + AllocatedDevices: []string{"gpu-3", "gpu-4"}, + Success: true, + }, + }, + { + name: "supports bin-packing of 1 allocated device", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: []string{"gpu-1", "gpu-3"}, // gpu-4 is already allocated, so we should allocate gpu-3 to support bin-packing + DeviceRequest: 1, + }, + // Level 0: [gpu-1, gpu-2], [gpu-3, gpu-4] + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-3"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3"}, + // gpu-4 is already allocated, so we allocate gpu-3 for bin-packing + expectedResult: &allocate.AllocationResult{ + AllocatedDevices: []string{"gpu-3"}, + Success: true, + }, + }, + { + name: "supports bin-packing of 1 request with only available devices", + ctx: 
&allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: nil, + DeviceRequest: 1, + }, + // Level 0: [gpu-1, gpu-2], [gpu-3, gpu-4] + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-3"}, + }, + }, + "gpu-5": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-6"}, + }, + }, + "gpu-6": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5"}, + }, + }, + "gpu-7": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-8"}, + }, + }, + "gpu-8": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-7"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu1", "gpu2", "gpu-3", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + expectedResult: &allocate.AllocationResult{ + AllocatedDevices: []string{"gpu-3"}, + Success: true, + }, + }, + { + name: "supports of bin-packing of 2 allocated devices", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: []string{"gpu-1", "gpu-3", "gpu-5", "gpu-6"}, // gpu-5 and gou-6 should be allocated because gpu-7 is already allocated and this supports bin-packing + DeviceRequest: 2, + }, + // Level 0: [gpu-1, gpu-2, gpu-3, gpu-4], [gpu-5, gpu-6, gpu-7, gpu-8] + DeviceTopology: &machine.DeviceTopology{ + Devices: 
map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2", "gpu-3", "gpu-4"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-3", "gpu-4"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-2", "gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-2", "gpu-3"}, + }, + }, + "gpu-5": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-6": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-7", "gpu-8"}, + }, + }, + "gpu-7": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-6", "gpu-8"}, + }, + }, + "gpu-8": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-6", "gpu-7"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-8"}, + expectedResult: &allocate.AllocationResult{ + AllocatedDevices: []string{"gpu-5", "gpu-6"}, + Success: true, + }, + }, + { + name: "bin-packing of more allocated devices are preferred over less allocated devices", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: []string{"gpu-2", "gpu-3", "gpu-6", "gpu-7", "gpu-9", "gpu-10"}, + DeviceRequest: 4, + }, + // Level 0: [gpu-1, gpu-2, gpu-3, gpu-4], [gpu-5, gpu-6, gpu-7, gpu-8], [gpu-9, gpu-10, gpu-11, gpu-12] + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2", "gpu-3", "gpu-4"}, + }, + }, + "gpu-2": { + DeviceAffinity: 
map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-3", "gpu-4"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-2", "gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-2", "gpu-3"}, + }, + }, + "gpu-5": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-6": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-7", "gpu-8"}, + }, + }, + "gpu-7": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-6", "gpu-8"}, + }, + }, + "gpu-8": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-6", "gpu-7"}, + }, + }, + "gpu-9": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-10", "gpu-11", "gpu-12"}, + }, + }, + "gpu-10": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-9", "gpu-11", "gpu-12"}, + }, + }, + "gpu-11": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-9", "gpu-10", "gpu-12"}, + }, + }, + "gpu-12": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-9", "gpu-10", "gpu-11"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu-2", "gpu-3", "gpu-6", "gpu-7", "gpu-8", "gpu-9", "gpu-10", "gpu-11", "gpu-12"}, + // [gpu-1, gpu-2, gpu-3, gpu-4] already has 2 allocated devices, [gpu-5, gpu-6, gpu-7, gpu-8] has 1 allocated device, [gpu-9, gpu-10, gpu-11, gpu-12] has no allocated devices + // To support bin-packing, we will allocate gpu-2 and gpu-3 first, then allocate gpu-6 and gpu-7 + expectedResult: &allocate.AllocationResult{ + AllocatedDevices: []string{"gpu-2", "gpu-3", "gpu-6", "gpu-7"}, + Success: true, + }, + }, + { + name: "finds first level of device affinity, then finds second level of device affinity", + ctx: 
&allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-5", "gpu-7"}, // gpu-1 and gpu-2 have affinity in 1st level, gpu-5 and gpu-7 have affinity in 2nd level + DeviceRequest: 4, + }, + // Level 0: [gpu-1, gpu-2], [gpu-3, gpu-4], [gpu-5, gpu-6], [gpu-7, gpu-8] + // Level 1: [gpu-1, gpu-2, gpu-3, gpu-4], [gpu-5, gpu-6, gpu-7, gpu-8] + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2"}, + 1: {"gpu-2", "gpu-3", "gpu-4"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1"}, + 1: {"gpu-1", "gpu-3", "gpu-4"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-4"}, + 1: {"gpu-1", "gpu-2", "gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-3"}, + 1: {"gpu-1", "gpu-2", "gpu-3"}, + }, + }, + "gpu-5": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-6"}, + 1: {"gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-6": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5"}, + 1: {"gpu-5", "gpu-7", "gpu-8"}, + }, + }, + "gpu-7": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-8"}, + 1: {"gpu-5", "gpu-6", "gpu-8"}, + }, + }, + "gpu-8": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-7"}, + 1: {"gpu-5", "gpu-6", "gpu-7"}, + }, + }, + }, + }, + }, + // Allocate gpu-1, gpu-2 in first level, then allocate gpu-3 as it has affinity with gpu-1 and gpu-2 + // Then allocate gpu-5 as gpu-6 is already allocated + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-5", "gpu-7", "gpu-8"}, + expectedResult: 
&allocate.AllocationResult{ + AllocatedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-5"}, + Success: true, + }, + }, + { + name: "allocate reusable devices first, then allocate available devices with affinity to the allocated reusable devices", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: []string{"gpu-1", "gpu-5"}, + DeviceRequest: 4, + }, + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-3"}, + }, + }, + "gpu-5": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-6"}, + }, + }, + "gpu-6": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5"}, + }, + }, + "gpu-7": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-8"}, + }, + }, + "gpu-8": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-7"}, + }, + }, + }, + }, + MachineState: map[v1.ResourceName]state.AllocationMap{ + "gpu": { + "gpu-1": {}, + "gpu-2": {}, + "gpu-3": {}, + "gpu-4": {}, + "gpu-5": {}, + "gpu-6": {}, + "gpu-7": {}, + "gpu-8": {}, + }, + }, + }, + // gpu-1 and gpu-5 are already allocated as they are reusable + // gpu-2 and gpu-6 should be allocated as they have affinity to the already allocated gpu-1 and gpu-5 + sortedDevices: []string{"gpu-1", "gpu-5", "gpu-2", "gpu-6", "gpu-7", "gpu-8", "gpu-3", "gpu-4"}, + expectedResult: &allocate.AllocationResult{ + AllocatedDevices: 
[]string{"gpu-1", "gpu-2", "gpu-5", "gpu-6"}, + Success: true, + }, + }, + { + name: "after allocating available devices with affinity to reusable devices, still not enough devices, allocate more available devices", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: []string{"gpu-1", "gpu-5"}, + DeviceRequest: 6, + }, + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-3"}, + }, + }, + "gpu-5": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-6"}, + }, + }, + "gpu-6": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5"}, + }, + }, + "gpu-7": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-8"}, + }, + }, + "gpu-8": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-7"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7"}, + // gpu-1 and gpu-5 are allocated because they are reusable devices + // gpu-2 and gpu-6 should be allocated as they have affinity to the already allocated gpu-1 and gpu-5 + // gpu-3 and gpu-4 should be allocated as they have affinity to each other + expectedResult: &allocate.AllocationResult{ + AllocatedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6"}, + Success: true, + }, + }, + { + name: "allocation of reusable devices in descending order of intersection 
size", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-9"}, + DeviceRequest: 6, + }, + // 1 level: [gpu-1, gpu-2, gpu-3, gpu-4], [gpu-5, gpu-6, gpu-7, gpu-8], [gpu-9, gpu-10, gpu-11, gpu-12] + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2", "gpu-3", "gpu-4"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-3", "gpu-4"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-2", "gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-2", "gpu-3"}, + }, + }, + "gpu-5": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-6": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-7", "gpu-8"}, + }, + }, + "gpu-7": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-6", "gpu-8"}, + }, + }, + "gpu-8": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-6", "gpu-7"}, + }, + }, + "gpu-9": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-10", "gpu-11", "gpu-12"}, + }, + }, + "gpu-10": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-9", "gpu-11", "gpu-12"}, + }, + }, + "gpu-11": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-9", "gpu-10", "gpu-12"}, + }, + }, + "gpu-12": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-9", "gpu-10", "gpu-11"}, + }, + }, + 
}, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-9"}, + // Should allocate gpu-1, gpu-2, gpu-3 and gpu-4 first because they have the most intersection size with an affinity group + // Followed by gpu-5 and gpu-6 as they have the second most intersection size with an affinity group + expectedResult: &allocate.AllocationResult{ + AllocatedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6"}, + Success: true, + }, + }, + { + name: "allocation of available devices in descending order of intersection size after allocating all reusable devices", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: []string{"gpu-1", "gpu-5", "gpu-9", "gpu-10"}, + DeviceRequest: 8, + }, + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2", "gpu-3", "gpu-4"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-3", "gpu-4"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-2", "gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-2", "gpu-3"}, + }, + }, + "gpu-5": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-6": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-7", "gpu-8"}, + }, + }, + "gpu-7": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-6", "gpu-8"}, + }, + }, + "gpu-8": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-6", "gpu-7"}, + }, + }, + "gpu-9": { + DeviceAffinity: 
map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-10", "gpu-11", "gpu-12"}, + }, + }, + "gpu-10": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-9", "gpu-11", "gpu-12"}, + }, + }, + "gpu-11": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-9", "gpu-10", "gpu-12"}, + }, + }, + "gpu-12": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-9", "gpu-10", "gpu-11"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-7", "gpu-8", "gpu-9", "gpu-10", "gpu-11"}, + // gpu-6 and gpu-12 is allocated + // Allocate gpu-1, gpu-5, gpu-9 and gpu-10 first because they are reusable devices + // Allocate gpu-2, gpu-3 and gpu-4 next because they have the most intersection with an affinity group + // Between (gpu-7, gpu-8) and (gpu-10, gpu-11), allocate gpu-10 and gpu-11 because the affinity group has fewer unallocated devices (bin-packing). + expectedResult: &allocate.AllocationResult{ + AllocatedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-9", "gpu-10", "gpu-11"}, + Success: true, + }, + }, + { + name: "allocate available devices that have affinity with reusable devices from highest to lowest priority", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: []string{"gpu-1", "gpu-5"}, + DeviceRequest: 6, + }, + // Level 0: [gpu-1, gpu-2], [gpu-3, gpu-4], [gpu-5, gpu-6], [gpu-7, gpu-8], [gpu-9, gpu-10], [gpu-11, gpu-12] + // Level 1: [gpu-1, gpu-2, gpu-3, gpu-4], [gpu-5, gpu-6, gpu-7, gpu-8], [gpu-9, gpu-10, gpu-11, gpu-12] + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2"}, + 1: {"gpu-2", "gpu-3", "gpu-4"}, + }, + }, + "gpu-2": { + 
DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1"}, + 1: {"gpu-1", "gpu-3", "gpu-4"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-4"}, + 1: {"gpu-1", "gpu-2", "gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-3"}, + 1: {"gpu-1", "gpu-2", "gpu-3"}, + }, + }, + "gpu-5": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-6"}, + 1: {"gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-6": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5"}, + 1: {"gpu-5", "gpu-7", "gpu-8"}, + }, + }, + "gpu-7": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-8"}, + 1: {"gpu-5", "gpu-6", "gpu-8"}, + }, + }, + "gpu-8": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-7"}, + 1: {"gpu-5", "gpu-6", "gpu-7"}, + }, + }, + "gpu-9": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-10"}, + 1: {"gpu-10", "gpu-11", "gpu-12"}, + }, + }, + "gpu-10": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-9"}, + 1: {"gpu-9", "gpu-11", "gpu-12"}, + }, + }, + "gpu-11": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-12"}, + 1: {"gpu-9", "gpu-10", "gpu-12"}, + }, + }, + "gpu-12": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-11"}, + 1: {"gpu-9", "gpu-10", "gpu-11"}, + }, + }, + }, + }, + MachineState: map[v1.ResourceName]state.AllocationMap{ + "gpu": { + "gpu-1": {}, + "gpu-2": {}, + "gpu-3": {}, + "gpu-4": {}, + "gpu-5": {}, + "gpu-6": {}, + "gpu-7": {}, + "gpu-8": {}, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-5", "gpu-6", "gpu-7", "gpu-8", "gpu-9", "gpu-10", "gpu-11", "gpu-12"}, + // Allocate gpu-1 and gpu-5 first because they are reusable devices + // Allocate gpu-2 next because they have affinity with 
gpu-1 at the highest affinity priority (level 0) + // Allocate gpu-6 and gpu-8 next because they have affinity with gpu-5 at the next highest affinity priority (level 1) + expectedResult: &allocate.AllocationResult{ + AllocatedDevices: []string{"gpu-1", "gpu-2", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + Success: true, + }, + }, + { + name: "allocation of odd number of devices", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: []string{"gpu-1", "gpu-3", "gpu-5"}, + DeviceRequest: 5, + }, + // Level 0: [gpu-1, gpu-2], [gpu-3, gpu-4], [gpu-5, gpu-6], [gpu-7, gpu-8] + // Level 1: [gpu-1, gpu-2, gpu-3, gpu-4], [gpu-5, gpu-6, gpu-7, gpu-8] + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2"}, + 1: {"gpu-2", "gpu-3", "gpu-4"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1"}, + 1: {"gpu-1", "gpu-3", "gpu-4"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-4"}, + 1: {"gpu-1", "gpu-2", "gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-3"}, + 1: {"gpu-1", "gpu-2", "gpu-3"}, + }, + }, + "gpu-5": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-6"}, + 1: {"gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-6": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5"}, + 1: {"gpu-5", "gpu-7", "gpu-8"}, + }, + }, + "gpu-7": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-8"}, + 1: {"gpu-5", "gpu-6", "gpu-8"}, + }, + }, + "gpu-8": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-7"}, + 1: {"gpu-5", "gpu-6", "gpu-7"}, + }, + 
}, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-7", "gpu-8"}, + // Allocate gpu-1, gpu-3 and gpu-5 first because they are reusable devices + // Allocate gpu-2 and gpu-4 next because they have affinity with gpu-1 and gpu-3 at the highest affinity priority (level 0) + expectedResult: &allocate.AllocationResult{ + AllocatedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5"}, + Success: true, + }, + }, + { + name: "allocation of available devices for 1st level of affinity priority if there are no reusable devices", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: []string{}, + DeviceRequest: 2, + }, + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2"}, + 1: {"gpu-2", "gpu-3", "gpu-4"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1"}, + 1: {"gpu-1", "gpu-3", "gpu-4"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-4"}, + 1: {"gpu-1", "gpu-2", "gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-3"}, + 1: {"gpu-1", "gpu-2", "gpu-3"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-3", "gpu-4"}, + // No reusable devices to allocate. + // Allocate gpu-3 and gpu-4 because they have the best level-0 affinity to each other (gpu-2 is already allocated, so gpu-1 has no available level-0 partner).
+ expectedResult: &allocate.AllocationResult{ + AllocatedDevices: []string{"gpu-3", "gpu-4"}, + Success: true, + }, + }, + { + name: "when first priority level is not able to determine an allocation, go to the next priority level to allocate", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: []string{"gpu-1", "gpu-2", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + DeviceRequest: 4, + }, + // Level 0: [gpu-1, gpu-2], [gpu-3, gpu-4], [gpu-5, gpu-6], [gpu-7, gpu-8] + // Level 1: [gpu-1, gpu-2, gpu-3, gpu-4], [gpu-5, gpu-6, gpu-7, gpu-8] + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2"}, + 1: {"gpu-2", "gpu-3", "gpu-4"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1"}, + 1: {"gpu-1", "gpu-3", "gpu-4"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-4"}, + 1: {"gpu-1", "gpu-2", "gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-3"}, + 1: {"gpu-1", "gpu-2", "gpu-3"}, + }, + }, + "gpu-5": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-6"}, + 1: {"gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-6": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5"}, + 1: {"gpu-5", "gpu-7", "gpu-8"}, + }, + }, + "gpu-7": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-8"}, + 1: {"gpu-5", "gpu-6", "gpu-8"}, + }, + }, + "gpu-8": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-7"}, + 1: {"gpu-5", "gpu-6", "gpu-7"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + // All 
of the above devices are reusable devices + // At first priority level, they all give an intersection size of 2, but there are 6 of them, and we only need 4 of them + // Since there is another priority level, we go to that priority level to get the best device affinity + // (gpu-1 and gpu-2), (gpu-5 and gpu-6), (gpu-7, gpu-8) are affinity groups at priority level 0 + // But (gpu-5, gpu-6, gpu-7, gpu-8) are affinity groups at priority level 1, so we allocate gpu-5, gpu-6, gpu-7, gpu-8 + expectedResult: &allocate.AllocationResult{ + AllocatedDevices: []string{"gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + Success: true, + }, + }, + { + name: "reusable devices are bin-packed, so we allocate the remaining available devices without considering affinity with the reusable devices", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: []string{"gpu-1", "gpu-4"}, + DeviceRequest: 4, + }, + // Level 0: [gpu-1, gpu-2], [gpu-3, gpu-4], [gpu-5, gpu-6], [gpu-7, gpu-8] + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-3"}, + }, + }, + "gpu-5": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-6"}, + }, + }, + "gpu-6": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5"}, + }, + }, + "gpu-7": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-8"}, + }, + }, + "gpu-8": { + DeviceAffinity: 
map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-7"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-4", "gpu-5", "gpu-7", "gpu-8"}, + // gpu-1 and gpu-4 are allocated because they are reusable devices. + // gpu-2 and gpu-3 are already allocated so we cannot find any affinity to the reusable devices. + // allocate gpu-7 and gpu-8 because they have affinity to one another. + expectedResult: &allocate.AllocationResult{ + AllocatedDevices: []string{"gpu-1", "gpu-4", "gpu-7", "gpu-8"}, + Success: true, + }, + }, + { + name: "insufficient devices to allocate, causing an error", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: []string{"gpu-1", "gpu-2"}, + DeviceRequest: 4, + }, + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-3"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2"}, + // gpu-3 and gpu-4 are already allocated + // gpu-1 and gpu-2 are not enough to satisfy the request + expectedResult: &allocate.AllocationResult{ + Success: false, + }, + expectedErr: true, + }, + { + name: "if there is another priority level, allocate only the max intersection and then go to the next priority level and allocate", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: 
[]string{"gpu-1", "gpu-5", "gpu-6", "gpu-7"}, + DeviceRequest: 3, + }, + // Level 0: [gpu-1, gpu-2], [gpu-3, gpu-4], [gpu-5, gpu-6], [gpu-7, gpu-8] + // Level 1: [gpu-1, gpu-2, gpu-3, gpu-4], [gpu-5, gpu-6, gpu-7, gpu-8] + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2"}, + 1: {"gpu-2", "gpu-3", "gpu-4"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1"}, + 1: {"gpu-1", "gpu-3", "gpu-4"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-4"}, + 1: {"gpu-1", "gpu-2", "gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-3"}, + 1: {"gpu-1", "gpu-2", "gpu-3"}, + }, + }, + "gpu-5": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-6"}, + 1: {"gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-6": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5"}, + 1: {"gpu-5", "gpu-7", "gpu-8"}, + }, + }, + "gpu-7": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-8"}, + 1: {"gpu-5", "gpu-6", "gpu-8"}, + }, + }, + "gpu-8": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-7"}, + 1: {"gpu-5", "gpu-6", "gpu-7"}, + }, + }, + }, + }, + MachineState: map[v1.ResourceName]state.AllocationMap{ + "gpu": { + "gpu-1": {}, + "gpu-2": {}, + "gpu-3": {}, + "gpu-4": {}, + "gpu-5": {}, + "gpu-6": {}, + "gpu-7": {}, + "gpu-8": {}, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-5", "gpu-6", "gpu-7"}, + expectedResult: &allocate.AllocationResult{ + AllocatedDevices: []string{"gpu-5", "gpu-6", "gpu-7"}, + Success: true, + }, + }, + { + name: "4 devices in affinity priority 0, 8 devices in affinity priority 1, allocate 8 devices", + ctx: &allocate.AllocationContext{ + ResourceReq: 
&pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: nil, + DeviceRequest: 2, + }, + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, + 1: {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, + 1: {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, + 1: {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-2", "gpu-3", "gpu-4"}, + 1: {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-5": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + 1: {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-6": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + 1: {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-7": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + 1: {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-8": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + 1: {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-9": { + DeviceAffinity:
map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-9", "gpu-10", "gpu-11", "gpu-12"}, + 1: {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + }, + }, + "gpu-10": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-9", "gpu-10", "gpu-11", "gpu-12"}, + 1: {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + }, + }, + "gpu-11": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-9", "gpu-10", "gpu-11", "gpu-12"}, + 1: {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + }, + }, + "gpu-12": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-9", "gpu-10", "gpu-11", "gpu-12"}, + 1: {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + }, + }, + "gpu-13": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + 1: {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + }, + }, + "gpu-14": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + 1: {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + }, + }, + "gpu-15": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + 1: {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + }, + }, + "gpu-16": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + 1: {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + isRandom: true, + expectedResultSize: 2, + }, + { + name: "allocate first level of affinity priority devices first, then second 
level of affinity priority, both have affinity to reusable devices", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: []string{"gpu-0"}, + DeviceRequest: 3, + }, + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-0": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1"}, + 1: {"gpu-1", "gpu-4", "gpu-5"}, + }, + }, + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-0"}, + 1: {"gpu-0", "gpu-4", "gpu-5"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-3"}, + 1: {"gpu-3", "gpu-6", "gpu-7"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2"}, + 1: {"gpu-2", "gpu-6", "gpu-7"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5"}, + 1: {"gpu-0", "gpu-1", "gpu-5"}, + }, + }, + "gpu-5": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-4"}, + 1: {"gpu-0", "gpu-1", "gpu-4"}, + }, + }, + "gpu-6": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-7"}, + 1: {"gpu-2", "gpu-3", "gpu-7"}, + }, + }, + "gpu-7": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-6"}, + 1: {"gpu-2", "gpu-3", "gpu-6"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu-0", "gpu-1", "gpu-2", "gpu-4", "gpu-6", "gpu-7"}, + expectedResult: &allocate.AllocationResult{ + AllocatedDevices: []string{"gpu-0", "gpu-1", "gpu-4"}, + Success: true, + }, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + deviceBindingStrategy := NewDeviceAffinityStrategy() + result, err := deviceBindingStrategy.Bind(tt.ctx, tt.sortedDevices) + if (err != nil) != 
tt.expectedErr { + t.Errorf("Bind() error = %v, expectedErr %v", err, tt.expectedErr) + } + verifyAllocationResult(t, result, tt.expectedResult, tt.isRandom, tt.expectedResultSize) + }) + } +} + +func TestBind_DeviceAffinity(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + ctx *allocate.AllocationContext + sortedDevices []string + expectedErr bool + expectedAffinityPriorityLevel int + }{ + { + name: "1 level of device affinity, 2 devices in a group", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: nil, + DeviceRequest: 2, + }, + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-3"}, + }, + }, + "gpu-5": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-6"}, + }, + }, + "gpu-6": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5"}, + }, + }, + "gpu-7": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-8"}, + }, + }, + "gpu-8": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-7"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + expectedAffinityPriorityLevel: 0, + }, + { + name: "2 devices in affinity priority 0, 4 devices in affinity priority 1", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: 
"container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: nil, + DeviceRequest: 4, + }, + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2"}, + 1: {"gpu-2", "gpu-3", "gpu-4"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1"}, + 1: {"gpu-1", "gpu-3", "gpu-4"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-4"}, + 1: {"gpu-1", "gpu-2", "gpu-4"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-3"}, + 1: {"gpu-1", "gpu-2", "gpu-3"}, + }, + }, + "gpu-5": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-6"}, + 1: {"gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-6": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5"}, + 1: {"gpu-5", "gpu-7", "gpu-8"}, + }, + }, + "gpu-7": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-8"}, + 1: {"gpu-5", "gpu-6", "gpu-8"}, + }, + }, + "gpu-8": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-7"}, + 1: {"gpu-5", "gpu-6", "gpu-7"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + expectedAffinityPriorityLevel: 1, + }, + { + name: "4 devices in affinity priority 0, 8 devices in affinity priority 1, allocate 4 devices", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8", "gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + DeviceRequest: 4, + }, + 
DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2", "gpu-3", "gpu-4"}, + 1: {"gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-3", "gpu-4"}, + 1: {"gpu-1", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-2", "gpu-4"}, + 1: {"gpu-1", "gpu-2", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-2", "gpu-3"}, + 1: {"gpu-1", "gpu-2", "gpu-3", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-5": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-6", "gpu-7", "gpu-8"}, + 1: {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-6": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-7", "gpu-8"}, + 1: {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-7", "gpu-8"}, + }, + }, + "gpu-7": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-6", "gpu-8"}, + 1: {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-8"}, + }, + }, + "gpu-8": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-6", "gpu-7"}, + 1: {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7"}, + }, + }, + "gpu-9": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-10", "gpu-11", "gpu-12"}, + 1: {"gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + }, + }, + "gpu-10": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-9", "gpu-11", "gpu-12"}, + 1: {"gpu-9", "gpu-11", "gpu-12", "gpu-13", 
"gpu-14", "gpu-15", "gpu-16"}, + }, + }, + "gpu-11": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-9", "gpu-10", "gpu-12"}, + 1: {"gpu-9", "gpu-10", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + }, + }, + "gpu-12": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-9", "gpu-10", "gpu-11"}, + 1: {"gpu-9", "gpu-10", "gpu-11", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + }, + }, + "gpu-13": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-14", "gpu-15", "gpu-16"}, + 1: {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-14", "gpu-15", "gpu-16"}, + }, + }, + "gpu-14": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-13", "gpu-15", "gpu-16"}, + 1: {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-15", "gpu-16"}, + }, + }, + "gpu-15": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-13", "gpu-14", "gpu-16"}, + 1: {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-16"}, + }, + }, + "gpu-16": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-13", "gpu-14", "gpu-15"}, + 1: {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8", "gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + expectedAffinityPriorityLevel: 0, + }, + { + name: "4 devices in affinity priority 0, 8 devices in affinity priority 1, allocate 8 devices", + ctx: &allocate.AllocationContext{ + ResourceReq: &pluginapi.ResourceRequest{ + PodUid: "pod-1", + ContainerName: "container-1", + }, + DeviceReq: &pluginapi.DeviceRequest{ + DeviceName: "gpu", + ReusableDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8", "gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + DeviceRequest: 8, + }, + 
DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-2", "gpu-3", "gpu-4"}, + 1: {"gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-2": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-3", "gpu-4"}, + 1: {"gpu-1", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-3": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-2", "gpu-4"}, + 1: {"gpu-1", "gpu-2", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-4": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-1", "gpu-2", "gpu-3"}, + 1: {"gpu-1", "gpu-2", "gpu-3", "gpu-5", "gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-5": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-6", "gpu-7", "gpu-8"}, + 1: {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-6", "gpu-7", "gpu-8"}, + }, + }, + "gpu-6": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-7", "gpu-8"}, + 1: {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-7", "gpu-8"}, + }, + }, + "gpu-7": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-6", "gpu-8"}, + 1: {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-8"}, + }, + }, + "gpu-8": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-5", "gpu-6", "gpu-7"}, + 1: {"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7"}, + }, + }, + "gpu-9": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-10", "gpu-11", "gpu-12"}, + 1: {"gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + }, + }, + "gpu-10": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-9", "gpu-11", "gpu-12"}, + 1: {"gpu-9", "gpu-11", "gpu-12", "gpu-13", 
"gpu-14", "gpu-15", "gpu-16"}, + }, + }, + "gpu-11": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-9", "gpu-10", "gpu-12"}, + 1: {"gpu-9", "gpu-10", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + }, + }, + "gpu-12": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-9", "gpu-10", "gpu-11"}, + 1: {"gpu-9", "gpu-10", "gpu-11", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + }, + }, + "gpu-13": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-14", "gpu-15", "gpu-16"}, + 1: {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-14", "gpu-15", "gpu-16"}, + }, + }, + "gpu-14": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-13", "gpu-15", "gpu-16"}, + 1: {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-15", "gpu-16"}, + }, + }, + "gpu-15": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-13", "gpu-14", "gpu-16"}, + 1: {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-16"}, + }, + }, + "gpu-16": { + DeviceAffinity: map[machine.AffinityPriority]machine.DeviceIDs{ + 0: {"gpu-13", "gpu-14", "gpu-15"}, + 1: {"gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15"}, + }, + }, + }, + }, + }, + sortedDevices: []string{"gpu-1", "gpu-2", "gpu-3", "gpu-4", "gpu-5", "gpu-6", "gpu-7", "gpu-8", "gpu-9", "gpu-10", "gpu-11", "gpu-12", "gpu-13", "gpu-14", "gpu-15", "gpu-16"}, + expectedAffinityPriorityLevel: 1, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + deviceBindingStrategy := NewDeviceAffinityStrategy() + result, err := deviceBindingStrategy.Bind(tt.ctx, tt.sortedDevices) + if (err != nil) != tt.expectedErr { + t.Errorf("Bind() error = %v, expectedErr %v", err, tt.expectedErr) + } + + verifyResultIsAffinity(t, result, tt.ctx.DeviceTopology, tt.expectedAffinityPriorityLevel) + }) + } +} + +func verifyAllocationResult( + t *testing.T, result 
*allocate.AllocationResult, expectedResult *allocate.AllocationResult, isRandom bool, + expectedResultSize int, +) { + if isRandom { + if len(result.AllocatedDevices) != expectedResultSize { + t.Errorf("result.AllocatedDevices = %v, expectedResultSize = %v", result.AllocatedDevices, expectedResultSize) + } + return + } + if (result == nil) != (expectedResult == nil) { + t.Errorf("result = %v, expectedResult = %v", result, expectedResult) + return + } + if result.Success != expectedResult.Success { + t.Errorf("result.Success = %v, expectedResult.Success = %v", result.Success, expectedResult.Success) + return + } + if len(result.AllocatedDevices) != len(expectedResult.AllocatedDevices) { + t.Errorf("result.AllocatedDevices = %v, expectedResult.AllocatedDevices = %v", result.AllocatedDevices, expectedResult.AllocatedDevices) + return + } + if diff := cmp.Diff(result.AllocatedDevices, expectedResult.AllocatedDevices, + cmpopts.SortSlices(func(a, b string) bool { return a < b }), + ); diff != "" { + t.Errorf("Bind() mismatch (-got +want):\n%s", diff) + } +} + +func verifyResultIsAffinity( + t *testing.T, result *allocate.AllocationResult, topology *machine.DeviceTopology, + expectedAffinityPriorityLevel int, +) { + affinityMap := topology.GroupDeviceAffinity() + priorityLevelDevices := affinityMap[machine.AffinityPriority(expectedAffinityPriorityLevel)] + + sort.Slice(result.AllocatedDevices, func(i, j int) bool { + return result.AllocatedDevices[i] < result.AllocatedDevices[j] + }) + + for _, deviceIDs := range priorityLevelDevices { + sort.Slice(deviceIDs, func(i, j int) bool { return deviceIDs[i] < deviceIDs[j] }) + if reflect.DeepEqual(deviceIDs, machine.DeviceIDs(result.AllocatedDevices)) { + return + } + } + + t.Errorf("result = %v, did not find it within an affinity group", result.AllocatedDevices) +} diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/deviceaffinity/device_affinity.go 
package deviceaffinity

import (
	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate"
)

const (
	// StrategyNameDeviceAffinity is the registered name of this binding strategy.
	StrategyNameDeviceAffinity = "deviceAffinity"
)

// DeviceAffinityStrategy knows how to bind devices that have affinity to each other.
type DeviceAffinityStrategy struct{}

// NewDeviceAffinityStrategy creates a new device affinity binding strategy.
func NewDeviceAffinityStrategy() *DeviceAffinityStrategy {
	return &DeviceAffinityStrategy{}
}

// Compile-time check that DeviceAffinityStrategy implements allocate.BindingStrategy.
var _ allocate.BindingStrategy = &DeviceAffinityStrategy{}

// Name returns the name of the binding strategy.
func (s *DeviceAffinityStrategy) Name() string {
	return StrategyNameDeviceAffinity
}
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package gpu_memory + +import ( + "fmt" + + "k8s.io/apimachinery/pkg/util/sets" + + "github.com/kubewharf/katalyst-api/pkg/consts" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util" + "github.com/kubewharf/katalyst-core/pkg/util/general" +) + +// Filter filters the available GPU devices based on available GPU memory +// It returns devices that have enough available memory for the request +func (s *GPUMemoryStrategy) Filter(ctx *allocate.AllocationContext, allAvailableDevices []string) ([]string, error) { + if ctx.DeviceTopology == nil { + return nil, fmt.Errorf("GPU topology is nil") + } + + _, gpuMemory, err := util.GetQuantityFromResourceRequests(ctx.ResourceReq.ResourceRequests, string(consts.ResourceGPUMemory), false) + if err != nil { + general.Warningf("getReqQuantityFromResourceReq failed with error: %v, use default available devices", err) + return allAvailableDevices, nil + } + + if gpuMemory == 0 { + general.Infof("GPU Memory is 0, use default available devices") + return allAvailableDevices, nil + } + + filteredDevices, err := s.filterGPUDevices(ctx, gpuMemory, allAvailableDevices) + if err != nil { + return nil, err + } + + return filteredDevices, nil +} + +func (s *GPUMemoryStrategy) filterGPUDevices( + ctx *allocate.AllocationContext, + gpuMemoryRequest float64, + allAvailableDevices []string, +) ([]string, error) { + 
gpuRequest := ctx.DeviceReq.GetDeviceRequest() + gpuMemoryPerGPU := gpuMemoryRequest / float64(gpuRequest) + gpuMemoryAllocatablePerGPU := float64(ctx.GPUQRMPluginConfig.GPUMemoryAllocatablePerGPU.Value()) + + machineState := ctx.MachineState[consts.ResourceGPUMemory] + filteredDevices := sets.NewString() + for _, device := range allAvailableDevices { + if !machineState.IsRequestSatisfied(device, gpuMemoryPerGPU, gpuMemoryAllocatablePerGPU) { + general.Warningf("must include gpu %s has enough memory to allocate, gpuMemoryAllocatable: %f, gpuMemoryAllocated: %f, gpuMemoryPerGPU: %f", + device, gpuMemoryAllocatablePerGPU, machineState.GetQuantityAllocated(device), gpuMemoryPerGPU) + continue + } + + filteredDevices.Insert(device) + } + + return filteredDevices.UnsortedList(), nil +} diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/gpu_memory/filter_test.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/gpu_memory/filter_test.go new file mode 100644 index 0000000000..4b206afc7d --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/gpu_memory/filter_test.go @@ -0,0 +1,220 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package gpu_memory + +import ( + "testing" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + + "github.com/kubewharf/katalyst-api/pkg/consts" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/state" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate" + "github.com/kubewharf/katalyst-core/pkg/config/agent/qrm" + "github.com/kubewharf/katalyst-core/pkg/util/machine" +) + +func TestGPUMemoryStrategy_Filter(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + ctx *allocate.AllocationContext + availableDevices []string + expectedFilteredDevices []string + expectedErr bool + }{ + { + name: "gpu topology is nil", + ctx: &allocate.AllocationContext{ + ResourceReq: &v1alpha1.ResourceRequest{ + ResourceRequests: map[string]float64{ + string(consts.ResourceGPUMemory): 4, + }, + }, + DeviceReq: &v1alpha1.DeviceRequest{ + DeviceRequest: 2, + }, + GPUQRMPluginConfig: &qrm.GPUQRMPluginConfig{ + GPUMemoryAllocatablePerGPU: *resource.NewQuantity(2, resource.DecimalSI), + }, + }, + expectedErr: true, + }, + { + name: "gpu memory does not exist, just allocate every device", + ctx: &allocate.AllocationContext{ + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-0": {}, + "gpu-1": {}, + "gpu-2": {}, + }, + }, + ResourceReq: &v1alpha1.ResourceRequest{ + ResourceRequests: map[string]float64{ + string(consts.ResourceMemoryBandwidth): 4, + }, + }, + DeviceReq: &v1alpha1.DeviceRequest{ + DeviceRequest: 2, + }, + GPUQRMPluginConfig: &qrm.GPUQRMPluginConfig{ + GPUMemoryAllocatablePerGPU: *resource.NewQuantity(2, resource.DecimalSI), + }, + }, + availableDevices: []string{"gpu-0", "gpu-1", "gpu-2"}, + expectedFilteredDevices: []string{"gpu-0", "gpu-1", "gpu-2"}, + }, + { + name: "gpu memory is 0, so we use all the available devices", + ctx: &allocate.AllocationContext{ + 
DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-0": {}, + "gpu-1": {}, + "gpu-2": {}, + }, + }, + ResourceReq: &v1alpha1.ResourceRequest{ + ResourceRequests: map[string]float64{ + string(consts.ResourceGPUMemory): 0, + }, + }, + DeviceReq: &v1alpha1.DeviceRequest{ + DeviceRequest: 2, + }, + GPUQRMPluginConfig: &qrm.GPUQRMPluginConfig{ + GPUMemoryAllocatablePerGPU: *resource.NewQuantity(2, resource.DecimalSI), + }, + }, + availableDevices: []string{"gpu-0", "gpu-1", "gpu-2"}, + expectedFilteredDevices: []string{"gpu-0", "gpu-1", "gpu-2"}, + }, + { + name: "allocate available devices with available gpu memory", + ctx: &allocate.AllocationContext{ + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-0": {}, + "gpu-1": {}, + "gpu-2": {}, + }, + }, + ResourceReq: &v1alpha1.ResourceRequest{ + ResourceRequests: map[string]float64{ + string(consts.ResourceGPUMemory): 4, + }, + }, + DeviceReq: &v1alpha1.DeviceRequest{ + DeviceRequest: 2, + }, + GPUQRMPluginConfig: &qrm.GPUQRMPluginConfig{ + GPUMemoryAllocatablePerGPU: *resource.NewQuantity(2, resource.DecimalSI), + }, + MachineState: map[v1.ResourceName]state.AllocationMap{ + consts.ResourceGPUMemory: { + "gpu-0": {}, + "gpu-1": {}, + "gpu-2": {}, + }, + }, + }, + availableDevices: []string{"gpu-0", "gpu-1", "gpu-2"}, + expectedFilteredDevices: []string{"gpu-0", "gpu-1", "gpu-2"}, + }, + { + name: "exclude devices with not enough gpu memory", + ctx: &allocate.AllocationContext{ + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-0": {}, + "gpu-1": {}, + "gpu-2": {}, + "gpu-3": {}, + }, + }, + ResourceReq: &v1alpha1.ResourceRequest{ + ResourceRequests: map[string]float64{ + string(consts.ResourceGPUMemory): 4, + }, + }, + DeviceReq: &v1alpha1.DeviceRequest{ + DeviceRequest: 2, + }, + GPUQRMPluginConfig: &qrm.GPUQRMPluginConfig{ + GPUMemoryAllocatablePerGPU: *resource.NewQuantity(2, resource.DecimalSI), + 
}, + MachineState: map[v1.ResourceName]state.AllocationMap{ + consts.ResourceGPUMemory: { + // 2 GB allocated + "gpu-0": { + PodEntries: map[string]state.ContainerEntries{ + "pod-0": { + "container-0": &state.AllocationInfo{ + AllocatedAllocation: state.Allocation{ + Quantity: 2, + }, + }, + }, + }, + }, + "gpu-1": {}, + "gpu-2": {}, + "gpu-3": { + // 1 GB allocated + PodEntries: map[string]state.ContainerEntries{ + "pod-1": { + "container-0": &state.AllocationInfo{ + AllocatedAllocation: state.Allocation{ + Quantity: 0.5, + }, + }, + "container-1": &state.AllocationInfo{ + AllocatedAllocation: state.Allocation{ + Quantity: 0.5, + }, + }, + }, + }, + }, + }, + }, + }, + availableDevices: []string{"gpu-0", "gpu-1", "gpu-2", "gpu-3"}, + expectedFilteredDevices: []string{"gpu-1", "gpu-2"}, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + strategy := NewGPUMemoryStrategy() + filteredDevices, err := strategy.Filter(tt.ctx, tt.availableDevices) + if tt.expectedErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.ElementsMatch(t, tt.expectedFilteredDevices, filteredDevices) + } + }) + } +} diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/gpu_memory/gpu_memory.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/gpu_memory/gpu_memory.go new file mode 100644 index 0000000000..06d0881eab --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/gpu_memory/gpu_memory.go @@ -0,0 +1,41 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
package gpu_memory

import "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate"

const (
	// StrategyNameGPUMemory is the registered name of this strategy.
	StrategyNameGPUMemory = "gpu-memory"
)

// GPUMemoryStrategy filters and sorts GPU devices based on available GPU memory.
type GPUMemoryStrategy struct{}

// Compile-time checks that GPUMemoryStrategy implements both the filtering and
// the sorting strategy interfaces.
var (
	_ allocate.FilteringStrategy = &GPUMemoryStrategy{}
	_ allocate.SortingStrategy   = &GPUMemoryStrategy{}
)

// NewGPUMemoryStrategy creates a new GPU memory filtering/sorting strategy.
func NewGPUMemoryStrategy() *GPUMemoryStrategy {
	return &GPUMemoryStrategy{}
}

// Name returns the name of the strategy.
func (s *GPUMemoryStrategy) Name() string {
	return StrategyNameGPUMemory
}
+*/ + +package gpu_memory + +import ( + "fmt" + "sort" + + "github.com/kubewharf/katalyst-api/pkg/consts" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/util" + qrmutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util" + "github.com/kubewharf/katalyst-core/pkg/util/general" +) + +// Sort sorts the filtered GPU devices based on available GPU memory +// It prioritizes devices with less available memory and considers NUMA affinity +func (s *GPUMemoryStrategy) Sort(ctx *allocate.AllocationContext, filteredDevices []string) ([]string, error) { + if ctx.DeviceTopology == nil { + return nil, fmt.Errorf("GPU topology is nil") + } + + _, gpuMemory, err := qrmutil.GetQuantityFromResourceRequests(ctx.ResourceReq.ResourceRequests, string(consts.ResourceGPUMemory), false) + if err != nil { + general.Warningf("getReqQuantityFromResourceReq failed with error: %v, use default filtered devices", err) + return filteredDevices, nil + } + + if gpuMemory == 0 { + general.Infof("GPU Memory is 0, use default available devices") + return filteredDevices, nil + } + + gpuMemoryAllocatablePerGPU := float64(ctx.GPUQRMPluginConfig.GPUMemoryAllocatablePerGPU.Value()) + machineState := ctx.MachineState[consts.ResourceGPUMemory] + + // Create a slice of device info with available memory + type deviceInfo struct { + ID string + AvailableMemory float64 + NUMAAffinity bool + } + + devices := make([]deviceInfo, 0, len(filteredDevices)) + + for _, deviceID := range filteredDevices { + availableMemory := gpuMemoryAllocatablePerGPU - machineState.GetQuantityAllocated(deviceID) + devices = append(devices, deviceInfo{ + ID: deviceID, + AvailableMemory: availableMemory, + NUMAAffinity: util.IsNUMAAffinityDevice(deviceID, ctx.DeviceTopology, ctx.HintNodes), + }) + } + + // Sort devices: first by NUMA affinity (preferred), then by available memory (ascending) + sort.Slice(devices, func(i, j int) 
bool { + // If both devices have NUMA affinity or both don't, sort by available memory + if devices[i].NUMAAffinity == devices[j].NUMAAffinity { + return devices[i].AvailableMemory < devices[j].AvailableMemory + } + + // Prefer devices with NUMA affinity + return devices[i].NUMAAffinity && !devices[j].NUMAAffinity + }) + + // Extract sorted device IDs + sortedDevices := make([]string, len(devices)) + for i, device := range devices { + sortedDevices[i] = device.ID + } + + general.InfoS("Sorted devices", "count", len(sortedDevices), "devices", sortedDevices) + return sortedDevices, nil +} diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/gpu_memory/sort_test.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/gpu_memory/sort_test.go new file mode 100644 index 0000000000..3511861ba8 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/gpu_memory/sort_test.go @@ -0,0 +1,178 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package gpu_memory + +import ( + "testing" + + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + + "github.com/kubewharf/katalyst-api/pkg/consts" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/state" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate" + "github.com/kubewharf/katalyst-core/pkg/config/agent/qrm" + "github.com/kubewharf/katalyst-core/pkg/util/machine" +) + +func TestGPUMemoryStrategy_Sort(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + ctx *allocate.AllocationContext + filteredDevices []string + expectedSortedDevices []string + expectedErr bool + }{ + { + name: "nil gpu topology", + ctx: &allocate.AllocationContext{ + ResourceReq: &v1alpha1.ResourceRequest{ + ResourceRequests: map[string]float64{ + string(consts.ResourceGPUMemory): 1, + }, + }, + }, + filteredDevices: []string{"gpu-1", "gpu-2"}, + expectedErr: true, + }, + { + name: "gpu memory is 0 returns all available devices without sorting", + ctx: &allocate.AllocationContext{ + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": {}, + "gpu-2": {}, + }, + }, + ResourceReq: &v1alpha1.ResourceRequest{ + ResourceRequests: map[string]float64{ + string(consts.ResourceGPUMemory): 0, + }, + }, + }, + filteredDevices: []string{"gpu-1", "gpu-2"}, + expectedSortedDevices: []string{"gpu-1", "gpu-2"}, + }, + { + name: "devices are sorted by NUMA affinity first", + ctx: &allocate.AllocationContext{ + DeviceTopology: &machine.DeviceTopology{ + // gpu-1 has NUMA affinity but gpu-2 does not + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + NumaNodes: []int{1}, + }, + "gpu-2": { + NumaNodes: []int{0}, + }, + }, + }, + ResourceReq: &v1alpha1.ResourceRequest{ + ResourceRequests: map[string]float64{ + string(consts.ResourceGPUMemory): 1, + }, + }, + GPUQRMPluginConfig: 
&qrm.GPUQRMPluginConfig{ + GPUMemoryAllocatablePerGPU: *resource.NewQuantity(2, resource.DecimalSI), + }, + HintNodes: machine.NewCPUSet(0), + }, + filteredDevices: []string{"gpu-1", "gpu-2"}, + expectedSortedDevices: []string{"gpu-2", "gpu-1"}, + }, + { + name: "for devices with NUMA affinity, they are sorted by available memory in ascending order", + ctx: &allocate.AllocationContext{ + DeviceTopology: &machine.DeviceTopology{ + Devices: map[string]machine.DeviceInfo{ + "gpu-1": { + NumaNodes: []int{0}, + }, + "gpu-2": { + NumaNodes: []int{1}, + }, + "gpu-3": { + NumaNodes: []int{2}, + }, + }, + }, + ResourceReq: &v1alpha1.ResourceRequest{ + ResourceRequests: map[string]float64{ + string(consts.ResourceGPUMemory): 1, + }, + }, + GPUQRMPluginConfig: &qrm.GPUQRMPluginConfig{ + GPUMemoryAllocatablePerGPU: *resource.NewQuantity(4, resource.DecimalSI), + }, + HintNodes: machine.NewCPUSet(0, 1), + MachineState: map[v1.ResourceName]state.AllocationMap{ + consts.ResourceGPUMemory: { + "gpu-1": { + PodEntries: map[string]state.ContainerEntries{ + "pod-0": { + "container-0": { + AllocatedAllocation: state.Allocation{ + Quantity: 1, + }, + }, + }, + }, + }, + "gpu-2": { + PodEntries: map[string]state.ContainerEntries{ + "pod-1": { + "container-1": { + AllocatedAllocation: state.Allocation{ + Quantity: 1, + }, + }, + "container-2": { + AllocatedAllocation: state.Allocation{ + Quantity: 1, + }, + }, + }, + }, + }, + }, + }, + }, + filteredDevices: []string{"gpu-1", "gpu-2", "gpu-3"}, + expectedSortedDevices: []string{"gpu-2", "gpu-1", "gpu-3"}, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + strategy := NewGPUMemoryStrategy() + sortedDevices, err := strategy.Sort(tt.ctx, tt.filteredDevices) + + if tt.expectedErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expectedSortedDevices, sortedDevices) + } + }) + } +} diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/util.go 
b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/util.go new file mode 100644 index 0000000000..fb5eeaa5e1 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/strategies/util.go @@ -0,0 +1,38 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package strategies + +import ( + "fmt" + + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/strategy/allocate" +) + +// IsBindingContextValid checks if the context given for binding is valid +func IsBindingContextValid(ctx *allocate.AllocationContext, sortedDevices []string) (bool, string) { + if ctx.DeviceTopology == nil { + return false, "GPU topology is nil" + } + + // Determine how many devices to allocate + devicesToAllocate := int(ctx.DeviceReq.DeviceRequest) + if devicesToAllocate > len(sortedDevices) { + return false, fmt.Sprintf("not enough devices: need %d, have %d", devicesToAllocate, len(sortedDevices)) + } + + return true, "" +} diff --git a/pkg/agent/qrm-plugins/gpu/strategy/allocate/types.go b/pkg/agent/qrm-plugins/gpu/strategy/allocate/types.go new file mode 100644 index 0000000000..09d5c9b574 --- /dev/null +++ b/pkg/agent/qrm-plugins/gpu/strategy/allocate/types.go @@ -0,0 +1,85 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
package allocate

import (
	pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"

	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/gpu/state"
	"github.com/kubewharf/katalyst-core/pkg/config/agent/qrm"
	"github.com/kubewharf/katalyst-core/pkg/metaserver"
	"github.com/kubewharf/katalyst-core/pkg/metrics"
	"github.com/kubewharf/katalyst-core/pkg/util/machine"
)

// AllocationContext contains all the information needed for GPU allocation.
type AllocationContext struct {
	ResourceReq        *pluginapi.ResourceRequest // the resource request being allocated for
	DeviceReq          *pluginapi.DeviceRequest   // how many devices are requested
	DeviceTopology     *machine.DeviceTopology    // device topology of the node
	GPUQRMPluginConfig *qrm.GPUQRMPluginConfig    // GPU plugin configuration
	Emitter            metrics.MetricEmitter      // metrics emitter
	MetaServer         *metaserver.MetaServer     // node meta server
	MachineState       state.AllocationResourcesMap
	QoSLevel           string
	HintNodes          machine.CPUSet // NUMA hint nodes for the request
}

// AllocationResult contains the result of GPU allocation.
type AllocationResult struct {
	AllocatedDevices []string // IDs of the devices that were allocated
	Success          bool     // whether the allocation succeeded
	ErrorMessage     string   // failure reason when Success is false
}

// FilteringStrategy defines the interface for filtering GPU devices.
type FilteringStrategy interface {
	// Name returns the name of the filtering strategy.
	Name() string

	// Filter filters the available GPU devices based on the allocation context.
	// Returns a list of filtered device IDs.
	Filter(ctx *AllocationContext, allAvailableDevices []string) ([]string, error)
}

// SortingStrategy defines the interface for sorting GPU devices.
type SortingStrategy interface {
	// Name returns the name of the sorting strategy.
	Name() string

	// Sort sorts the filtered GPU devices based on the allocation context.
	// Returns a prioritized list of device IDs.
	Sort(ctx *AllocationContext, filteredDevices []string) ([]string, error)
}

// BindingStrategy defines the interface for binding GPU devices.
type BindingStrategy interface {
	// Name returns the name of the binding strategy.
	Name() string

	// Bind binds the sorted GPU devices to the allocation context.
	// Returns the final allocation result.
	Bind(ctx *AllocationContext, sortedDevices []string) (*AllocationResult, error)
}

// AllocationStrategy defines the interface for performing a complete GPU
// allocation from an allocation context.
type AllocationStrategy interface {
	// Name returns the name of the allocation strategy.
	Name() string

	// Allocate performs the allocation using the combined strategies.
	Allocate(ctx *AllocationContext) (*AllocationResult, error)
}
+*/ + +package util + +import ( + "fmt" + "math" + + pkgerrors "github.com/pkg/errors" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/util/sets" + pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" + + qrmutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util" + "github.com/kubewharf/katalyst-core/pkg/util/general" + "github.com/kubewharf/katalyst-core/pkg/util/machine" +) + +var ErrNoAvailableGPUMemoryHints = pkgerrors.New("no available gpu memory hints") + +func GetNUMANodesCountToFitGPUReq( + gpuReq float64, cpuTopology *machine.CPUTopology, gpuTopology *machine.DeviceTopology, +) (int, int, error) { + if gpuTopology == nil { + return 0, 0, fmt.Errorf("GetNUMANodesCountToFitGPUReq got nil gpuTopology") + } + + numaCount := cpuTopology.NumNUMANodes + if numaCount == 0 { + return 0, 0, fmt.Errorf("there is no NUMA in cpuTopology") + } + + if len(gpuTopology.Devices)%numaCount != 0 { + general.Warningf("GPUs count %d cannot be evenly divisible by NUMA count %d", len(gpuTopology.Devices), numaCount) + } + + gpusPerNUMA := (len(gpuTopology.Devices) + numaCount - 1) / numaCount + numaCountNeeded := int(math.Ceil(gpuReq / float64(gpusPerNUMA))) + if numaCountNeeded == 0 { + numaCountNeeded = 1 + } + if numaCountNeeded > numaCount { + return 0, 0, fmt.Errorf("invalid gpu req: %.3f in topology with NUMAs count: %d and GPUs count: %d", gpuReq, numaCount, len(gpuTopology.Devices)) + } + + gpusCountNeededPerNUMA := int(math.Ceil(gpuReq / float64(numaCountNeeded))) + return numaCountNeeded, gpusCountNeededPerNUMA, nil +} + +func IsNUMAAffinityDevice( + device string, deviceTopology *machine.DeviceTopology, hintNodes machine.CPUSet, +) bool { + info, ok := deviceTopology.Devices[device] + if !ok { + general.Errorf("failed to find device info for device %s", device) + return false + } + + return machine.NewCPUSet(info.GetNUMANodes()...).IsSubsetOf(hintNodes) +} + +// GetGPUCount extracts GPU count from resource request +func 
GetGPUCount(req *pluginapi.ResourceRequest, deviceNames []string) (float64, sets.String, error) { + gpuCount := float64(0) + gpuNames := sets.NewString() + + for _, resourceName := range deviceNames { + _, request, err := qrmutil.GetQuantityFromResourceRequests(req.ResourceRequests, resourceName, false) + if err != nil && !errors.IsNotFound(err) { + return 0, nil, err + } + + if request == 0 { + continue + } + + gpuCount += request + gpuNames.Insert(resourceName) + } + + if gpuCount == 0 { + return 0, gpuNames, fmt.Errorf("no available GPU count") + } + + return gpuCount, gpuNames, nil +} diff --git a/pkg/agent/qrm-plugins/io/staticpolicy/policy.go b/pkg/agent/qrm-plugins/io/staticpolicy/policy.go index f0e11f9d91..39c943b217 100644 --- a/pkg/agent/qrm-plugins/io/staticpolicy/policy.go +++ b/pkg/agent/qrm-plugins/io/staticpolicy/policy.go @@ -48,6 +48,7 @@ const ( // StaticPolicy is the static io policy type StaticPolicy struct { sync.Mutex + pluginapi.UnimplementedResourcePluginServer name string stopCh chan struct{} diff --git a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go index 6db20be45e..9b5a83f67f 100644 --- a/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go +++ b/pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go @@ -96,6 +96,7 @@ const ( type DynamicPolicy struct { sync.RWMutex + pluginapi.UnimplementedResourcePluginServer stopCh chan struct{} started bool diff --git a/pkg/agent/qrm-plugins/network/state/state_net.go b/pkg/agent/qrm-plugins/network/state/state_mem.go similarity index 100% rename from pkg/agent/qrm-plugins/network/state/state_net.go rename to pkg/agent/qrm-plugins/network/state/state_mem.go diff --git a/pkg/agent/qrm-plugins/network/staticpolicy/policy.go b/pkg/agent/qrm-plugins/network/staticpolicy/policy.go index 328984c592..ece3b452a9 100644 --- a/pkg/agent/qrm-plugins/network/staticpolicy/policy.go +++ b/pkg/agent/qrm-plugins/network/staticpolicy/policy.go @@ -78,6 
+78,7 @@ const ( // StaticPolicy is the static network policy type StaticPolicy struct { sync.Mutex + pluginapi.UnimplementedResourcePluginServer name string stopCh chan struct{} diff --git a/pkg/agent/qrm-plugins/util/util.go b/pkg/agent/qrm-plugins/util/util.go index 5eaca3dfd7..99158b1f16 100644 --- a/pkg/agent/qrm-plugins/util/util.go +++ b/pkg/agent/qrm-plugins/util/util.go @@ -25,6 +25,8 @@ import ( "strings" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/klog/v2" pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" @@ -43,27 +45,32 @@ func GetQuantityFromResourceReq(req *pluginapi.ResourceRequest) (int, float64, e return 0, 0, fmt.Errorf("invalid req.ResourceRequests length: %d", len(req.ResourceRequests)) } - for key := range req.ResourceRequests { - switch key { - case string(v1.ResourceCPU): - return general.Max(int(math.Ceil(req.ResourceRequests[key])), 0), req.ResourceRequests[key], nil - case string(apiconsts.ReclaimedResourceMilliCPU): - return general.Max(int(math.Ceil(req.ResourceRequests[key]/1000.0)), 0), req.ResourceRequests[key] / 1000.0, nil - case string(v1.ResourceMemory), string(apiconsts.ReclaimedResourceMemory): - return general.Max(int(math.Ceil(req.ResourceRequests[key])), 0), req.ResourceRequests[key], nil - case string(apiconsts.ResourceNetBandwidth): - if req.Annotations[PodAnnotationQuantityFromQRMDeclarationKey] == PodAnnotationQuantityFromQRMDeclarationTrue { - general.Infof("detect %s: %s, return %s: 0 instead of %s: %.2f", - PodAnnotationQuantityFromQRMDeclarationKey, PodAnnotationQuantityFromQRMDeclarationTrue, key, key, req.ResourceRequests[key]) - return 0, 0, nil - } - return general.Max(int(math.Ceil(req.ResourceRequests[key])), 0), req.ResourceRequests[key], nil - default: - return 0, 0, fmt.Errorf("invalid request resource name: %s", key) + return GetQuantityFromResourceRequests(req.ResourceRequests, req.ResourceName, 
IsQuantityFromQRMDeclaration(req.Annotations)) +} + +func GetQuantityFromResourceRequests(resourceRequests map[string]float64, resourceName string, isQuantityFromQRMDeclaration bool) (int, float64, error) { + quantity, ok := resourceRequests[resourceName] + if !ok { + return 0, 0, errors.NewNotFound(schema.GroupResource{}, resourceName) + } + + switch resourceName { + case string(apiconsts.ReclaimedResourceMilliCPU): + return general.Max(int(math.Ceil(quantity/1000.0)), 0), quantity / 1000.0, nil + case string(apiconsts.ResourceNetBandwidth): + if isQuantityFromQRMDeclaration { + general.Infof("detect %s: %s, return %s: 0 instead of %s: %.2f", + PodAnnotationQuantityFromQRMDeclarationKey, PodAnnotationQuantityFromQRMDeclarationTrue, resourceName, resourceName, quantity) + return 0, 0, nil } + return general.Max(int(math.Ceil(quantity)), 0), quantity, nil + default: + return general.Max(int(math.Ceil(quantity)), 0), quantity, nil } +} - return 0, 0, fmt.Errorf("unexpected end") +func IsQuantityFromQRMDeclaration(podAnnotations map[string]string) bool { + return podAnnotations[PodAnnotationQuantityFromQRMDeclarationKey] == PodAnnotationQuantityFromQRMDeclarationTrue } // IsDebugPod returns true if the pod annotations show up any configurable debug key @@ -371,3 +378,20 @@ func GetPodAggregatedRequestResource(req *pluginapi.ResourceRequest) (int, float return 0, 0, fmt.Errorf("not support resource name: %s", req.ResourceName) } } + +// CreateEmptyAllocationResponse creates an empty allocation response +func CreateEmptyAllocationResponse(resourceReq *pluginapi.ResourceRequest, resourceName string) *pluginapi.ResourceAllocationResponse { + return &pluginapi.ResourceAllocationResponse{ + PodUid: resourceReq.PodUid, + PodNamespace: resourceReq.PodNamespace, + PodName: resourceReq.PodName, + ContainerName: resourceReq.ContainerName, + ContainerType: resourceReq.ContainerType, + ContainerIndex: resourceReq.ContainerIndex, + PodRole: resourceReq.PodRole, + PodType: 
resourceReq.PodType, + ResourceName: resourceName, + Labels: general.DeepCopyMap(resourceReq.Labels), + Annotations: general.DeepCopyMap(resourceReq.Annotations), + } +} diff --git a/pkg/agent/qrm-plugins/util/util_test.go b/pkg/agent/qrm-plugins/util/util_test.go index ddcd12377c..fe8f85a0f5 100644 --- a/pkg/agent/qrm-plugins/util/util_test.go +++ b/pkg/agent/qrm-plugins/util/util_test.go @@ -17,12 +17,13 @@ limitations under the License. package util import ( - "fmt" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime/schema" pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" "github.com/kubewharf/katalyst-api/pkg/consts" @@ -41,6 +42,7 @@ func TestGetQuantityFromResourceReq(t *testing.T) { }{ { req: &pluginapi.ResourceRequest{ + ResourceName: string(v1.ResourceCPU), ResourceRequests: map[string]float64{ string(v1.ResourceCPU): 123, }, @@ -49,6 +51,7 @@ func TestGetQuantityFromResourceReq(t *testing.T) { }, { req: &pluginapi.ResourceRequest{ + ResourceName: string(consts.ReclaimedResourceMilliCPU), ResourceRequests: map[string]float64{ string(consts.ReclaimedResourceMilliCPU): 234001, }, @@ -57,6 +60,7 @@ func TestGetQuantityFromResourceReq(t *testing.T) { }, { req: &pluginapi.ResourceRequest{ + ResourceName: string(v1.ResourceMemory), ResourceRequests: map[string]float64{ string(v1.ResourceMemory): 256, }, @@ -65,6 +69,7 @@ func TestGetQuantityFromResourceReq(t *testing.T) { }, { req: &pluginapi.ResourceRequest{ + ResourceName: string(consts.ReclaimedResourceMemory), ResourceRequests: map[string]float64{ string(consts.ReclaimedResourceMemory): 1345, }, @@ -73,18 +78,19 @@ func TestGetQuantityFromResourceReq(t *testing.T) { }, { req: &pluginapi.ResourceRequest{ + ResourceName: string(v1.ResourceCPU), ResourceRequests: map[string]float64{ "test": 1345, }, }, - err: fmt.Errorf("invalid request resource name: %s", "test"), + 
err: errors.NewNotFound(schema.GroupResource{}, string(v1.ResourceCPU)), }, } for _, tc := range testCases { res, _, err := GetQuantityFromResourceReq(tc.req) if tc.err != nil { - as.NotNil(err) + as.Equal(tc.err, err) } else { as.EqualValues(tc.result, res) } diff --git a/pkg/agent/resourcemanager/fetcher/kubelet/topology/topology_adapter.go b/pkg/agent/resourcemanager/fetcher/kubelet/topology/topology_adapter.go index 48b47588f8..15d3b4d870 100644 --- a/pkg/agent/resourcemanager/fetcher/kubelet/topology/topology_adapter.go +++ b/pkg/agent/resourcemanager/fetcher/kubelet/topology/topology_adapter.go @@ -349,8 +349,8 @@ func (p *topologyAdapterImpl) Run(ctx context.Context, handler func()) error { // validatePodResourcesServerResponse validate pod resources server response, if the resource is empty, // maybe the kubelet or qrm plugin is restarting -func (p *topologyAdapterImpl) validatePodResourcesServerResponse(allocatableResourcesResponse *podresv1. - AllocatableResourcesResponse, listPodResourcesResponse *podresv1.ListPodResourcesResponse, +func (p *topologyAdapterImpl) validatePodResourcesServerResponse(allocatableResourcesResponse *podresv1.AllocatableResourcesResponse, + listPodResourcesResponse *podresv1.ListPodResourcesResponse, ) error { if len(p.needValidationResources) > 0 { if allocatableResourcesResponse == nil { @@ -396,6 +396,11 @@ func (p *topologyAdapterImpl) addNumaSocketChildrenZoneNodes(generator *util.Top continue } + if parentZoneNode == nil { + // skip the resource which doesn't have parent zone node + continue + } + err = generator.AddNode(parentZoneNode, zoneNode) if err != nil { errList = append(errList, err) @@ -1133,6 +1138,12 @@ func (p *topologyAdapterImpl) generateZoneNode(quantity podresv1.TopologyAwareQu }, } + if identifier, ok := quantity.Annotations[apiconsts.ResourceAnnotationKeyResourceIdentifier]; ok && len(identifier) == 0 { + // if quantity has resource identifier annotation, but it is empty, it means it is unique and the 
parent zone node + // already exists, we can just return the zone node and nil parent zone node + return zoneNode, nil, nil + } + switch quantity.TopologyLevel { case podresv1.TopologyLevel_NUMA: parentZoneNode := util.GenerateNumaZoneNode(nodeID) @@ -1141,7 +1152,9 @@ func (p *topologyAdapterImpl) generateZoneNode(quantity podresv1.TopologyAwareQu parentZoneNode := util.GenerateSocketZoneNode(nodeID) return zoneNode, &parentZoneNode, nil default: - return zoneNode, nil, fmt.Errorf("quantity %v unsupport topology level: %s", quantity, quantity.TopologyLevel) + // if quantity topology level is not numa or socket, it means that the zone is a child of socket or numa, + // and the zone node is determined by the quantity name or its resource identifier if existed. + return zoneNode, nil, nil } } } diff --git a/pkg/config/agent/qrm/gpu_plugin.go b/pkg/config/agent/qrm/gpu_plugin.go new file mode 100644 index 0000000000..f610910083 --- /dev/null +++ b/pkg/config/agent/qrm/gpu_plugin.go @@ -0,0 +1,44 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package qrm + +import ( + "k8s.io/apimachinery/pkg/api/resource" + + "github.com/kubewharf/katalyst-core/pkg/config/agent/qrm/gpustrategy" +) + +type GPUQRMPluginConfig struct { + // PolicyName is used to switch between several strategies + PolicyName string + // GPUDeviceNames is the names of the GPU device + GPUDeviceNames []string + // RDMADeviceNames is the names of the RDMA device + RDMADeviceNames []string + // GPUMemoryAllocatablePerGPU is the total memory allocatable for each GPU + GPUMemoryAllocatablePerGPU resource.Quantity + // SkipGPUStateCorruption skip gpu state corruption, and it will be used after updating state properties + SkipGPUStateCorruption bool + + *gpustrategy.GPUStrategyConfig +} + +func NewGPUQRMPluginConfig() *GPUQRMPluginConfig { + return &GPUQRMPluginConfig{ + GPUStrategyConfig: gpustrategy.NewGPUStrategyConfig(), + } +} diff --git a/pkg/config/agent/qrm/gpustrategy/allocate.go b/pkg/config/agent/qrm/gpustrategy/allocate.go new file mode 100644 index 0000000000..2653b1ecb8 --- /dev/null +++ b/pkg/config/agent/qrm/gpustrategy/allocate.go @@ -0,0 +1,28 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package gpustrategy + +type AllocateStrategyConfig struct { + CustomFilteringStrategies map[string][]string + CustomSortingStrategy map[string]string + CustomBindingStrategy map[string]string + CustomAllocationStrategy map[string]string +} + +func NewGPUAllocateStrategyConfig() *AllocateStrategyConfig { + return &AllocateStrategyConfig{} +} diff --git a/pkg/config/agent/qrm/gpustrategy/strategy_base.go b/pkg/config/agent/qrm/gpustrategy/strategy_base.go new file mode 100644 index 0000000000..a901f80bdc --- /dev/null +++ b/pkg/config/agent/qrm/gpustrategy/strategy_base.go @@ -0,0 +1,27 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package gpustrategy + +type GPUStrategyConfig struct { + *AllocateStrategyConfig +} + +func NewGPUStrategyConfig() *GPUStrategyConfig { + return &GPUStrategyConfig{ + AllocateStrategyConfig: NewGPUAllocateStrategyConfig(), + } +} diff --git a/pkg/config/agent/qrm/qrm_base.go b/pkg/config/agent/qrm/qrm_base.go index 18136bb490..9f90e6a512 100644 --- a/pkg/config/agent/qrm/qrm_base.go +++ b/pkg/config/agent/qrm/qrm_base.go @@ -22,12 +22,10 @@ import ( ) type GenericQRMPluginConfiguration struct { - StateFileDirectory string - InMemoryStateFileDirectory string - QRMPluginSocketDirs []string - ExtraStateFileAbsPath string - PodDebugAnnoKeys []string - UseKubeletReservedConfig bool + QRMPluginSocketDirs []string + ExtraStateFileAbsPath string + PodDebugAnnoKeys []string + UseKubeletReservedConfig bool // PodAnnotationKeptKeys indicates pod annotation keys will be kept in qrm state PodAnnotationKeptKeys []string // PodLabelKeptKeys indicates pod label keys will be kept in qrm state @@ -50,6 +48,7 @@ type QRMPluginsConfiguration struct { *MemoryQRMPluginConfig *NetworkQRMPluginConfig *IOQRMPluginConfig + *GPUQRMPluginConfig } func NewGenericQRMPluginConfiguration() *GenericQRMPluginConfiguration { @@ -69,5 +68,6 @@ func NewQRMPluginsConfiguration() *QRMPluginsConfiguration { MemoryQRMPluginConfig: NewMemoryQRMPluginConfig(), NetworkQRMPluginConfig: NewNetworkQRMPluginConfig(), IOQRMPluginConfig: NewIOQRMPluginConfig(), + GPUQRMPluginConfig: NewGPUQRMPluginConfig(), } } diff --git a/pkg/util/machine/device.go b/pkg/util/machine/device.go new file mode 100644 index 0000000000..4d2bc923b9 --- /dev/null +++ b/pkg/util/machine/device.go @@ -0,0 +1,308 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package machine + +import ( + "fmt" + "sort" + "sync" + + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/utils/strings/slices" + + "github.com/kubewharf/katalyst-core/pkg/util/general" + "github.com/kubewharf/katalyst-core/pkg/util/native" +) + +type DeviceTopologyProvider interface { + GetDeviceTopology() (*DeviceTopology, bool, error) + SetDeviceTopology(*DeviceTopology) error +} + +// DeviceTopologyRegistry is a registry of all topology providers that knows how to provide topology information of machine devices +type DeviceTopologyRegistry struct { + mux sync.RWMutex + + // deviceTopologyProviders is a mapping of device name to their respective topology provider + deviceTopologyProviders map[string]DeviceTopologyProvider + + // deviceTopologyAffinityProviders is a mapping of device name to their respective affinity provider + deviceTopologyAffinityProviders map[string]DeviceAffinityProvider +} + +func NewDeviceTopologyRegistry() *DeviceTopologyRegistry { + return &DeviceTopologyRegistry{ + deviceTopologyProviders: make(map[string]DeviceTopologyProvider), + deviceTopologyAffinityProviders: make(map[string]DeviceAffinityProvider), + } +} + +// RegisterDeviceTopologyProvider registers a device topology provider for the specified device name. 
+func (r *DeviceTopologyRegistry) RegisterDeviceTopologyProvider( + deviceName string, deviceTopologyProvider DeviceTopologyProvider, +) { + r.mux.Lock() + defer r.mux.Unlock() + + r.deviceTopologyProviders[deviceName] = deviceTopologyProvider +} + +func (r *DeviceTopologyRegistry) RegisterTopologyAffinityProvider( + deviceName string, deviceAffinityProvider DeviceAffinityProvider, +) { + r.mux.Lock() + defer r.mux.Unlock() + + r.deviceTopologyAffinityProviders[deviceName] = deviceAffinityProvider +} + +// SetDeviceTopology sets the device topology for the specified device name. +func (r *DeviceTopologyRegistry) SetDeviceTopology(deviceName string, deviceTopology *DeviceTopology) error { + r.mux.Lock() + defer r.mux.Unlock() + + topologyProvider, ok := r.deviceTopologyProviders[deviceName] + if !ok { + return fmt.Errorf("no device topology provider found for device %s", deviceName) + } + + topologyAffinityProvider, ok := r.deviceTopologyAffinityProviders[deviceName] + if ok { + topologyAffinityProvider.SetDeviceAffinity(deviceTopology) + general.Infof("set device affinity provider for device %s, %v", deviceName, deviceTopology) + } else { + general.Infof("no device affinity provider found for device %s", deviceName) + } + + return topologyProvider.SetDeviceTopology(deviceTopology) +} + +// GetAllDeviceTopologyProviders returns all registered device topology providers. +func (r *DeviceTopologyRegistry) GetAllDeviceTopologyProviders() map[string]DeviceTopologyProvider { + r.mux.RLock() + defer r.mux.RUnlock() + + return r.deviceTopologyProviders +} + +// GetDeviceTopology gets the device topology for the specified device name. 
+func (r *DeviceTopologyRegistry) GetDeviceTopology(deviceName string) (*DeviceTopology, bool, error) { + r.mux.RLock() + defer r.mux.RUnlock() + + provider, ok := r.deviceTopologyProviders[deviceName] + if !ok { + return nil, false, fmt.Errorf("no device topology provider found for device %s", deviceName) + } + return provider.GetDeviceTopology() +} + +// GetDeviceNUMAAffinity retrieves a map of a certain device A to the list of devices in device B that it has an affinity with. +// A device is considered to have an affinity with another device if they are on the exact same NUMA node(s) +func (r *DeviceTopologyRegistry) GetDeviceNUMAAffinity(deviceA, deviceB string) (map[string][]string, error) { + deviceTopologyKey, numaReady, err := r.GetDeviceTopology(deviceA) + if err != nil { + return nil, fmt.Errorf("error getting device topology for device %s: %v", deviceA, err) + } + if !numaReady { + return nil, fmt.Errorf("device topology for device %s is not ready", deviceA) + } + + deviceTopologyValue, numaReady, err := r.GetDeviceTopology(deviceB) + if err != nil { + return nil, fmt.Errorf("error getting device topology for device %s: %v", deviceB, err) + } + if !numaReady { + return nil, fmt.Errorf("device topology for device %s is not ready", deviceB) + } + + deviceAffinity := make(map[string][]string) + for keyName, keyInfo := range deviceTopologyKey.Devices { + devicesWithAffinity := make([]string, 0) + for valueName, valueInfo := range deviceTopologyValue.Devices { + deviceKeyNUMANodes := keyInfo.GetNUMANodes() + deviceValueNUMANodes := valueInfo.GetNUMANodes() + + if len(deviceKeyNUMANodes) != 0 && sets.NewInt(deviceKeyNUMANodes...).Equal(sets.NewInt(deviceValueNUMANodes...)) { + devicesWithAffinity = append(devicesWithAffinity, valueName) + } + } + deviceAffinity[keyName] = devicesWithAffinity + } + + return deviceAffinity, nil +} + +type DeviceTopology struct { + Devices map[string]DeviceInfo +} + +// GroupDeviceAffinity forms a topology graph such that all 
devices within a DeviceIDs group have an affinity with each other. +// They are differentiated by their affinity priority level. +// E.g. Output: +// +// { +// 0: {{"gpu-0", "gpu-1"}, {"gpu-2", "gpu-3"}}, +// 1: {{"gpu-0", "gpu-1", "gpu-2", "gpu-3"}} +// } +// +// means that gpu-0 and gpu-1 have an affinity with each other, gpu-2 and gpu-3 have an affinity with each other in affinity priority 0. +// and gpu-0, gpu-1, gpu-2, and gpu-3 have an affinity with each other in affinity priority 1. +func (t *DeviceTopology) GroupDeviceAffinity() map[AffinityPriority][]DeviceIDs { + deviceAffinityGroup := make(map[AffinityPriority][]DeviceIDs) + for deviceId, deviceInfo := range t.Devices { + for priority, affinityDeviceIDs := range deviceInfo.DeviceAffinity { + // Add itself in the group if it is not already included + if !slices.Contains(affinityDeviceIDs, deviceId) { + affinityDeviceIDs = append(affinityDeviceIDs, deviceId) + } + // Sort the strings for easier deduplication + sort.Strings(affinityDeviceIDs) + if _, ok := deviceAffinityGroup[priority]; !ok { + deviceAffinityGroup[priority] = make([]DeviceIDs, 0) + } + + // Add the affinityDeviceIDs to the priority level if it is not already there + if !containsGroup(deviceAffinityGroup[priority], affinityDeviceIDs) { + deviceAffinityGroup[priority] = append(deviceAffinityGroup[priority], affinityDeviceIDs) + } + + } + } + return deviceAffinityGroup +} + +func containsGroup(groups []DeviceIDs, candidate DeviceIDs) bool { + for _, g := range groups { + if slices.Equal(g, candidate) { + return true + } + } + return false +} + +type DeviceInfo struct { + Health string + NumaNodes []int + // DeviceAffinity is the map of priority level to the other deviceIds that a particular deviceId has an affinity with + DeviceAffinity map[AffinityPriority]DeviceIDs +} + +// AffinityPriority is the level of affinity that a deviceId has with another deviceId. 
+// The lowest affinityPriority value is 0, and in this level, devices have the most affinity with one another, +// so it is of highest priority to try to allocate these devices together. +// As the affinityPriority value increases, devices do not have as much affinity with each other, +// so it is of lower priority to try to allocate these devices together. +type AffinityPriority int + +type DeviceIDs []string + +func (i DeviceInfo) GetNUMANodes() []int { + if i.NumaNodes == nil { + return []int{} + } + return i.NumaNodes +} + +type deviceTopologyProviderImpl struct { + mutex sync.RWMutex + resourceNames []string + + deviceTopology *DeviceTopology + numaTopologyReady bool +} + +func NewDeviceTopologyProvider(resourceNames []string) DeviceTopologyProvider { + deviceTopology, err := initDeviceTopology(resourceNames) + if err != nil { + deviceTopology = getEmptyDeviceTopology() + general.Warningf("initDeviceTopology failed with error: %v", err) + } else { + general.Infof("initDeviceTopology success: %v", deviceTopology) + } + + return &deviceTopologyProviderImpl{ + resourceNames: resourceNames, + deviceTopology: deviceTopology, + } +} + +func (p *deviceTopologyProviderImpl) SetDeviceTopology(deviceTopology *DeviceTopology) error { + p.mutex.Lock() + defer p.mutex.Unlock() + if deviceTopology == nil { + return fmt.Errorf("deviceTopology is nil when setting device topology") + } + + p.deviceTopology = deviceTopology + p.numaTopologyReady = checkDeviceNUMATopologyReady(deviceTopology) + return nil +} + +func (p *deviceTopologyProviderImpl) GetDeviceTopology() (*DeviceTopology, bool, error) { + p.mutex.RLock() + defer p.mutex.RUnlock() + + if p.deviceTopology == nil { + return nil, false, fmt.Errorf("deviceTopology is nil when getting device topology") + } + + return p.deviceTopology, p.numaTopologyReady, nil +} + +func getEmptyDeviceTopology() *DeviceTopology { + return &DeviceTopology{ + Devices: make(map[string]DeviceInfo), + } +} + +func 
initDeviceTopology(resourceNames []string) (*DeviceTopology, error) { + deviceTopology := getEmptyDeviceTopology() + + kubeletCheckpoint, err := native.GetKubeletCheckpoint() + if err != nil { + general.Errorf("Failed to get kubelet checkpoint: %v", err) + return deviceTopology, nil + } + + _, registeredDevs := kubeletCheckpoint.GetDataInLatestFormat() + for _, resourceName := range resourceNames { + devices, ok := registeredDevs[resourceName] + if !ok { + continue + } + + for _, id := range devices { + // get NUMA node from UpdateAllocatableAssociatedDevices + deviceTopology.Devices[id] = DeviceInfo{} + } + } + return deviceTopology, nil +} + +func checkDeviceNUMATopologyReady(topology *DeviceTopology) bool { + if topology == nil { + return false + } + + for _, device := range topology.Devices { + if device.NumaNodes == nil { + return false + } + } + return true +} diff --git a/pkg/util/machine/device_affinity.go b/pkg/util/machine/device_affinity.go new file mode 100644 index 0000000000..5a5fc0f222 --- /dev/null +++ b/pkg/util/machine/device_affinity.go @@ -0,0 +1,23 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package machine + +// DeviceAffinityProvider knows how to form affinity between devices +type DeviceAffinityProvider interface { + // SetDeviceAffinity modifies DeviceTopology by retrieving each device's affinity to other devices + SetDeviceAffinity(*DeviceTopology) +} diff --git a/pkg/util/machine/device_stub.go b/pkg/util/machine/device_stub.go new file mode 100644 index 0000000000..6e9e97796d --- /dev/null +++ b/pkg/util/machine/device_stub.go @@ -0,0 +1,43 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package machine + +import "sync" + +type deviceTopologyProviderStub struct { + mutex sync.RWMutex + deviceTopology *DeviceTopology +} + +func NewDeviceTopologyProviderStub() DeviceTopologyProvider { + return &deviceTopologyProviderStub{} +} + +func (d *deviceTopologyProviderStub) SetDeviceTopology(deviceTopology *DeviceTopology) error { + d.mutex.Lock() + defer d.mutex.Unlock() + + d.deviceTopology = deviceTopology + return nil +} + +func (d *deviceTopologyProviderStub) GetDeviceTopology() (*DeviceTopology, bool, error) { + d.mutex.RLock() + defer d.mutex.RUnlock() + + return d.deviceTopology, true, nil +} diff --git a/pkg/util/machine/device_test.go b/pkg/util/machine/device_test.go new file mode 100644 index 0000000000..d9399627b8 --- /dev/null +++ b/pkg/util/machine/device_test.go @@ -0,0 +1,448 @@ +/* +Copyright 2022 The Katalyst Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package machine + +import ( + "sort" + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestDeviceTopologyRegistry_GetDeviceNUMAAffinity(t *testing.T) { + t.Parallel() + + npuTopology := &DeviceTopology{ + Devices: map[string]DeviceInfo{ + "npu-0": {NumaNodes: []int{0}}, + "npu-1": {NumaNodes: []int{1}}, + "npu-2": {NumaNodes: []int{0, 1}}, + }, + } + + gpuTopology := &DeviceTopology{ + Devices: map[string]DeviceInfo{ + "gpu-0": {NumaNodes: []int{0}}, + "gpu-1": {NumaNodes: []int{1}}, + "gpu-2": {NumaNodes: []int{2}}, + }, + } + + xpuTopology := &DeviceTopology{ + Devices: map[string]DeviceInfo{ + "xpu-0": {NumaNodes: []int{0}}, + "xpu-1": {NumaNodes: []int{1}}, + "xpu-2": {NumaNodes: nil}, + }, + } + + dpuTopology := &DeviceTopology{ + Devices: map[string]DeviceInfo{ + "dpu-0": {NumaNodes: []int{1}}, + "dpu-1": {NumaNodes: []int{0}}, + "dpu-2": {NumaNodes: []int{}}, + }, + } + + // Register device topology providers + registry := NewDeviceTopologyRegistry() + registry.RegisterDeviceTopologyProvider("npu", NewDeviceTopologyProviderStub()) + registry.RegisterDeviceTopologyProvider("gpu", NewDeviceTopologyProviderStub()) + registry.RegisterDeviceTopologyProvider("xpu", NewDeviceTopologyProviderStub()) + registry.RegisterDeviceTopologyProvider("dpu", NewDeviceTopologyProviderStub()) + err := registry.SetDeviceTopology("npu", npuTopology) + assert.NoError(t, err) + err = registry.SetDeviceTopology("gpu", 
gpuTopology) + assert.NoError(t, err) + err = registry.SetDeviceTopology("xpu", xpuTopology) + assert.NoError(t, err) + err = registry.SetDeviceTopology("dpu", dpuTopology) + assert.NoError(t, err) + + tests := []struct { + name string + deviceA string + deviceB string + expected map[string][]string + expectedErr bool + }{ + { + name: "npu to gpu affinity", + deviceA: "npu", + deviceB: "gpu", + expected: map[string][]string{ + "npu-0": {"gpu-0"}, + "npu-1": {"gpu-1"}, + "npu-2": {}, + }, + }, + { + name: "non-existent device A", + deviceA: "invalid device", + deviceB: "gpu", + expectedErr: true, + }, + { + name: "non-existent device B", + deviceA: "npu", + deviceB: "invalid device", + expectedErr: true, + }, + { + name: "devices with empty numa nodes are not considered to have affinity with each other", + deviceA: "xpu", + deviceB: "dpu", + expected: map[string][]string{ + "xpu-0": {"dpu-1"}, + "xpu-1": {"dpu-0"}, + "xpu-2": {}, + }, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + actual, err := registry.GetDeviceNUMAAffinity(tt.deviceA, tt.deviceB) + if tt.expectedErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + evaluateDeviceNUMAAffinity(t, actual, tt.expected) + } + }) + } +} + +func TestDeviceTopology_GroupDeviceAffinity(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + deviceTopology *DeviceTopology + expectedDeviceAffinity map[AffinityPriority][]DeviceIDs + }{ + { + name: "test simple affinity of 2 devices to 1 group with only affinity priority level", + deviceTopology: &DeviceTopology{ + Devices: map[string]DeviceInfo{ + "npu-0": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-1"}, + }, + }, + "npu-1": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-0"}, + }, + }, + "npu-2": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-3"}, + }, + }, + "npu-3": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-2"}, + }, 
+ }, + }, + }, + expectedDeviceAffinity: map[AffinityPriority][]DeviceIDs{ + 0: {DeviceIDs([]string{"npu-0", "npu-1"}), DeviceIDs([]string{"npu-2", "npu-3"})}, + }, + }, + { + name: "test simple affinity of 4 devices to 1 group with only affinity priority level", + deviceTopology: &DeviceTopology{ + Devices: map[string]DeviceInfo{ + "npu-0": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-1", "npu-2", "npu-3"}, + }, + }, + "npu-1": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-0", "npu-2", "npu-3"}, + }, + }, + "npu-2": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-0", "npu-1", "npu-3"}, + }, + }, + "npu-3": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-0", "npu-1", "npu-2"}, + }, + }, + "npu-4": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-5", "npu-6", "npu-7"}, + }, + }, + "npu-5": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-4", "npu-6", "npu-7"}, + }, + }, + "npu-6": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-4", "npu-5", "npu-7"}, + }, + }, + "npu-7": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-4", "npu-5", "npu-6"}, + }, + }, + }, + }, + expectedDeviceAffinity: map[AffinityPriority][]DeviceIDs{ + 0: {DeviceIDs([]string{"npu-0", "npu-1", "npu-2", "npu-3"}), DeviceIDs([]string{"npu-4", "npu-5", "npu-6", "npu-7"})}, + }, + }, + { + name: "device topology includes self for one affinity level", + deviceTopology: &DeviceTopology{ + Devices: map[string]DeviceInfo{ + "npu-0": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-0", "npu-1"}, + }, + }, + "npu-1": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-0", "npu-1"}, + }, + }, + "npu-2": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-2", "npu-3"}, + }, + }, + "npu-3": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-2", "npu-3"}, + }, + }, + }, + }, + expectedDeviceAffinity: map[AffinityPriority][]DeviceIDs{ + 0: 
{DeviceIDs([]string{"npu-0", "npu-1"}), DeviceIDs([]string{"npu-2", "npu-3"})}, + }, + }, + { + name: "test simple affinity of 2 devices to 1 group with 2 affinity priority level", + deviceTopology: &DeviceTopology{ + Devices: map[string]DeviceInfo{ + "npu-0": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-1"}, + 1: {"npu-1", "npu-2", "npu-3"}, + }, + }, + "npu-1": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-0"}, + 1: {"npu-0", "npu-2", "npu-3"}, + }, + }, + "npu-2": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-3"}, + 1: {"npu-0", "npu-1", "npu-3"}, + }, + }, + "npu-3": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-2"}, + 1: {"npu-0", "npu-1", "npu-2"}, + }, + }, + }, + }, + expectedDeviceAffinity: map[AffinityPriority][]DeviceIDs{ + 0: {DeviceIDs([]string{"npu-0", "npu-1"}), DeviceIDs([]string{"npu-2", "npu-3"})}, + 1: {DeviceIDs([]string{"npu-0", "npu-1", "npu-2", "npu-3"})}, + }, + }, + { + name: "device topology includes self for 2 affinity levels", + deviceTopology: &DeviceTopology{ + Devices: map[string]DeviceInfo{ + "npu-0": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-0", "npu-1"}, + 1: {"npu-0", "npu-1", "npu-2", "npu-3"}, + }, + }, + "npu-1": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-0", "npu-1"}, + 1: {"npu-0", "npu-1", "npu-2", "npu-3"}, + }, + }, + "npu-2": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-2", "npu-3"}, + 1: {"npu-0", "npu-1", "npu-2", "npu-3"}, + }, + }, + "npu-3": { + DeviceAffinity: map[AffinityPriority]DeviceIDs{ + 0: {"npu-2", "npu-3"}, + 1: {"npu-0", "npu-1", "npu-2", "npu-3"}, + }, + }, + }, + }, + expectedDeviceAffinity: map[AffinityPriority][]DeviceIDs{ + 0: {DeviceIDs([]string{"npu-0", "npu-1"}), DeviceIDs([]string{"npu-2", "npu-3"})}, + 1: {DeviceIDs([]string{"npu-0", "npu-1", "npu-2", "npu-3"})}, + }, + }, + { + name: "unsorted device topology has no effect on result", + deviceTopology: &DeviceTopology{ 
+			Devices: map[string]DeviceInfo{
+				"npu-0": {
+					DeviceAffinity: map[AffinityPriority]DeviceIDs{
+						0: {"npu-2", "npu-1", "npu-3"},
+					},
+				},
+				"npu-1": {
+					DeviceAffinity: map[AffinityPriority]DeviceIDs{
+						0: {"npu-3", "npu-0", "npu-2"},
+					},
+				},
+				"npu-2": {
+					DeviceAffinity: map[AffinityPriority]DeviceIDs{
+						0: {"npu-1", "npu-0", "npu-3"},
+					},
+				},
+				"npu-3": {
+					DeviceAffinity: map[AffinityPriority]DeviceIDs{
+						0: {"npu-0", "npu-2", "npu-1"},
+					},
+				},
+				"npu-4": {
+					DeviceAffinity: map[AffinityPriority]DeviceIDs{
+						0: {"npu-6", "npu-5", "npu-7"},
+					},
+				},
+				"npu-5": {
+					DeviceAffinity: map[AffinityPriority]DeviceIDs{
+						0: {"npu-7", "npu-4", "npu-6"},
+					},
+				},
+				"npu-6": {
+					DeviceAffinity: map[AffinityPriority]DeviceIDs{
+						0: {"npu-5", "npu-4", "npu-7"},
+					},
+				},
+				"npu-7": {
+					DeviceAffinity: map[AffinityPriority]DeviceIDs{
+						0: {"npu-6", "npu-4", "npu-5"},
+					},
+				},
+			},
+		},
+		expectedDeviceAffinity: map[AffinityPriority][]DeviceIDs{
+			0: {DeviceIDs([]string{"npu-0", "npu-1", "npu-2", "npu-3"}), DeviceIDs([]string{"npu-4", "npu-5", "npu-6", "npu-7"})},
+		},
+	},
+	}
+
+	for _, tt := range tests {
+		tt := tt
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			deviceAffinity := tt.deviceTopology.GroupDeviceAffinity()
+			evaluateDeviceAffinity(t, tt.expectedDeviceAffinity, deviceAffinity) // expected first, actual second — matches the helper's signature
+		})
+	}
+}
+
+// evaluateDeviceNUMAAffinity asserts that the actual device->NUMA mapping matches the expected one,
+// comparing per-device slices order-insensitively. Parameter order is (expected, actual).
+func evaluateDeviceNUMAAffinity(t *testing.T, expectedDeviceNUMAAffinity, actualDeviceNUMAAffinity map[string][]string) {
+	if len(actualDeviceNUMAAffinity) != len(expectedDeviceNUMAAffinity) {
+		t.Errorf("deviceNUMAAffinity lengths don't match, expected %d, got %d", len(expectedDeviceNUMAAffinity), len(actualDeviceNUMAAffinity))
+		return
+	}
+
+	for device, expected := range expectedDeviceNUMAAffinity {
+		actual, ok := actualDeviceNUMAAffinity[device]
+		if !ok {
+			t.Errorf("expected device numa affinity for device %v, but it is not found", device)
+			return
+		}
+
+		assert.ElementsMatch(t, expected, actual, "device numa affinity are not equal")
+	}
+}
+
+// evaluateDeviceAffinity asserts that the actual per-priority affinity groups match the
+// expected ones, ignoring group order and intra-group device order. Parameter order is (expected, actual).
+func evaluateDeviceAffinity(t *testing.T, expectedDeviceAffinity, actualDeviceAffinity map[AffinityPriority][]DeviceIDs) {
+	if len(actualDeviceAffinity) != len(expectedDeviceAffinity) {
+		t.Errorf("expected %d affinities, got %d", len(expectedDeviceAffinity), len(actualDeviceAffinity))
+		return
+	}
+
+	for priority, expected := range expectedDeviceAffinity {
+		actual, ok := actualDeviceAffinity[priority]
+		if !ok {
+			t.Errorf("expected affinities for priority %v, but it is not found", priority)
+			return
+		}
+
+		if !equalDeviceIDsGroupsIgnoreOrder(t, expected, actual) {
+			return
+		}
+	}
+}
+
+// equalDeviceIDsGroupsIgnoreOrder reports whether two sets of device-ID groups are equal,
+// ignoring both the order of the groups and the order of IDs within each group.
+func equalDeviceIDsGroupsIgnoreOrder(t *testing.T, expected, actual []DeviceIDs) bool {
+	if len(expected) != len(actual) {
+		t.Errorf("expected %d devices, got %d", len(expected), len(actual))
+		return false
+	}
+
+	// Convert each DeviceIDs slice into a normalized, comparable form
+	normalize := func(groups []DeviceIDs) []string {
+		res := make([]string, len(groups))
+		for i, group := range groups {
+			sorted := append([]string{}, group...)
+			sort.Strings(sorted)
+			res[i] = strings.Join(sorted, ",")
+		}
+		sort.Strings(res)
+		return res
+	}
+
+	normalizedExp := normalize(expected)
+	normalizedAct := normalize(actual)
+
+	for i := range normalizedExp {
+		if normalizedExp[i] != normalizedAct[i] {
+			t.Errorf("expected %s, got %s", normalizedExp[i], normalizedAct[i]) // expected value first, then the actual one
+			return false
+		}
+	}
+
+	return true
+}
diff --git a/pkg/util/native/kubelet.go b/pkg/util/native/kubelet.go
index 91eafcbbd3..8c7dc22028 100644
--- a/pkg/util/native/kubelet.go
+++ b/pkg/util/native/kubelet.go
@@ -26,13 +26,20 @@ import (
 	"strconv"
 	"time"
 
+	"github.com/pkg/errors"
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/client-go/discovery"
 	"k8s.io/client-go/rest"
+	"k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
+	"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
+	"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
 )
 
 const (
 	defaultTimeout = time.Second * 10
+
+	// kubeletDeviceManagerCheckpoint is the file name of device plugin checkpoint
+	kubeletDeviceManagerCheckpoint = "kubelet_internal_checkpoint"
 )
 
 // MemoryReservation specifies the memory reservation of different types for each NUMA node
@@ -198,3 +205,21 @@ func insecureConfig(host, tokenFile string) (*rest.Config, error) {
 		BearerTokenFile: tokenFile,
 	}, nil
 }
+
+func GetKubeletCheckpoint() (checkpoint.DeviceManagerCheckpoint, error) {
+	checkpointManager, err := checkpointmanager.NewCheckpointManager(v1beta1.DevicePluginPath)
+	if err != nil {
+		return nil, errors.Wrap(err, "new checkpoint manager failed")
+	}
+
+	registeredDevs := make(map[string][]string)
+	devEntries := make([]checkpoint.PodDevicesEntry, 0)
+	cp := checkpoint.New(devEntries, registeredDevs)
+
+	err = checkpointManager.GetCheckpoint(kubeletDeviceManagerCheckpoint, cp)
+	if err != nil {
+		return nil, errors.Wrap(err, "get checkpoint failed")
+	}
+
+	return cp, nil
+}