From a729ef7b0f1775f7f15423b9157678eddcdae054 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Tue, 16 Dec 2025 16:57:45 +0500 Subject: [PATCH 1/2] Use TTLCache for _BACKENDS_CACHE --- .../server/services/backends/__init__.py | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/dstack/_internal/server/services/backends/__init__.py b/src/dstack/_internal/server/services/backends/__init__.py index 5b9908e356..0e51395cec 100644 --- a/src/dstack/_internal/server/services/backends/__init__.py +++ b/src/dstack/_internal/server/services/backends/__init__.py @@ -4,6 +4,7 @@ from typing import Callable, Coroutine, Dict, List, Optional, Tuple from uuid import UUID +from cachetools import TTLCache from sqlalchemy import delete, update from sqlalchemy.ext.asyncio import AsyncSession @@ -187,8 +188,8 @@ async def delete_backends( BackendTuple = Tuple[BackendModel, Backend] -_BACKENDS_CACHE_LOCKS = {} -_BACKENDS_CACHE: Dict[UUID, Dict[BackendType, BackendTuple]] = {} +_BACKENDS_CACHE_LOCKS: Dict[UUID, asyncio.Lock] = {} +_BACKENDS_CACHE = TTLCache[UUID, Dict[BackendType, BackendTuple]](maxsize=1000, ttl=600) def _get_project_cache_lock(project_id: UUID) -> asyncio.Lock: @@ -196,18 +197,16 @@ def _get_project_cache_lock(project_id: UUID) -> asyncio.Lock: async def get_project_backends_with_models(project: ProjectModel) -> List[BackendTuple]: - backends = [] async with _get_project_cache_lock(project.id): key = project.id - project_backends_cache = _BACKENDS_CACHE.setdefault(key, {}) + project_backends = _BACKENDS_CACHE.get(key, {}) for backend_model in project.backends: - cached_backend = project_backends_cache.get(backend_model.type) + cached_backend = project_backends.get(backend_model.type) if ( cached_backend is not None and cached_backend[0].config == backend_model.config and cached_backend[0].auth == backend_model.auth ): - backends.append(cached_backend) continue configurator = get_configurator(backend_model.type) if configurator 
is None: @@ -231,9 +230,13 @@ async def get_project_backends_with_models(project: ProjectModel) -> List[Backen backend_model.type.value, ) continue - backends.append((backend_model, backend)) - _BACKENDS_CACHE[key][backend_model.type] = (backend_model, backend) - return backends + project_backends[backend_model.type] = (backend_model, backend) + # `__setitem__()` also evicts any expired entries from the cache. + # Note that there is no global cache lock so a race condition is possible: + # one coroutine updates/re-assigns backends expired by another coroutine. + # This is ok since the only effect is that the project's cache gets restored. + _BACKENDS_CACHE[key] = project_backends + return list(project_backends.values()) _get_project_backend_with_model_by_type = None From 0da6b1d183466f0e535d4830b16e4a2cb41947a6 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Tue, 16 Dec 2025 17:14:04 +0500 Subject: [PATCH 2/2] Set ttl=300 --- src/dstack/_internal/server/services/backends/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dstack/_internal/server/services/backends/__init__.py b/src/dstack/_internal/server/services/backends/__init__.py index 0e51395cec..c2478df17f 100644 --- a/src/dstack/_internal/server/services/backends/__init__.py +++ b/src/dstack/_internal/server/services/backends/__init__.py @@ -189,7 +189,7 @@ async def delete_backends( _BACKENDS_CACHE_LOCKS: Dict[UUID, asyncio.Lock] = {} -_BACKENDS_CACHE = TTLCache[UUID, Dict[BackendType, BackendTuple]](maxsize=1000, ttl=600) +_BACKENDS_CACHE = TTLCache[UUID, Dict[BackendType, BackendTuple]](maxsize=1000, ttl=300) def _get_project_cache_lock(project_id: UUID) -> asyncio.Lock: