-
-
Notifications
You must be signed in to change notification settings - Fork 311
feat(dev): add file-based caching for library objects during development #3029
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,55 @@ | ||||||||
| """ | ||||||||
| Management command to clear the development file cache. | ||||||||
|
|
||||||||
| Usage: | ||||||||
| python manage.py clear_dev_cache # Clear all cache entries | ||||||||
| python manage.py clear_dev_cache --list # List all cache entries | ||||||||
| python manage.py clear_dev_cache --name full_auto_completer # Clear specific entry | ||||||||
| """ | ||||||||
| from django.core.management.base import BaseCommand | ||||||||
| from sefaria.system.cache import clear_dev_file_cache, list_dev_file_cache, is_dev_file_cache_enabled | ||||||||
|
|
||||||||
|
|
||||||||
| class Command(BaseCommand): | ||||||||
| help = 'Clear the development file cache used for persisting library objects across server reloads' | ||||||||
|
|
||||||||
| def add_arguments(self, parser): | ||||||||
| parser.add_argument( | ||||||||
| '--name', | ||||||||
| type=str, | ||||||||
| help='Clear a specific cache entry by name (e.g., full_auto_completer, linker_he)', | ||||||||
| ) | ||||||||
| parser.add_argument( | ||||||||
| '--list', | ||||||||
| action='store_true', | ||||||||
| help='List all cache entries instead of clearing', | ||||||||
| ) | ||||||||
|
|
||||||||
| def handle(self, *args, **options): | ||||||||
| if not is_dev_file_cache_enabled(): | ||||||||
| self.stdout.write( | ||||||||
| self.style.WARNING( | ||||||||
| 'Dev file cache is not enabled. Set USE_DEV_FILE_CACHE = True in local_settings.py' | ||||||||
| ) | ||||||||
| ) | ||||||||
|
||||||||
| ) | |
| ) | |
| return |
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -220,6 +220,13 @@ | |||||||||||
| # Turns on loading of machine learning models to run linker | ||||||||||||
| ENABLE_LINKER = False | ||||||||||||
|
|
||||||||||||
| # File-based caching for library objects (AutoCompleter, Linker) during development. | ||||||||||||
| # When enabled, these expensive objects are serialized to disk after first build | ||||||||||||
| # and loaded from disk on subsequent server reloads, significantly speeding up | ||||||||||||
| # development iteration. Should NOT be used in production. | ||||||||||||
|
||||||||||||
| # development iteration. Should NOT be used in production. | |
| # development iteration. Should NOT be used in production. | |
| # Cached files are stored under DEV_FILE_CACHE_DIR; if you change the database contents, | |
| # update models/serialization for these objects, or observe stale behavior, clear this | |
| # directory (delete its contents) so that fresh cache files will be rebuilt. |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -310,6 +310,11 @@ def get_static_url(): | |||||
| } | ||||||
|
|
||||||
|
|
||||||
| # Default values for settings that may be overridden in local_settings. | ||||||
| # These ensure the application works even if local_settings doesn't define them. | ||||||
| USE_DEV_FILE_CACHE = False | ||||||
| DEV_FILE_CACHE_DIR = "/tmp/sefaria_dev_cache" | ||||||
|
||||||
| DEV_FILE_CACHE_DIR = "/tmp/sefaria_dev_cache" | |
| DEV_FILE_CACHE_DIR = os.path.join(os.path.expanduser("~"), ".sefaria_dev_cache") |
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -277,4 +277,125 @@ def invalidate_cache_by_pattern(pattern: str, cache_type: Optional[str] = None) | |||||||||||||||
| "backend": "unknown", | ||||||||||||||||
| "count": 0, | ||||||||||||||||
| "message": "Internal server error" | ||||||||||||||||
| } | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
|
|
||||||||||||||||
| # ============================================================================= | ||||||||||||||||
| # File-based cache for development - persists expensive objects across reloads | ||||||||||||||||
| # ============================================================================= | ||||||||||||||||
|
|
||||||||||||||||
| import pickle | ||||||||||||||||
| from pathlib import Path | ||||||||||||||||
|
Comment on lines
+287
to
+288
|
||||||||||||||||
|
|
||||||||||||||||
|
|
||||||||||||||||
| def _get_dev_file_cache_dir() -> Path: | ||||||||||||||||
| """Get the development file cache directory from settings.""" | ||||||||||||||||
| cache_dir = Path(getattr(settings, 'DEV_FILE_CACHE_DIR', '/tmp/sefaria_dev_cache')) | ||||||||||||||||
| cache_dir.mkdir(parents=True, exist_ok=True) | ||||||||||||||||
|
||||||||||||||||
| cache_dir.mkdir(parents=True, exist_ok=True) | |
| cache_dir.mkdir(parents=True, exist_ok=True, mode=0o700) | |
| try: | |
| cache_dir.chmod(0o700) | |
| except Exception as e: | |
| # Log a warning but do not fail if we cannot enforce strict permissions | |
| logger.warning(f"Unable to set permissions on dev file cache dir {cache_dir}: {e}") |
Copilot
AI
Jan 19, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using pickle to deserialize untrusted data is a security risk because it can lead to arbitrary code execution: anyone who can write to the cache directory can make the server run their code the next time a cached object is loaded. Although this is intended for development use only, consider adding a warning in the documentation or using a safer serialization format such as JSON where possible. At minimum, restrict the cache directory to its owner (e.g., mode 0o700) so that it is not world-writable.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
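After loading from cache, slug_to_pools is a regular dict, but when built from scratch it's a defaultdict(list). The two behave differently for a missing slug: the defaultdict silently creates and returns an empty list, while the cached plain dict raises KeyError, so code that works on a fresh build can fail after a reload from cache. Consider either: 1) wrapping the cached dict in defaultdict(list, cached) after loading, or 2) always using a regular dict and avoiding defaultdict behavior (e.g., by using .get(slug, []) at the lookup sites).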