From a2cd6ea0e21c76b577213f90a24f3873b16d5e42 Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Thu, 15 Jan 2026 17:29:04 +0200 Subject: [PATCH 1/8] split into 3 files --- ...pulate_nodes_notification_subscriptions.py | 68 +++++++++++++++++++ ...late_reviews_notification_subscriptions.py | 43 ++++++++++++ ...opulate_user_notification_subscriptions.py | 44 ++++++++++++ 3 files changed, 155 insertions(+) create mode 100644 scripts/populate_nodes_notification_subscriptions.py create mode 100644 scripts/populate_reviews_notification_subscriptions.py create mode 100644 scripts/populate_user_notification_subscriptions.py diff --git a/scripts/populate_nodes_notification_subscriptions.py b/scripts/populate_nodes_notification_subscriptions.py new file mode 100644 index 00000000000..7c13d2ad1df --- /dev/null +++ b/scripts/populate_nodes_notification_subscriptions.py @@ -0,0 +1,68 @@ +import django +django.setup() + +from website.app import init_app +init_app(routes=False) + +from framework.celery_tasks import app as celery_app +from django.contrib.contenttypes.models import ContentType +from django.db.models import Count, F, OuterRef, Subquery, IntegerField, CharField +from django.db.models.functions import Cast +from osf.models import Node, NotificationSubscription, NotificationType + + +@celery_app.task(name='scripts.populate_nodes_notification_subscriptions') +def populate_nodes_notification_subscriptions(): + created = 0 + node_file_nt = NotificationType.Type.NODE_FILE_UPDATED.instance + + node_ct = ContentType.objects.get_for_model(Node) + + node_notifications_sq = ( + NotificationSubscription.objects.filter( + content_type=node_ct, + notification_type=node_file_nt, + object_id=Cast(OuterRef('pk'), CharField()), + ).values( + 'object_id' + ).annotate( + cnt=Count('id') + ).values('cnt')[:1] + ) + + nodes_qs = ( + Node.objects + .annotate( + contributors_count=Count('_contributors', distinct=True), + notifications_count=Subquery( + node_notifications_sq, + output_field=IntegerField(), + ), + ).exclude(contributors_count=F('notifications_count')) + ) + + print(f"Creating NODE_FILE_UPDATED subscriptions for {nodes_qs.count()} nodes.") + for id, node in enumerate(nodes_qs, 1): + print(f"Processing node {id} / {nodes_qs.count()}") + for contributor in node.contributors.all(): + try: + _, is_created = NotificationSubscription.objects.get_or_create( + notification_type=node_file_nt, + user=contributor, + content_type=node_ct, + object_id=node.id, + defaults={ + '_is_digest': True, + 'message_frequency': 'none', + }, + ) + if is_created: + created += 1 + except Exception as exeption: + print(exeption) + continue + + print(f"Created {created} subscriptions") + +if __name__ == '__main__': + populate_nodes_notification_subscriptions.delay() diff --git a/scripts/populate_reviews_notification_subscriptions.py b/scripts/populate_reviews_notification_subscriptions.py new file mode 100644 index 00000000000..7e0aa12884c --- /dev/null +++ b/scripts/populate_reviews_notification_subscriptions.py @@ -0,0 +1,43 @@ +import django +django.setup() + +from website.app import init_app +init_app(routes=False) + +from framework.celery_tasks import app as celery_app +from django.contrib.contenttypes.models import ContentType +from osf.models import OSFUser, NotificationSubscription, NotificationType + + +@celery_app.task(name='scripts.populate_reviews_notification_subscriptions') +def populate_reviews_notification_subscriptions(): + created = 0 + review_nt = NotificationType.Type.REVIEWS_SUBMISSION_STATUS.instance + + user_ct = ContentType.objects.get_for_model(OSFUser) + + reviews_qs = OSFUser.objects.exclude(subscriptions__notification_type__name=NotificationType.Type.REVIEWS_SUBMISSION_STATUS).distinct('id') + + print(f"Creating REVIEWS_SUBMISSION_STATUS subscriptions for {reviews_qs.count()} users.") + for id, user in enumerate(reviews_qs, 1): + print(f"Processing user {id} / {reviews_qs.count()}") + try: + _, is_created = NotificationSubscription.objects.get_or_create( + notification_type=review_nt, + user=user, + content_type=user_ct, + object_id=user.id, + defaults={ + 'message_frequency': 'none', + }, + ) + if is_created: + created += 1 + except Exception as exeption: + print(exeption) + continue + + print(f"Created {created} subscriptions") + +if __name__ == '__main__': + populate_reviews_notification_subscriptions.delay() diff --git a/scripts/populate_user_notification_subscriptions.py b/scripts/populate_user_notification_subscriptions.py new file mode 100644 index 00000000000..ce0bba61583 --- /dev/null +++ b/scripts/populate_user_notification_subscriptions.py @@ -0,0 +1,44 @@ +import django +django.setup() + +from website.app import init_app +init_app(routes=False) + +from framework.celery_tasks import app as celery_app +from django.contrib.contenttypes.models import ContentType +from osf.models import OSFUser, NotificationSubscription, NotificationType + + +@celery_app.task(name='scripts.populate_user_notification_subscriptions') +def populate_user_notification_subscriptions(): + created = 0 + user_file_nt = NotificationType.Type.USER_FILE_UPDATED.instance + + user_ct = ContentType.objects.get_for_model(OSFUser) + + files_qs = OSFUser.objects.exclude(subscriptions__notification_type__name=NotificationType.Type.USER_FILE_UPDATED).distinct('id') + + print(f"Creating USER_FILE_UPDATED subscriptions for {files_qs.count()} users.") + for id, user in enumerate(files_qs, 1): + print(f"Processing user {id} / {files_qs.count()}") + try: + _, is_created = NotificationSubscription.objects.get_or_create( + notification_type=user_file_nt, + user=user, + content_type=user_ct, + object_id=user.id, + defaults={ + '_is_digest': True, + 'message_frequency': 'none', + }, + ) + if is_created: + created += 1 + except Exception as exeption: + print(exeption) + continue + + print(f"Created {created} subscriptions") + +if __name__ == '__main__': + populate_user_notification_subscriptions.delay() From fc57c88185c70123bb8d1b9bca8495a67f9e73d3 Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Thu, 15 Jan 2026 17:29:35 +0200 Subject: [PATCH 2/8] remove populate_notification_subscriptions --- .../populate_notification_subscriptions.py | 113 ------------------ 1 file changed, 113 deletions(-) delete mode 100644 scripts/populate_notification_subscriptions.py diff --git a/scripts/populate_notification_subscriptions.py b/scripts/populate_notification_subscriptions.py deleted file mode 100644 index 557b9f2a47d..00000000000 --- a/scripts/populate_notification_subscriptions.py +++ /dev/null @@ -1,113 +0,0 @@ -import django -django.setup() - -from website.app import init_app -init_app(routes=False) - -from framework.celery_tasks import app as celery_app -from django.contrib.contenttypes.models import ContentType -from django.db.models import Count, F, OuterRef, Subquery, IntegerField, CharField -from django.db.models.functions import Cast -from osf.models import OSFUser, Node, NotificationSubscription, NotificationType - - -@celery_app.task(name='scripts.populate_notification_subscriptions') -def populate_notification_subscriptions(): - created = 0 - user_file_nt = NotificationType.Type.USER_FILE_UPDATED.instance - review_nt = NotificationType.Type.REVIEWS_SUBMISSION_STATUS.instance - node_file_nt = NotificationType.Type.NODE_FILE_UPDATED.instance - - user_ct = ContentType.objects.get_for_model(OSFUser) - node_ct = ContentType.objects.get_for_model(Node) - - reviews_qs = OSFUser.objects.exclude(subscriptions__notification_type__name=NotificationType.Type.REVIEWS_SUBMISSION_STATUS).distinct('id') - files_qs = OSFUser.objects.exclude(subscriptions__notification_type__name=NotificationType.Type.USER_FILE_UPDATED).distinct('id') - - node_notifications_sq = ( - NotificationSubscription.objects.filter( - content_type=node_ct, - notification_type=node_file_nt, - object_id=Cast(OuterRef('pk'), CharField()), - ).values( - 'object_id' - ).annotate( - cnt=Count('id') - ).values('cnt')[:1] - ) - - nodes_qs = ( - Node.objects - .annotate( - contributors_count=Count('_contributors', distinct=True), - notifications_count=Subquery( - node_notifications_sq, - output_field=IntegerField(), - ), - ).exclude(contributors_count=F('notifications_count')) - ) - - print(f"Creating REVIEWS_SUBMISSION_STATUS subscriptions for {reviews_qs.count()} users.") - for id, user in enumerate(reviews_qs, 1): - print(f"Processing user {id} / {reviews_qs.count()}") - try: - _, is_created = NotificationSubscription.objects.get_or_create( - notification_type=review_nt, - user=user, - content_type=user_ct, - object_id=user.id, - defaults={ - 'message_frequency': 'none', - }, - ) - if is_created: - created += 1 - except Exception as exeption: - print(exeption) - continue - - print(f"Creating USER_FILE_UPDATED subscriptions for {files_qs.count()} users.") - for id, user in enumerate(files_qs, 1): - print(f"Processing user {id} / {files_qs.count()}") - try: - _, is_created = NotificationSubscription.objects.get_or_create( - notification_type=user_file_nt, - user=user, - content_type=user_ct, - object_id=user.id, - defaults={ - '_is_digest': True, - 'message_frequency': 'none', - }, - ) - if is_created: - created += 1 - except Exception as exeption: - print(exeption) - continue - - print(f"Creating NODE_FILE_UPDATED subscriptions for {nodes_qs.count()} nodes.") - for id, node in enumerate(nodes_qs, 1): - print(f"Processing node {id} / {nodes_qs.count()}") - for contributor in node.contributors.all(): - try: - _, is_created = NotificationSubscription.objects.get_or_create( - notification_type=node_file_nt, - user=contributor, - content_type=node_ct, - object_id=node.id, - defaults={ - '_is_digest': True, - 'message_frequency': 'none', - }, - ) - if is_created: - created += 1 - except Exception as exeption: - print(exeption) - continue - - print(f"Created {created} subscriptions") - -if __name__ == '__main__': - populate_notification_subscriptions.delay() From b3d33561c52d8a4e0bbb7593a7700818bfc9669f Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Tue, 20 Jan 2026 16:28:54 +0200 Subject: [PATCH 3/8] Renamed files, refactor of populate notification subscriptions user global file updated --- ...cation_subscriptions_node_file_updated.py} | 0 ..._subscriptions_user_global_file_updated.py | 76 +++++++++++++++++++ ...tion_subscriptions_user_global_reviews.py} | 0 ...opulate_user_notification_subscriptions.py | 44 ----------- 4 files changed, 76 insertions(+), 44 deletions(-) rename scripts/{populate_nodes_notification_subscriptions.py => populate_notification_subscriptions_node_file_updated.py} (100%) create mode 100644 scripts/populate_notification_subscriptions_user_global_file_updated.py rename scripts/{populate_reviews_notification_subscriptions.py => populate_notification_subscriptions_user_global_reviews.py} (100%) delete mode 100644 scripts/populate_user_notification_subscriptions.py diff --git a/scripts/populate_nodes_notification_subscriptions.py b/scripts/populate_notification_subscriptions_node_file_updated.py similarity index 100% rename from scripts/populate_nodes_notification_subscriptions.py rename to scripts/populate_notification_subscriptions_node_file_updated.py diff --git a/scripts/populate_notification_subscriptions_user_global_file_updated.py b/scripts/populate_notification_subscriptions_user_global_file_updated.py new file mode 100644 index 00000000000..1cf567047a9 --- /dev/null +++ b/scripts/populate_notification_subscriptions_user_global_file_updated.py @@ -0,0 +1,76 @@ +import django +django.setup() + +from website.app import init_app +init_app(routes=False) + +from framework.celery_tasks import app as celery_app +from django.contrib.contenttypes.models import ContentType +from osf.models import OSFUser, NotificationSubscription, NotificationType + + +@celery_app.task(name="scripts.populate_notification_subscriptions_user_global_file_updated") +def populate_notification_subscriptions_user_global_file_updated(): + print("Starting USER_FILE_UPDATED subscription population...") + + batch_size = 1000 + user_file_updated_nt = NotificationType.Type.USER_FILE_UPDATED + + updated = ( + NotificationSubscription.objects + .filter( + notification_type__name=user_file_updated_nt, + _is_digest=False, + ) + .update(_is_digest=True) + ) + + print(f"Updated {updated} subscriptions") + print("Update finished.") + + user_ct = ContentType.objects.get_for_model(OSFUser) + user_qs = (OSFUser.objects + .exclude(subscriptions__notification_type__name=user_file_updated_nt) + .distinct() + .order_by("id") + .iterator(chunk_size=batch_size) + ) + + items_to_create = [] + total_created = 0 + + for count, user in enumerate(user_qs, 1): + items_to_create.append( + NotificationSubscription( + notification_type=user_file_updated_nt.instance, + user=user, + content_type=user_ct, + object_id=user.id, + _is_digest=True, + message_frequency="none", + ) + ) + if len(items_to_create) >= batch_size: + print(f"Creating batch of {len(items_to_create)} subscriptions...") + NotificationSubscription.objects.bulk_create( + items_to_create, + batch_size=batch_size, + ignore_conflicts=True, + ) + total_created += len(items_to_create) + items_to_create.clear() + + if count % 1000 == 0: + print(f"Processed {count}, created {total_created}") + + if items_to_create: + print(f"Creating final batch of {len(items_to_create)} subscriptions...") + NotificationSubscription.objects.bulk_create( + items_to_create, + batch_size=batch_size, + ignore_conflicts=True, + ) + total_created += len(items_to_create) + + print(f"Created {total_created} subscriptions.") + print("Creation finished.") diff --git a/scripts/populate_reviews_notification_subscriptions.py b/scripts/populate_notification_subscriptions_user_global_reviews.py similarity index 100% rename from scripts/populate_reviews_notification_subscriptions.py rename to scripts/populate_notification_subscriptions_user_global_reviews.py diff --git a/scripts/populate_user_notification_subscriptions.py b/scripts/populate_user_notification_subscriptions.py deleted file mode 100644 index ce0bba61583..00000000000 --- a/scripts/populate_user_notification_subscriptions.py +++ /dev/null @@ -1,44 +0,0 @@ -import django -django.setup() - -from website.app import init_app -init_app(routes=False) - -from framework.celery_tasks import app as celery_app -from django.contrib.contenttypes.models import ContentType -from osf.models import OSFUser, NotificationSubscription, NotificationType - - -@celery_app.task(name='scripts.populate_user_notification_subscriptions') -def populate_user_notification_subscriptions(): - created = 0 - user_file_nt = NotificationType.Type.USER_FILE_UPDATED.instance - - user_ct = ContentType.objects.get_for_model(OSFUser) - - files_qs = OSFUser.objects.exclude(subscriptions__notification_type__name=NotificationType.Type.USER_FILE_UPDATED).distinct('id') - - print(f"Creating USER_FILE_UPDATED subscriptions for {files_qs.count()} users.") - for id, user in enumerate(files_qs, 1): - print(f"Processing user {id} / {files_qs.count()}") - try: - _, is_created = NotificationSubscription.objects.get_or_create( - notification_type=user_file_nt, - user=user, - content_type=user_ct, - object_id=user.id, - defaults={ - '_is_digest': True, - 'message_frequency': 'none', - }, - ) - if is_created: - created += 1 - except Exception as exeption: - print(exeption) - continue - - print(f"Created {created} subscriptions") - -if __name__ == '__main__': - populate_user_notification_subscriptions.delay() From 2827778175af071422f6907df894876e41092810 Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Tue, 20 Jan 2026 17:51:02 +0200 Subject: [PATCH 4/8] added try/except, added timers --- ..._subscriptions_user_global_file_updated.py | 60 ++++++++++++------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/scripts/populate_notification_subscriptions_user_global_file_updated.py b/scripts/populate_notification_subscriptions_user_global_file_updated.py index 1cf567047a9..50905faaaea 100644 --- a/scripts/populate_notification_subscriptions_user_global_file_updated.py +++ b/scripts/populate_notification_subscriptions_user_global_file_updated.py @@ -4,18 +4,21 @@ from website.app import init_app init_app(routes=False) +from datetime import datetime from framework.celery_tasks import app as celery_app from django.contrib.contenttypes.models import ContentType from osf.models import OSFUser, NotificationSubscription, NotificationType -@celery_app.task(name="scripts.populate_notification_subscriptions_user_global_file_updated") +@celery_app.task(name='scripts.populate_notification_subscriptions_user_global_file_updated') def populate_notification_subscriptions_user_global_file_updated(): - print("Starting USER_FILE_UPDATED subscription population...") + print('---Starting USER_FILE_UPDATED subscription population script----') + global_start = datetime.now() batch_size = 1000 user_file_updated_nt = NotificationType.Type.USER_FILE_UPDATED + update_start = datetime.now() updated = ( NotificationSubscription.objects .filter( @@ -24,15 +27,16 @@ def populate_notification_subscriptions_user_global_file_updated(): ) .update(_is_digest=True) ) + update_end = datetime.now() - print(f"Updated {updated} subscriptions") - print("Update finished.") + print(f'Updated {updated} subscriptions. Took time: {update_end - update_start}') + print('Update finished.') user_ct = ContentType.objects.get_for_model(OSFUser) user_qs = (OSFUser.objects .exclude(subscriptions__notification_type__name=user_file_updated_nt) .distinct() - .order_by("id") + .order_by('id') .iterator(chunk_size=batch_size) ) @@ -40,6 +44,7 @@ def populate_notification_subscriptions_user_global_file_updated(): total_created = 0 for count, user in enumerate(user_qs, 1): + batch_start = datetime.now() items_to_create.append( NotificationSubscription( notification_type=user_file_updated_nt.instance, @@ -47,30 +52,41 @@ def populate_notification_subscriptions_user_global_file_updated(): content_type=user_ct, object_id=user.id, _is_digest=True, - message_frequency="none", + message_frequency='none', ) ) if len(items_to_create) >= batch_size: - print(f"Creating batch of {len(items_to_create)} subscriptions...") + print(f'Creating batch of {len(items_to_create)} subscriptions...') + try: + NotificationSubscription.objects.bulk_create( + items_to_create, + batch_size=batch_size, + ignore_conflicts=True, + ) + total_created += len(items_to_create) + except Exception as e: + print(f'Error during bulk_create: {e}') + finally: + items_to_create.clear() + batch_end = datetime.now() + print(f'Batch took {batch_end - batch_start}') + + if count % 1000 == 0: + print(f'Processed {count}, created {total_created}') + + if items_to_create: + print(f'Creating final batch of {len(items_to_create)} subscriptions...') + try: NotificationSubscription.objects.bulk_create( items_to_create, batch_size=batch_size, ignore_conflicts=True, ) total_created += len(items_to_create) - items_to_create.clear() - - if count % 1000 == 0: - print(f"Processed {count}, created {total_created}") - - if items_to_create: - print(f"Creating final batch of {len(items_to_create)} subscriptions...") - NotificationSubscription.objects.bulk_create( - items_to_create, - batch_size=batch_size, - ignore_conflicts=True, - ) - total_created += len(items_to_create) + except Exception as e: + print(f'Error during bulk_create: {e}') - print(f"Created {total_created} subscriptions.") - print("Creation finished.") + global_end = datetime.now() + print(f'Total time for USER_FILE_UPDATED subscription population: {global_end - global_start}') + print(f'Created {total_created} subscriptions.') + print('----Creation finished----') From 174422c3f548dd4acefb7fb1995f0d76f031abd6 Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Wed, 21 Jan 2026 16:07:26 +0200 Subject: [PATCH 5/8] converted populate_notification_subscriptions_user_global_reviews.py --- ...ation_subscriptions_user_global_reviews.py | 85 ++++++++++++++----- 1 file changed, 65 insertions(+), 20 deletions(-) diff --git a/scripts/populate_notification_subscriptions_user_global_reviews.py b/scripts/populate_notification_subscriptions_user_global_reviews.py index 7e0aa12884c..02753e1aaa8 100644 --- a/scripts/populate_notification_subscriptions_user_global_reviews.py +++ b/scripts/populate_notification_subscriptions_user_global_reviews.py @@ -4,6 +4,7 @@ from website.app import init_app init_app(routes=False) +from datetime import datetime from framework.celery_tasks import app as celery_app from django.contrib.contenttypes.models import ContentType from osf.models import OSFUser, NotificationSubscription, NotificationType @@ -11,33 +12,77 @@ @celery_app.task(name='scripts.populate_reviews_notification_subscriptions') def populate_reviews_notification_subscriptions(): - created = 0 - review_nt = NotificationType.Type.REVIEWS_SUBMISSION_STATUS.instance + print('---Starting REVIEWS_SUBMISSION_STATUS subscription population script----') + global_start = datetime.now() + + batch_size = 1000 + review_nt = NotificationType.Type.REVIEWS_SUBMISSION_STATUS user_ct = ContentType.objects.get_for_model(OSFUser) - reviews_qs = OSFUser.objects.exclude(subscriptions__notification_type__name=NotificationType.Type.REVIEWS_SUBMISSION_STATUS).distinct('id') + updated_start = datetime.now() + updated = ( + NotificationSubscription.objects.filter( + notification_type__name=review_nt, + _is_digest=False, + ) + .update(_is_digest=True) + ) + updated_end = datetime.now() + print(f'Updated {updated} subscriptions. Took time: {updated_end - updated_start}') + print('Update finished.') - print(f"Creating REVIEWS_SUBMISSION_STATUS subscriptions for {reviews_qs.count()} users.") - for id, user in enumerate(reviews_qs, 1): - print(f"Processing user {id} / {reviews_qs.count()}") - try: - _, is_created = NotificationSubscription.objects.get_or_create( - notification_type=review_nt, + user_qs = OSFUser.objects.exclude( + subscriptions__notification_type__name=NotificationType.Type.REVIEWS_SUBMISSION_STATUS.instance + ).distinct('id') + + items_to_create = [] + total_created = 0 + + for count, user in enumerate(user_qs, 1): + batch_start = datetime.now() + items_to_create.append( + NotificationSubscription( + notification_type=review_nt.instance, user=user, content_type=user_ct, object_id=user.id, - defaults={ - 'message_frequency': 'none', - }, + _is_digest=True, + message_frequency='none', ) - if is_created: - created += 1 - except Exception as exeption: - print(exeption) - continue + ) + if len(items_to_create) >= batch_size: + print(f'Creating batch of {len(items_to_create)} subscriptions...') + try: + NotificationSubscription.objects.bulk_create( + items_to_create, + batch_size=batch_size, + ignore_conflicts=True, + ) + total_created += len(items_to_create) + except Exception as e: + print(f'Error during bulk_create: {e}') + finally: + items_to_create.clear() + batch_end = datetime.now() + print(f'Batch took {batch_end - batch_start}') - print(f"Created {created} subscriptions") + if count % 1000 == 0: + print(f'Processed {count}, created {total_created}') + + if items_to_create: + print(f'Creating final batch of {len(items_to_create)} subscriptions...') + try: + NotificationSubscription.objects.bulk_create( + items_to_create, + batch_size=batch_size, + ignore_conflicts=True, + ) + total_created += len(items_to_create) + except Exception as e: + print(f'Error during bulk_create: {e}') -if __name__ == '__main__': - populate_reviews_notification_subscriptions.delay() + global_end = datetime.now() + print(f'Total time for REVIEWS_SUBMISSION_STATUS subscription population: {global_end - global_start}') + print(f'Created {total_created} subscriptions.') + print('----Creation finished----') \ No newline at end of file From 5c8223ae1a5e89065d1e8f3cb748b36e6c9f20e4 Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Thu, 22 Jan 2026 13:31:49 +0200 Subject: [PATCH 6/8] fix batch time execution --- ...e_notification_subscriptions_user_global_file_updated.py | 5 +++-- ...pulate_notification_subscriptions_user_global_reviews.py | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/scripts/populate_notification_subscriptions_user_global_file_updated.py b/scripts/populate_notification_subscriptions_user_global_file_updated.py index 50905faaaea..06c3c776f85 100644 --- a/scripts/populate_notification_subscriptions_user_global_file_updated.py +++ b/scripts/populate_notification_subscriptions_user_global_file_updated.py @@ -43,8 +43,8 @@ def populate_notification_subscriptions_user_global_file_updated(): items_to_create = [] total_created = 0 + batch_start = datetime.now() for count, user in enumerate(user_qs, 1): - batch_start = datetime.now() items_to_create.append( NotificationSubscription( notification_type=user_file_updated_nt.instance, @@ -71,8 +71,9 @@ def populate_notification_subscriptions_user_global_file_updated(): batch_end = datetime.now() print(f'Batch took {batch_end - batch_start}') - if count % 1000 == 0: + if count % batch_size == 0: print(f'Processed {count}, created {total_created}') + batch_start = datetime.now() if items_to_create: print(f'Creating final batch of {len(items_to_create)} subscriptions...') diff --git a/scripts/populate_notification_subscriptions_user_global_reviews.py b/scripts/populate_notification_subscriptions_user_global_reviews.py index 02753e1aaa8..a1bce301cb0 100644 --- a/scripts/populate_notification_subscriptions_user_global_reviews.py +++ b/scripts/populate_notification_subscriptions_user_global_reviews.py @@ -38,9 +38,8 @@ def populate_reviews_notification_subscriptions(): items_to_create = [] total_created = 0 - + batch_start = datetime.now() for count, user in enumerate(user_qs, 1): - batch_start = datetime.now() items_to_create.append( NotificationSubscription( notification_type=review_nt.instance, @@ -67,8 +66,9 @@ def populate_reviews_notification_subscriptions(): batch_end = datetime.now() print(f'Batch took {batch_end - batch_start}') - if count % 1000 == 0: + if count % batch_size == 0: print(f'Processed {count}, created {total_created}') + batch_start = datetime.now() if items_to_create: print(f'Creating final batch of {len(items_to_create)} subscriptions...') From df49700f34b08266821bb79a96681fbff2ef3ff4 Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Fri, 23 Jan 2026 16:52:44 +0200 Subject: [PATCH 7/8] converted populate_notification_subscriptions_node_file_updated --- ...ication_subscriptions_node_file_updated.py | 102 +++++++++++++----- 1 file changed, 76 insertions(+), 26 deletions(-) diff --git a/scripts/populate_notification_subscriptions_node_file_updated.py b/scripts/populate_notification_subscriptions_node_file_updated.py index 7c13d2ad1df..19c0d2988cf 100644 --- a/scripts/populate_notification_subscriptions_node_file_updated.py +++ b/scripts/populate_notification_subscriptions_node_file_updated.py @@ -4,24 +4,40 @@ from website.app import init_app init_app(routes=False) +from datetime import datetime from framework.celery_tasks import app as celery_app from django.contrib.contenttypes.models import ContentType from django.db.models import Count, F, OuterRef, Subquery, IntegerField, CharField -from django.db.models.functions import Cast +from django.db.models.functions import Cast, Coalesce from osf.models import Node, NotificationSubscription, NotificationType @celery_app.task(name='scripts.populate_nodes_notification_subscriptions') def populate_nodes_notification_subscriptions(): - created = 0 - node_file_nt = NotificationType.Type.NODE_FILE_UPDATED.instance + print('---Starting NODE_FILE_UPDATED subscription population script----') + global_start = datetime.now() + + batch_size = 1000 + node_file_nt = NotificationType.Type.NODE_FILE_UPDATED node_ct = ContentType.objects.get_for_model(Node) + updated_start = datetime.now() + updated = ( + NotificationSubscription.objects.filter( + notification_type__name=node_file_nt, + _is_digest=False, + ) + .update(_is_digest=True) + ) + updated_end = datetime.now() + print(f'Updated {updated} subscriptions. Took time: {updated_end - updated_start}') + print('Update finished.') + node_notifications_sq = ( NotificationSubscription.objects.filter( content_type=node_ct, - notification_type=node_file_nt, + notification_type=node_file_nt.instance, object_id=Cast(OuterRef('pk'), CharField()), ).values( 'object_id' @@ -34,35 +50,69 @@ def populate_nodes_notification_subscriptions(): Node.objects .annotate( contributors_count=Count('_contributors', distinct=True), - notifications_count=Subquery( - node_notifications_sq, - output_field=IntegerField(), + notifications_count=Coalesce( + Subquery( + node_notifications_sq, + output_field=IntegerField(), + ), + 0 ), ).exclude(contributors_count=F('notifications_count')) - ) + ).iterator(chunk_size=batch_size) - print(f"Creating NODE_FILE_UPDATED subscriptions for {nodes_qs.count()} nodes.") - for id, node in enumerate(nodes_qs, 1): - print(f"Processing node {id} / {nodes_qs.count()}") + items_to_create = [] + total_created = 0 + batch_start = datetime.now() + count_nodes = 0 + count_contributors = 0 + for node in nodes_qs: + count_nodes += 1 for contributor in node.contributors.all(): - try: - _, is_created = NotificationSubscription.objects.get_or_create( - notification_type=node_file_nt, + count_contributors += 1 + items_to_create.append( + NotificationSubscription( + notification_type=node_file_nt.instance, user=contributor, content_type=node_ct, object_id=node.id, - defaults={ - '_is_digest': True, - 'message_frequency': 'none', - }, + _is_digest=True, + message_frequency='none', ) - if is_created: - created += 1 - except Exception as exeption: - print(exeption) - continue + ) + if len(items_to_create) >= batch_size: + print(f'Creating batch of {len(items_to_create)} subscriptions...') + try: + NotificationSubscription.objects.bulk_create( + items_to_create, + batch_size=batch_size, + ignore_conflicts=True, + ) + total_created += len(items_to_create) + items_to_create = [] + except Exception as exeption: + print(f"Error during bulk_create: {exeption}") + continue + finally: + items_to_create.clear() + batch_end = datetime.now() + print(f'Batch took {batch_end - batch_start}') + + if count_contributors % batch_size == 0: + print(f'Processed {count_nodes} nodes with {count_contributors} contributors, created {total_created} subscriptions') - print(f"Created {created} subscriptions") + if items_to_create: + print(f'Creating final batch of {len(items_to_create)} subscriptions...') + try: + NotificationSubscription.objects.bulk_create( + items_to_create, + batch_size=batch_size, + ignore_conflicts=True, + ) + total_created += len(items_to_create) + except Exception as exeption: + print(f"Error during bulk_create: {exeption}") -if __name__ == '__main__': - populate_nodes_notification_subscriptions.delay() + global_end = datetime.now() + print(f'Total time for NODE_FILE_UPDATED subscription population: {global_end - global_start}') + print(f'Created {total_created} subscriptions.') + print('----Creation finished----') From c70b570c10fde304b2ce06058c3fbc6f1ffc231c Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Fri, 23 Jan 2026 16:59:21 +0200 Subject: [PATCH 8/8] convert to separate update and create scripts --- ...ication_subscriptions_node_file_updated.py | 32 ++++++++++------- ..._subscriptions_user_global_file_updated.py | 36 +++++++++++-------- ...ation_subscriptions_user_global_reviews.py | 35 ++++++++++-------- 3 files changed, 61 insertions(+), 42 deletions(-) diff --git a/scripts/populate_notification_subscriptions_node_file_updated.py b/scripts/populate_notification_subscriptions_node_file_updated.py index 19c0d2988cf..0eb2549beab 100644 --- a/scripts/populate_notification_subscriptions_node_file_updated.py +++ b/scripts/populate_notification_subscriptions_node_file_updated.py @@ -14,7 +14,7 @@ @celery_app.task(name='scripts.populate_nodes_notification_subscriptions') def populate_nodes_notification_subscriptions(): - print('---Starting NODE_FILE_UPDATED subscription population script----') + print('---Starting NODE_FILE_UPDATED subscriptions population script----') global_start = datetime.now() batch_size = 1000 @@ -22,18 +22,6 @@ def populate_nodes_notification_subscriptions(): node_ct = ContentType.objects.get_for_model(Node) - updated_start = datetime.now() - updated = ( - NotificationSubscription.objects.filter( - notification_type__name=node_file_nt, - _is_digest=False, - ) - .update(_is_digest=True) - ) - updated_end = datetime.now() - print(f'Updated {updated} subscriptions. Took time: {updated_end - updated_start}') - print('Update finished.') - node_notifications_sq = ( NotificationSubscription.objects.filter( content_type=node_ct, @@ -116,3 +104,21 @@ def populate_nodes_notification_subscriptions(): print(f'Total time for NODE_FILE_UPDATED subscription population: {global_end - global_start}') print(f'Created {total_created} subscriptions.') print('----Creation finished----') + +@celery_app.task(name='scripts.update_nodes_notification_subscriptions') +def update_nodes_notification_subscriptions(): + print('---Starting NODE_FILE_UPDATED subscriptions update script----') + + node_file_nt = NotificationType.Type.NODE_FILE_UPDATED + + updated_start = datetime.now() + updated = ( + NotificationSubscription.objects.filter( + notification_type__name=node_file_nt, + _is_digest=False, + ) + .update(_is_digest=True) + ) + updated_end = datetime.now() + print(f'Updated {updated} subscriptions. Took time: {updated_end - updated_start}') + print('Update finished.') \ No newline at end of file diff --git a/scripts/populate_notification_subscriptions_user_global_file_updated.py b/scripts/populate_notification_subscriptions_user_global_file_updated.py index 06c3c776f85..fa6657160cc 100644 --- a/scripts/populate_notification_subscriptions_user_global_file_updated.py +++ b/scripts/populate_notification_subscriptions_user_global_file_updated.py @@ -12,26 +12,12 @@ @celery_app.task(name='scripts.populate_notification_subscriptions_user_global_file_updated') def populate_notification_subscriptions_user_global_file_updated(): - print('---Starting USER_FILE_UPDATED subscription population script----') + print('---Starting USER_FILE_UPDATED subscriptions population script----') global_start = datetime.now() batch_size = 1000 user_file_updated_nt = NotificationType.Type.USER_FILE_UPDATED - update_start = datetime.now() - updated = ( - NotificationSubscription.objects - .filter( - notification_type__name=user_file_updated_nt, - _is_digest=False, - ) - .update(_is_digest=True) - ) - update_end = datetime.now() - - print(f'Updated {updated} subscriptions. Took time: {update_end - update_start}') - print('Update finished.') - user_ct = ContentType.objects.get_for_model(OSFUser) user_qs = (OSFUser.objects .exclude(subscriptions__notification_type__name=user_file_updated_nt) @@ -91,3 +77,23 @@ def populate_notification_subscriptions_user_global_file_updated(): print(f'Total time for USER_FILE_UPDATED subscription population: {global_end - global_start}') print(f'Created {total_created} subscriptions.') print('----Creation finished----') + +@celery_app.task(name='scripts.update_notification_subscriptions_user_global_file_updated') +def update_notification_subscriptions_user_global_file_updated(): + print('---Starting USER_FILE_UPDATED subscriptions updating script----') + + user_file_updated_nt = NotificationType.Type.USER_FILE_UPDATED + + update_start = datetime.now() + updated = ( + NotificationSubscription.objects + .filter( + notification_type__name=user_file_updated_nt, + _is_digest=False, + ) + .update(_is_digest=True) + ) + update_end = datetime.now() + + print(f'Updated {updated} subscriptions. Took time: {update_end - update_start}') + print('Update finished.') \ No newline at end of file diff --git a/scripts/populate_notification_subscriptions_user_global_reviews.py b/scripts/populate_notification_subscriptions_user_global_reviews.py index a1bce301cb0..16303941ad2 100644 --- a/scripts/populate_notification_subscriptions_user_global_reviews.py +++ b/scripts/populate_notification_subscriptions_user_global_reviews.py @@ -12,7 +12,7 @@ @celery_app.task(name='scripts.populate_reviews_notification_subscriptions') def populate_reviews_notification_subscriptions(): - print('---Starting REVIEWS_SUBMISSION_STATUS subscription population script----') + print('---Starting REVIEWS_SUBMISSION_STATUS subscriptions population script----') global_start = datetime.now() batch_size = 1000 @@ -20,18 +20,6 @@ def populate_reviews_notification_subscriptions(): user_ct = ContentType.objects.get_for_model(OSFUser) - updated_start = datetime.now() - updated = ( - NotificationSubscription.objects.filter( - notification_type__name=review_nt, - _is_digest=False, - ) - .update(_is_digest=True) - ) - updated_end = datetime.now() - print(f'Updated {updated} subscriptions. Took time: {updated_end - updated_start}') - print('Update finished.') - user_qs = OSFUser.objects.exclude( subscriptions__notification_type__name=NotificationType.Type.REVIEWS_SUBMISSION_STATUS.instance ).distinct('id') @@ -85,4 +73,23 @@ def populate_reviews_notification_subscriptions(): global_end = datetime.now() print(f'Total time for REVIEWS_SUBMISSION_STATUS subscription population: {global_end - global_start}') print(f'Created {total_created} subscriptions.') - print('----Creation finished----') \ No newline at end of file + print('----Creation finished----') + +@celery_app.task(name='scripts.update_reviews_notification_subscriptions') +def update_reviews_notification_subscriptions(): + print('---Starting REVIEWS_SUBMISSION_STATUS subscriptions updating script----') + + review_nt = NotificationType.Type.REVIEWS_SUBMISSION_STATUS + + updated_start = datetime.now() + updated = ( + NotificationSubscription.objects.filter( + notification_type__name=review_nt, + _is_digest=False, + ) + .update(_is_digest=True) + ) + updated_end = datetime.now() + + print(f'Updated {updated} subscriptions. Took time: {updated_end - updated_start}') + print('Update finished.')