diff --git a/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py b/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py
index 4f942417ae0e..5e3652c2e79c 100644
--- a/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py
+++ b/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py
@@ -1079,8 +1079,78 @@ def cleanup_and_assert_status(self, data=None, expected_status=status.HTTP_204_N
         return response
 
+    def _retirement_write_statements(self, data=None):
+        """
+        Make the cleanup request and return the SQL of every UPDATE and DELETE it
+        issued against the UserRetirementStatus table, in execution order.
+        """
+        # Local imports keep this helper self-contained.
+        from django.db import connection  # pylint: disable=import-outside-toplevel
+        from django.test.utils import CaptureQueriesContext  # pylint: disable=import-outside-toplevel
+
+        table = connection.ops.quote_name(UserRetirementStatus._meta.db_table)
+        with CaptureQueriesContext(connection) as captured:
+            self.cleanup_and_assert_status(data=data)
+
+        write_prefixes = (f'UPDATE {table} ', f'DELETE FROM {table} ')
+        return [
+            query['sql'] for query in captured.captured_queries
+            if query['sql'].startswith(write_prefixes)
+        ]
+
     def test_simple_success(self):
+        """
+        Test basic cleanup with default redacted values.
+        """
+        # Precondition: the retirement records exist before cleanup
+        assert UserRetirementStatus.objects.count() == 9
+
+        # Make the cleanup request
         self.cleanup_and_assert_status()
-        assert not UserRetirementStatus.objects.all()
+
+        # Every record should be gone once the request completes
+        assert not UserRetirementStatus.objects.exists()
+
+    def test_redaction_before_deletion(self):
+        """
+        Verify that redaction (UPDATE) happens before deletion (DELETE).
+
+        Captures the SQL issued by the request and checks that the writes to the
+        retirement table alternate UPDATE, DELETE, so every DELETE is directly
+        preceded by a redacting UPDATE. This protects PII from being exposed in
+        soft-deletes to downstream data warehouses.
+        """
+        statements = self._retirement_write_statements({'usernames': self.usernames})
+
+        # One UPDATE directly followed by one DELETE for every retired user
+        operations = [sql.split(' ', 1)[0] for sql in statements]
+        assert operations == ['UPDATE', 'DELETE'] * len(self.usernames)
+
+        # Verify records are deleted after redaction
+        assert not UserRetirementStatus.objects.exists()
+
+    def test_custom_redacted_values(self):
+        """Test that custom redacted values are applied before deletion."""
+        custom_username = 'username-redacted-12345'
+        custom_email = 'email-redacted-67890'
+        custom_name = 'name-redacted-abcde'
+
+        data = {
+            'usernames': self.usernames,
+            'redacted_username': custom_username,
+            'redacted_email': custom_email,
+            'redacted_name': custom_name,
+        }
+        statements = self._retirement_write_statements(data)
+
+        # Each record must be rewritten with all three custom values
+        updates = [sql for sql in statements if sql.startswith('UPDATE')]
+        assert len(updates) == len(self.usernames)
+        for sql in updates:
+            assert custom_username in sql
+            assert custom_email in sql
+            assert custom_name in sql
+
+        # Records should be deleted after redaction
+        assert not UserRetirementStatus.objects.exists()
 
     def test_leaves_other_users(self):
         remaining_usernames = []
diff --git a/openedx/core/djangoapps/user_api/accounts/views.py b/openedx/core/djangoapps/user_api/accounts/views.py
index c3ff6ce7a2f2..b80e5c6fbb88 100644
--- a/openedx/core/djangoapps/user_api/accounts/views.py
+++ b/openedx/core/djangoapps/user_api/accounts/views.py
@@ -1024,14 +1024,26 @@ def cleanup(self, request):
 
         ```
         {
-            'usernames': ['user1', 'user2', ...]
+            'usernames': ['user1', 'user2', ...],
+            'redacted_username': 'Value to store in username field',
+            'redacted_email': 'Value to store in email field',
+            'redacted_name': 'Value to store in name field'
         }
         ```
 
-        Deletes a batch of retirement requests by username.
+        Redacts the PII fields of a batch of retirement requests, then deletes them.
+        The 'redacted_*' values are optional; each defaults to 'redacted'.
         """
         try:
             usernames = request.data["usernames"]
+            redacted_username = request.data.get("redacted_username", "redacted")
+            redacted_email = request.data.get("redacted_email", "redacted")
+            redacted_name = request.data.get("redacted_name", "redacted")
+
+            # Reject null/non-string placeholders up front with a 400 instead of
+            # failing while saving part-way through the batch.
+            if not all(isinstance(value, str) for value in (redacted_username, redacted_email, redacted_name)):
+                raise TypeError("Redacted values should be strings.")
 
             if not isinstance(usernames, list):
                 raise TypeError("Usernames should be an array.")
@@ -1045,7 +1057,19 @@ def cleanup(self, request):
             if len(usernames) != len(retirements):
                 raise UserRetirementStatus.DoesNotExist("Not all usernames exist in the COMPLETE state.")
 
-            retirements.delete()
+            # Redact PII fields first, then delete. In case an ETL tool is syncing data
+            # to a downstream data warehouse, and treats the deletes as soft-deletes,
+            # the data will have first been redacted, protecting the sensitive PII.
+            # NOTE(review): each record gets its own UPDATE and DELETE, so a failure
+            # part-way through leaves the batch half-processed unless the request is
+            # already wrapped in a transaction - confirm.
+            for retirement in retirements:
+                retirement.original_username = redacted_username
+                retirement.original_email = redacted_email
+                retirement.original_name = redacted_name
+                retirement.save()
+                retirement.delete()
+
             return Response(status=status.HTTP_204_NO_CONTENT)
         except (RetirementStateError, UserRetirementStatus.DoesNotExist, TypeError) as exc:
             return Response(str(exc), status=status.HTTP_400_BAD_REQUEST)