Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 29 additions & 8 deletions common/djangoapps/student/models/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -934,14 +934,31 @@ class PendingSecondaryEmailChange(DeletableByUserValue, models.Model): # noqa:
"""
This model keeps track of pending requested changes to a user's secondary email address.

.. pii: Contains new_secondary_email, not currently retired
.. pii: Contains new_secondary_email, redacted in `DeactivateLogoutView`
.. pii_types: email_address
.. pii_retirement: retained
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bmedx: [inform] This doesn't seem like it would have been intentionally retained, so I'm fine with calling this a bug and just fixing. Any objections?

.. pii_retirement: local_api
"""
user = models.OneToOneField(User, unique=True, db_index=True, on_delete=models.CASCADE)
new_secondary_email = models.CharField(blank=True, max_length=255, db_index=True)
activation_key = models.CharField(('activation key'), max_length=32, unique=True, db_index=True)

@classmethod
def redact_pending_secondary_email(cls, user_id):
"""
Redact a pending secondary email change row for a user.

Redacts the email before deletion so any downstream soft-delete mirror does
not retain the original secondary email address in the final row image.
"""
try:
pending_secondary_email = cls.objects.get(user_id=user_id)
except cls.DoesNotExist:
return True
pending_secondary_email.new_secondary_email = f"redacted+{user_id}@redacted.com"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But PendingEmailChange redaction (PR #38426) and management.py activation both use get_retired_email_by_email(). The edX retirement system has a standardized hashing approach for a reason — it's deterministic, auditable, and consistent across the retirement pipeline. Using a raw format here:

Produces .com domain (use .invalid if using a custom format)
Is inconsistent with the rest of the retirement system in this same codebase
Means downstream audits see two different redaction patterns for the same retirement flow
Both retire_recovery_email and redact_and_delete_pending_secondary_email should use get_retired_email_by_email(record.secondary_email) / get_retired_email_by_email(record.new_secondary_email) to stay consistent.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. I'm not finding the PR description useful. I wish you'd simply write yourself the simple list of cases that you are fixing.
  2. I'm not clear if we are fixing the redact-before-delete for pending records that are activated during normal flow, and not during user retirement. I'm not seeing that (maybe I just missed it), but I thought that was a problem. and that we'd be calling the same code.
  3. Because we are simply redacted a deleted record that should never be seen, unless someone goes and looks at the soft-deleted records in downstream (in our case Snowflake), the redacted value is less important to be using the get_retired_email_by_email() function. I could imagine a simple hard-coded string like redact-before-delete@redacted.com.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. is_active = False removed pointless before delete(), dropped from retire_recovery_email
  2. get_retired_email_by_email() used in both retirement methods — consistent with the rest of the retirement pipeline
  3. Activation path uses neutral placeholder "completed@delete-pending.com" not a retirement hash, no false-positive signals
  4. Docstrings updated "Redact and delete", correct return type, proper descriptions
  5. except DoesNotExist now returns True consistent early-return pattern in both methods
  6. Unused import removed get_retired_email_by_email import dropped from management.py

pending_secondary_email.save(update_fields=['new_secondary_email'])
pending_secondary_email.delete()
Comment thread
robrap marked this conversation as resolved.
return True


class LoginFailures(models.Model):
"""
Expand Down Expand Up @@ -1688,7 +1705,7 @@ class AccountRecovery(models.Model): # noqa: DJ008
"""
Model for storing information for user's account recovery in case of access loss.

.. pii: the field named secondary_email contains pii, retired in the `DeactivateLogoutView`
.. pii: the field named secondary_email contains pii, redacted in the `DeactivateLogoutView`
.. pii_types: email_address
.. pii_retirement: local_api
"""
Expand Down Expand Up @@ -1721,19 +1738,23 @@ def update_recovery_email(self, email):
@classmethod
def retire_recovery_email(cls, user_id):
"""
Retire user's recovery/secondary email as part of GDPR Phase I.
Redact user's recovery/secondary email as part of GDPR Phase I.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should "Redact and delete" if we leave the delete.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DONE IMPLEMENTED

Returns 'True'
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No longer returns True in all cases. Fix.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

got it implemented


If an AccountRecovery record is found for this user it will be deleted,
if it is not found it is assumed this table has no PII for the given user.
If an AccountRecovery record is found for this user it will be redacted and
deleted. If it is not found it is assumed this table has no PII for the given user.
Comment thread
robrap marked this conversation as resolved.

:param user_id: int
:return: bool
"""
try:
cls.objects.get(user_id=user_id).delete()
account_recovery = cls.objects.get(user_id=user_id)
except cls.DoesNotExist:
pass
return True
account_recovery.secondary_email = f"redacted+{user_id}@redacted.com"
account_recovery.is_active = False
account_recovery.save(update_fields=['secondary_email', 'is_active'])
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Messing with is_active is simply confusing. We're about to delete this row, so in theory we don't need to make any changes. The only reason we are touching the record before deleting is to not leave PII in soft-deleted records downstream. Let's keep this simple.

Suggested change
account_recovery.is_active = False
account_recovery.save(update_fields=['secondary_email', 'is_active'])
account_recovery.save(update_fields=['secondary_email'])

Copy link
Copy Markdown
Contributor Author

@ktyagiapphelix2u ktyagiapphelix2u May 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Implemented

account_recovery.delete()
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ktyagiapphelix2u: Is it true that the recovery email itself is not yet redacted? I thought it was just pending secondary email updates. If so, I think secondary (recovery) email and pending secondary email should be separate PRs.

Also, it look like you wish to delete where we weren't previously doing so. @bmedx: Any strong opinions either way? In general are we redacting and leaving things in place?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ktyagiapphelix2u: You did not respond on this comment in particular, but from elsewhere, I think you want this all to stay together, right?

Also, I missed the earlier cls.objects.get(user_id=user_id).delete(), so we're not changing anything, but just adding redaction, right? Again, maybe use the simpler redact-before-delete email I proposed earlier?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the follow-up. To address your points:

  1. Same PR- Yes, both changes (recovery email + pending secondary email) address the same retirement gap and follow the same redact-before-delete pattern, so keeping them together makes sense.

  2. Simplified redact-before-delete, setting is_active = False before delete() was pointless (the row is gone immediately after). I've removed it, so retire_recovery_email now matches the simpler pattern used in redact_and_delete_pending_secondary_email, redact the email field, save, then delete.


return True

Expand Down
22 changes: 22 additions & 0 deletions common/djangoapps/student/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
ManualEnrollmentAudit,
PendingEmailChange,
PendingNameChange,
PendingSecondaryEmailChange,
UserAttribute,
UserCelebration,
UserProfile,
Expand Down Expand Up @@ -747,6 +748,27 @@ def test_retire_recovery_email(self):
assert len(AccountRecovery.objects.filter(user_id=user.id)) == 0


class TestPendingSecondaryEmailChange(TestCase):
"""Tests for retiring PendingSecondaryEmailChange records."""

def test_redact_pending_secondary_email(self):
"""Assert that pending secondary email records are deleted for retired users."""
user = UserFactory()
PendingSecondaryEmailChange.objects.create(
user=user,
new_secondary_email='new-secondary@example.com',
activation_key='a' * 32,
)
assert len(PendingSecondaryEmailChange.objects.filter(user_id=user.id)) == 1
PendingSecondaryEmailChange.redact_pending_secondary_email(user_id=user.id)
assert len(PendingSecondaryEmailChange.objects.filter(user_id=user.id)) == 0

def test_redact_pending_secondary_email_when_no_record(self):
"""Assert retirement cleanup returns True when no pending secondary row exists."""
user = UserFactory()
assert PendingSecondaryEmailChange.redact_pending_secondary_email(user_id=user.id) is True


@ddt.ddt
class TestUserPostSaveCallback(SharedModuleStoreTestCase):
"""
Expand Down
5 changes: 5 additions & 0 deletions common/djangoapps/student/views/management.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
UserStanding,
create_comments_service_user, # noqa: F401
email_exists_or_retired, # noqa: F401
get_retired_email_by_email,
)
from common.djangoapps.student.signals import REFUND_ORDER, USER_EMAIL_CHANGED
from common.djangoapps.student.toggles import should_redirect_to_courseware_after_enrollment
Expand Down Expand Up @@ -890,6 +891,10 @@ def activate_secondary_email(request, key):
'secondary_email': pending_secondary_email_change.new_secondary_email
})

pending_secondary_email_change.new_secondary_email = get_retired_email_by_email(
pending_secondary_email_change.new_secondary_email
)
pending_secondary_email_change.save(update_fields=['new_secondary_email'])
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

activate_secondary_email is a normal user action (clicking the confirmation link), not a retirement path. get_retired_email_by_email() produces a retired_<hash>@retired.invalid value. If a downstream system snapshots this deletion, it will see a retirement-style hashed email even though the user was never retired — this will produce confusing audit trails and could trigger false-positive retirement detection in downstream systems.

This change does not belong in this view. The soft-delete protection goal is valid, but the redaction function must not be get_retired_email_by_email() on a non-retired user. Either use a neutral placeholder, or remove this from the activation path entirely.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right—activate_secondary_email is a normal user action, not a retirement flow. Using get_retired_email_by_email() here would incorrectly mark a regular user as “retired” in downstream systems, which could cause confusion or false positives.

I’ve removed the redaction from this path, so now the pending secondary email is just deleted after activation, with no retirement-style hashing. Redaction only happens in actual retirement flows.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about something like using completed@delete-pending.com? If the user is later retired, this soft-delete would persist, and isn’t that the case we’re trying to fix?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ktyagiapphelix2u: Did you see my comment? Can you acknowledge and respond? Thank you.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've implemented your suggestion. The pending secondary email is now overwritten with "completed@delete-pending.com" before deletion.

does not use get_retired_email_by_email(), so it won't be mistaken for a retirement hash or trigger false-positive retirement detection downstream

pending_secondary_email_change.delete()

return render_to_response("secondary_email_change_successful.html")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
ManualEnrollmentAudit,
PendingEmailChange,
PendingNameChange,
PendingSecondaryEmailChange,
Registration,
SocialLink,
UserProfile,
Expand Down Expand Up @@ -233,6 +234,24 @@ def test_user_can_deactivate_secondary_email(self):
# Assert that there is no longer a secondary/recovery email for test user
assert len(AccountRecovery.objects.filter(user_id=self.test_user.id)) == 0

def test_user_can_deactivate_pending_secondary_email_change(self):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm holding off on reviewing tests. I'm getting confused between the pending and actual records, and this will be simpler when the PRs are split. Also, feel free to get an approval from the team first.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Helpful Info

This PR fixes a PII leakage issue affecting two related models that store secondary email data at different lifecycle stages. The AccountRecovery model stores confirmed/active recovery emails, while PendingSecondaryEmailChange stores unconfirmed email change requests (before the user clicks the activation link).

During user retirement, we must handle both models because a user can have: (1) only a confirmed email, (2) only a pending change request, (3) both (when changing their existing recovery email to a new one), or (4) neither.

Both models require the same fix pattern: redact the email field to prevent PII from syncing to downstream systems save the redacted record, then delete it. The methods are idempotent, they return True if no record exists. For review clarity, I recommend examining the AccountRecovery changes first (confirmed emails), then PendingSecondaryEmailChange (pending emails), and finally the retirement flow that calls both methods.

"""
Verify that pending secondary email change records are removed when a user retires.
"""
PendingSecondaryEmailChange.objects.create(
user=self.test_user,
new_secondary_email='pending-secondary@example.com',
activation_key='b' * 32,
)
assert len(PendingSecondaryEmailChange.objects.filter(user_id=self.test_user.id)) == 1

self.client.login(username=self.test_user.username, password=self.test_password)
headers = build_jwt_headers(self.test_user)
response = self.client.post(self.url, self.build_post(self.test_password), **headers)
assert response.status_code == status.HTTP_204_NO_CONTENT

assert len(PendingSecondaryEmailChange.objects.filter(user_id=self.test_user.id)) == 0

def test_password_mismatch(self):
"""
Verify that the user submitting a mismatched password results in
Expand Down Expand Up @@ -1293,6 +1312,18 @@ def setUp(self):
UserOrgTagFactory.create(user=self.test_user, key='foo', value='bar')
UserOrgTagFactory.create(user=self.test_user, key='cat', value='dog')

# Secondary email setup
PendingSecondaryEmailChange.objects.create(
user=self.test_user,
new_secondary_email='pending_secondary@example.com',
activation_key='test_activation_key_123'
)
AccountRecovery.objects.create(
user=self.test_user,
secondary_email='confirmed_secondary@example.com',
is_active=True
)

CourseEnrollmentAllowedFactory.create(email=self.original_email)

self.course_key = CourseKey.from_string('course-v1:edX+DemoX+Demo_Course')
Expand Down Expand Up @@ -1399,6 +1430,10 @@ def test_retire_user(self, mock_remove_profile_images, mock_get_profile_image_na
assert not PendingEmailChange.objects.filter(user=self.test_user).exists()
assert not UserOrgTag.objects.filter(user=self.test_user).exists()

# Verify secondary email models were cleaned
assert not PendingSecondaryEmailChange.objects.filter(user=self.test_user).exists()
assert not AccountRecovery.objects.filter(user=self.test_user).exists()

assert not CourseEnrollmentAllowed.objects.filter(email=self.original_email).exists()
assert not UnregisteredLearnerCohortAssignments.objects.filter(email=self.original_email).exists()

Expand Down
8 changes: 7 additions & 1 deletion openedx/core/djangoapps/user_api/accounts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@
from edx_django_utils.user import generate_password
from social_django.models import UserSocialAuth

from common.djangoapps.student.models import AccountRecovery, Registration, get_retired_email_by_email
from common.djangoapps.student.models import (
AccountRecovery,
PendingSecondaryEmailChange,
Registration,
get_retired_email_by_email,
)
from openedx.core.djangoapps.site_configuration.models import SiteConfiguration
from openedx.core.djangoapps.theming.helpers import get_config_value_from_site_or_settings, get_current_site
from openedx.core.djangolib.oauth2_retirement_utils import retire_dot_oauth2_models
Expand Down Expand Up @@ -219,6 +224,7 @@ def create_retirement_request_and_deactivate_account(user):
# Delete OAuth tokens associated with the user.
retire_dot_oauth2_models(user)
AccountRecovery.retire_recovery_email(user.id)
PendingSecondaryEmailChange.redact_pending_secondary_email(user.id)


def username_suffix_generator(suffix_length=4):
Expand Down
10 changes: 10 additions & 0 deletions openedx/core/djangoapps/user_api/accounts/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,13 @@

from common.djangoapps.entitlements.models import CourseEntitlement
from common.djangoapps.student.models import ( # lint-amnesty, pylint: disable=unused-import
AccountRecovery,
CourseEnrollmentAllowed,
LoginFailures,
ManualEnrollmentAudit,
PendingEmailChange,
PendingNameChange,
PendingSecondaryEmailChange,
User,
UserProfile,
get_potentially_retired_user_by_username,
Expand Down Expand Up @@ -1152,8 +1154,15 @@ def post(self, request):
self.retire_entitlement_support_detail(user)

# Retire misc. models that may contain PII of this user
# Redact pending email change before deletion to prevent plaintext sync to Snowflake
pending_email = PendingEmailChange.objects.filter(user=user).first()
if pending_email:
pending_email.new_email = get_retired_email_by_email(pending_email.new_email)
pending_email.save(update_fields=['new_email'])
Comment thread
ktyagiapphelix2u marked this conversation as resolved.
Outdated
PendingEmailChange.delete_by_user_value(user, field="user")
UserOrgTag.delete_by_user_value(user, field="user")
PendingSecondaryEmailChange.redact_pending_secondary_email(user.id)
AccountRecovery.retire_recovery_email(user.id)

# Retire any objects linked to the user via their original email
CourseEnrollmentAllowed.delete_by_user_value(original_email, field="email")
Expand All @@ -1171,6 +1180,7 @@ def post(self, request):
user.last_name = ""
user.is_active = False
user.username = retired_username
user.email = retired_email
user.save()
except UserRetirementStatus.DoesNotExist:
return Response(status=status.HTTP_404_NOT_FOUND)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@
from django.db import transaction
from social_django.models import UserSocialAuth

from common.djangoapps.student.models import AccountRecovery, Registration, get_retired_email_by_email
from common.djangoapps.student.models import (
AccountRecovery,
PendingSecondaryEmailChange,
Registration,
get_retired_email_by_email,
)
from openedx.core.djangolib.oauth2_retirement_utils import retire_dot_oauth2_models

from ...models import BulkUserRetirementConfig, UserRetirementStatus
Expand Down Expand Up @@ -158,6 +163,7 @@ def handle(self, *args, **options):
# Delete OAuth tokens associated with the user.
retire_dot_oauth2_models(user)
AccountRecovery.retire_recovery_email(user.id)
PendingSecondaryEmailChange.redact_pending_secondary_email(user.id)
except KeyError:
error_message = f'Username not specified {user}'
logger.error(error_message)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from django.contrib.auth.models import User # lint-amnesty, pylint: disable=imported-auth-user
from django.core.management import CommandError, call_command

from common.djangoapps.student.models import PendingSecondaryEmailChange
from common.djangoapps.student.tests.factories import UserFactory # lint-amnesty, pylint: disable=wrong-import-order
from openedx.core.djangoapps.user_api.accounts.tests.retirement_helpers import ( # lint-amnesty, pylint: disable=unused-import, wrong-import-order
setup_retirement_states, # noqa: F401
Expand Down Expand Up @@ -107,3 +108,18 @@ def test_retire_with_username_email_userfile(setup_retirement_states): # lint-a
with pytest.raises(CommandError, match=r'You cannot use userfile option with username and user_email'):
call_command('retire_user', user_file=user_file, username=username, user_email=user_email)
remove_user_file()


@skip_unless_lms
def test_retire_user_cleans_pending_secondary_email(setup_retirement_states): # lint-amnesty, pylint: disable=redefined-outer-name, unused-argument # noqa: F811
user = UserFactory.create(username='user-cleanup', email='user-cleanup@example.com')
PendingSecondaryEmailChange.objects.create(
user=user,
new_secondary_email='pending-secondary@example.com',
activation_key='c' * 32,
)
assert PendingSecondaryEmailChange.objects.filter(user=user).exists()

call_command('retire_user', username=user.username, user_email=user.email)

assert not PendingSecondaryEmailChange.objects.filter(user=user).exists()
Comment thread
ktyagiapphelix2u marked this conversation as resolved.
Outdated
Loading