-
Notifications
You must be signed in to change notification settings - Fork 4.3k
fix: retirement PII leaks by redacting pending secondary email/name data #38427
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 2 commits
1e6614b
da2bbfd
4326d69
f9aa35a
d05cd5a
48e5e17
d929ff9
6320d0b
8358ca2
80e4ce9
082a9e8
8acd7b8
111c74a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -934,14 +934,31 @@ class PendingSecondaryEmailChange(DeletableByUserValue, models.Model): # noqa: | |||||||
| """ | ||||||||
| This model keeps track of pending requested changes to a user's secondary email address. | ||||||||
|
|
||||||||
| .. pii: Contains new_secondary_email, not currently retired | ||||||||
| .. pii: Contains new_secondary_email, redacted in `DeactivateLogoutView` | ||||||||
| .. pii_types: email_address | ||||||||
| .. pii_retirement: retained | ||||||||
| .. pii_retirement: local_api | ||||||||
| """ | ||||||||
| user = models.OneToOneField(User, unique=True, db_index=True, on_delete=models.CASCADE) | ||||||||
| new_secondary_email = models.CharField(blank=True, max_length=255, db_index=True) | ||||||||
| activation_key = models.CharField(('activation key'), max_length=32, unique=True, db_index=True) | ||||||||
|
|
||||||||
| @classmethod | ||||||||
| def redact_pending_secondary_email(cls, user_id): | ||||||||
| """ | ||||||||
| Redact a pending secondary email change row for a user. | ||||||||
|
|
||||||||
| Redacts the email before deletion so any downstream soft-delete mirror does | ||||||||
| not retain the original secondary email address in the final row image. | ||||||||
| """ | ||||||||
| try: | ||||||||
| pending_secondary_email = cls.objects.get(user_id=user_id) | ||||||||
| except cls.DoesNotExist: | ||||||||
| return True | ||||||||
| pending_secondary_email.new_secondary_email = f"redacted+{user_id}@redacted.com" | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But Produces
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||||||||
| pending_secondary_email.save(update_fields=['new_secondary_email']) | ||||||||
| pending_secondary_email.delete() | ||||||||
|
robrap marked this conversation as resolved.
|
||||||||
| return True | ||||||||
|
|
||||||||
|
|
||||||||
| class LoginFailures(models.Model): | ||||||||
| """ | ||||||||
|
|
@@ -1688,7 +1705,7 @@ class AccountRecovery(models.Model): # noqa: DJ008 | |||||||
| """ | ||||||||
| Model for storing information for user's account recovery in case of access loss. | ||||||||
|
|
||||||||
| .. pii: the field named secondary_email contains pii, retired in the `DeactivateLogoutView` | ||||||||
| .. pii: the field named secondary_email contains pii, redacted in the `DeactivateLogoutView` | ||||||||
| .. pii_types: email_address | ||||||||
| .. pii_retirement: local_api | ||||||||
| """ | ||||||||
|
|
@@ -1721,19 +1738,23 @@ def update_recovery_email(self, email): | |||||||
| @classmethod | ||||||||
| def retire_recovery_email(cls, user_id): | ||||||||
| """ | ||||||||
| Retire user's recovery/secondary email as part of GDPR Phase I. | ||||||||
| Redact user's recovery/secondary email as part of GDPR Phase I. | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should "Redact and delete" if we leave the delete.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. DONE IMPLEMENTED |
||||||||
| Returns 'True' | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No longer returns True in all cases. Fix.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. got it implemented |
||||||||
|
|
||||||||
| If an AccountRecovery record is found for this user it will be deleted, | ||||||||
| if it is not found it is assumed this table has no PII for the given user. | ||||||||
| If an AccountRecovery record is found for this user it will be redacted and | ||||||||
| deleted. If it is not found it is assumed this table has no PII for the given user. | ||||||||
|
robrap marked this conversation as resolved.
|
||||||||
|
|
||||||||
| :param user_id: int | ||||||||
| :return: bool | ||||||||
| """ | ||||||||
| try: | ||||||||
| cls.objects.get(user_id=user_id).delete() | ||||||||
| account_recovery = cls.objects.get(user_id=user_id) | ||||||||
| except cls.DoesNotExist: | ||||||||
| pass | ||||||||
| return True | ||||||||
| account_recovery.secondary_email = f"redacted+{user_id}@redacted.com" | ||||||||
| account_recovery.is_active = False | ||||||||
| account_recovery.save(update_fields=['secondary_email', 'is_active']) | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Messing with
Suggested change
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Implemented |
||||||||
| account_recovery.delete() | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ktyagiapphelix2u: Is it true that the recovery email itself is not yet redacted? I thought it was just pending secondary email updates. If so, I think secondary (recovery) email and pending secondary email should be separate PRs. Also, it look like you wish to delete where we weren't previously doing so. @bmedx: Any strong opinions either way? In general are we redacting and leaving things in place?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ktyagiapphelix2u: You did not respond on this comment in particular, but from elsewhere, I think you want this all to stay together, right? Also, I missed the earlier
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the follow-up. To address your points:
|
||||||||
|
|
||||||||
| return True | ||||||||
|
|
||||||||
|
|
||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -84,6 +84,7 @@ | |
| UserStanding, | ||
| create_comments_service_user, # noqa: F401 | ||
| email_exists_or_retired, # noqa: F401 | ||
| get_retired_email_by_email, | ||
| ) | ||
| from common.djangoapps.student.signals import REFUND_ORDER, USER_EMAIL_CHANGED | ||
| from common.djangoapps.student.toggles import should_redirect_to_courseware_after_enrollment | ||
|
|
@@ -890,6 +891,10 @@ def activate_secondary_email(request, key): | |
| 'secondary_email': pending_secondary_email_change.new_secondary_email | ||
| }) | ||
|
|
||
| pending_secondary_email_change.new_secondary_email = get_retired_email_by_email( | ||
| pending_secondary_email_change.new_secondary_email | ||
| ) | ||
| pending_secondary_email_change.save(update_fields=['new_secondary_email']) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This change does not belong in this view. The soft-delete protection goal is valid, but the redaction function must not be
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. right—activate_secondary_email is a normal user action, not a retirement flow. Using get_retired_email_by_email() here would incorrectly mark a regular user as “retired” in downstream systems, which could cause confusion or false positives. I’ve removed the redaction from this path, so now the pending secondary email is just deleted after activation, with no retirement-style hashing. Redaction only happens in actual retirement flows.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about something like using completed@delete-pending.com? If the user is later retired, this soft-delete would persist, and isn’t that the case we’re trying to fix?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ktyagiapphelix2u: Did you see my comment? Can you acknowledge and respond? Thank you.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've implemented your suggestion. The pending secondary email is now overwritten with "completed@delete-pending.com" before deletion. does not use get_retired_email_by_email(), so it won't be mistaken for a retirement hash or trigger false-positive retirement detection downstream |
||
| pending_secondary_email_change.delete() | ||
|
|
||
| return render_to_response("secondary_email_change_successful.html") | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,6 +29,7 @@ | |
| ManualEnrollmentAudit, | ||
| PendingEmailChange, | ||
| PendingNameChange, | ||
| PendingSecondaryEmailChange, | ||
| Registration, | ||
| SocialLink, | ||
| UserProfile, | ||
|
|
@@ -233,6 +234,24 @@ def test_user_can_deactivate_secondary_email(self): | |
| # Assert that there is no longer a secondary/recovery email for test user | ||
| assert len(AccountRecovery.objects.filter(user_id=self.test_user.id)) == 0 | ||
|
|
||
| def test_user_can_deactivate_pending_secondary_email_change(self): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm holding off on reviewing tests. I'm getting confused between the pending and actual records, and this will be simpler when the PRs are split. Also, feel free to get an approval from the team first.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Helpful Info This PR fixes a PII leakage issue affecting two related models that store secondary email data at different lifecycle stages. The AccountRecovery model stores confirmed/active recovery emails, while PendingSecondaryEmailChange stores unconfirmed email change requests (before the user clicks the activation link). During user retirement, we must handle both models because a user can have: (1) only a confirmed email, (2) only a pending change request, (3) both (when changing their existing recovery email to a new one), or (4) neither. Both models require the same fix pattern: redact the email field to prevent PII from syncing to downstream systems save the redacted record, then delete it. The methods are idempotent, they return True if no record exists. For review clarity, I recommend examining the AccountRecovery changes first (confirmed emails), then PendingSecondaryEmailChange (pending emails), and finally the retirement flow that calls both methods. |
||
| """ | ||
| Verify that pending secondary email change records are removed when a user retires. | ||
| """ | ||
| PendingSecondaryEmailChange.objects.create( | ||
| user=self.test_user, | ||
| new_secondary_email='pending-secondary@example.com', | ||
| activation_key='b' * 32, | ||
| ) | ||
| assert len(PendingSecondaryEmailChange.objects.filter(user_id=self.test_user.id)) == 1 | ||
|
|
||
| self.client.login(username=self.test_user.username, password=self.test_password) | ||
| headers = build_jwt_headers(self.test_user) | ||
| response = self.client.post(self.url, self.build_post(self.test_password), **headers) | ||
| assert response.status_code == status.HTTP_204_NO_CONTENT | ||
|
|
||
| assert len(PendingSecondaryEmailChange.objects.filter(user_id=self.test_user.id)) == 0 | ||
|
|
||
| def test_password_mismatch(self): | ||
| """ | ||
| Verify that the user submitting a mismatched password results in | ||
|
|
@@ -1293,6 +1312,18 @@ def setUp(self): | |
| UserOrgTagFactory.create(user=self.test_user, key='foo', value='bar') | ||
| UserOrgTagFactory.create(user=self.test_user, key='cat', value='dog') | ||
|
|
||
| # Secondary email setup | ||
| PendingSecondaryEmailChange.objects.create( | ||
| user=self.test_user, | ||
| new_secondary_email='pending_secondary@example.com', | ||
| activation_key='test_activation_key_123' | ||
| ) | ||
| AccountRecovery.objects.create( | ||
| user=self.test_user, | ||
| secondary_email='confirmed_secondary@example.com', | ||
| is_active=True | ||
| ) | ||
|
|
||
| CourseEnrollmentAllowedFactory.create(email=self.original_email) | ||
|
|
||
| self.course_key = CourseKey.from_string('course-v1:edX+DemoX+Demo_Course') | ||
|
|
@@ -1399,6 +1430,10 @@ def test_retire_user(self, mock_remove_profile_images, mock_get_profile_image_na | |
| assert not PendingEmailChange.objects.filter(user=self.test_user).exists() | ||
| assert not UserOrgTag.objects.filter(user=self.test_user).exists() | ||
|
|
||
| # Verify secondary email models were cleaned | ||
| assert not PendingSecondaryEmailChange.objects.filter(user=self.test_user).exists() | ||
| assert not AccountRecovery.objects.filter(user=self.test_user).exists() | ||
|
|
||
| assert not CourseEnrollmentAllowed.objects.filter(email=self.original_email).exists() | ||
| assert not UnregisteredLearnerCohortAssignments.objects.filter(email=self.original_email).exists() | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@bmedx: [inform] This doesn't seem like it would have been intentionally retained, so I'm fine with calling this a bug and just fixing. Any objections?