Skip to content

Commit 82b77ae

Browse files
feat: redact PII fields before delete in user retirement flows (#38426)
Adds support for redacting sensitive PII fields before deletion in DeletableByUserValue models. This is useful when there is a downstream sync of data that retains soft-deleted records, so the data will be redacted for user retirements. Changes include: - Add redact_before_delete_fields hook to model mixin - Redact emails before delete for: - CourseEnrollmentAllowed - PendingEmailChange - UnregisteredLearnerCohortAssignments - Adds SQL assertion helpers to verify UPDATE occurs before DELETE - Adds regression/unit tests for safe ID-filtered redaction and deletion flows - Updates email change flow to use delete_by_user_value, to pick up the redact functionality.
1 parent 59bb6d6 commit 82b77ae

10 files changed

Lines changed: 233 additions & 21 deletions

File tree

common/djangoapps/student/models/course_enrollment.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1597,7 +1597,9 @@ class CourseEnrollmentAllowed(DeletableByUserValue, models.Model):
15971597
the object is marked with the student who enrolled, to prevent students from changing e-mails and
15981598
enrolling many accounts through the same e-mail.
15991599
1600-
.. no_pii:
1600+
.. pii: Contains email, redacted then deleted in AccountRetirementView
1601+
.. pii_types: email_address
1602+
.. pii_retirement: local_api
16011603
"""
16021604
email = models.CharField(max_length=255, db_index=True)
16031605
course_id = CourseKeyField(max_length=255, db_index=True)
@@ -1646,6 +1648,13 @@ def may_enroll_and_unenrolled(cls, course_id):
16461648
"""
16471649
return CourseEnrollmentAllowed.objects.filter(course_id=course_id, user__isnull=True)
16481650

1651+
@classmethod
1652+
def redact_before_delete_fields(cls):
1653+
"""
1654+
Redact email before deleting records for downstream soft-delete systems.
1655+
"""
1656+
return {'email': 'redacted-before-delete@safe.com'}
1657+
16491658

16501659
class CourseEnrollmentAttribute(models.Model):
16511660
"""

common/djangoapps/student/models/user.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -904,14 +904,21 @@ class PendingEmailChange(DeletableByUserValue, models.Model): # noqa: DJ008
904904
"""
905905
This model keeps track of pending requested changes to a user's email address.
906906
907-
.. pii: Contains new_email, retired in AccountRetirementView
907+
.. pii: Contains new_email, redacted then deleted in AccountRetirementView
908908
.. pii_types: email_address
909909
.. pii_retirement: local_api
910910
"""
911911
user = models.OneToOneField(User, unique=True, db_index=True, on_delete=models.CASCADE)
912912
new_email = models.CharField(blank=True, max_length=255, db_index=True)
913913
activation_key = models.CharField(('activation key'), max_length=32, unique=True, db_index=True)
914914

915+
@classmethod
916+
def redact_before_delete_fields(cls):
917+
"""
918+
Redact PII fields before delete in downstream soft-delete systems.
919+
"""
920+
return {'new_email': 'redacted-before-delete@safe.com'}
921+
915922
def request_change(self, email):
916923
"""Request a change to a user's email.
917924

common/djangoapps/student/tests/test_models.py

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@
1010
from django.conf import settings
1111
from django.contrib.auth.models import AnonymousUser, User # pylint: disable=imported-auth-user
1212
from django.core.cache import cache
13+
from django.db import connection
1314
from django.db.models.functions import Lower
1415
from django.test import TestCase, override_settings
16+
from django.test.utils import CaptureQueriesContext
1517
from edx_toggles.toggles.testutils import override_waffle_flag
1618
from freezegun import freeze_time
1719
from opaque_keys.edx.keys import CourseKey
@@ -42,7 +44,7 @@
4244
from openedx.core.djangoapps.content.course_overviews.tests.factories import CourseOverviewFactory
4345
from openedx.core.djangoapps.schedules.models import Schedule
4446
from openedx.core.djangoapps.user_api.preferences.api import set_user_preference
45-
from openedx.core.djangolib.testing.utils import skip_unless_lms
47+
from openedx.core.djangolib.testing.utils import assert_redact_before_delete, skip_unless_lms
4648
from xmodule.modulestore import ModuleStoreEnum # pylint: disable=wrong-import-order
4749
from xmodule.modulestore.tests.django_utils import ( # pylint: disable=wrong-import-order
4850
ModuleStoreTestCase,
@@ -600,6 +602,22 @@ def test_delete_by_user_no_effect_for_user_with_no_email_change(self):
600602
assert not record_was_deleted
601603
assert 1 == len(PendingEmailChange.objects.all())
602604

605+
def test_delete_by_user_value_redacts_pending_email_before_deletion(self):
606+
"""
607+
Verify that delete_by_user_value redacts new_email before deletion.
608+
"""
609+
table = PendingEmailChange._meta.db_table
610+
with CaptureQueriesContext(connection) as ctx:
611+
record_was_deleted = PendingEmailChange.delete_by_user_value(self.user, field='user')
612+
613+
assert_redact_before_delete(
614+
[q['sql'] for q in ctx],
615+
table=table,
616+
expected_redacted_value_list=['redacted-before-delete@safe.com'],
617+
)
618+
assert record_was_deleted
619+
assert not PendingEmailChange.objects.filter(user=self.user).exists()
620+
603621

604622
class TestCourseEnrollmentAllowed(ModuleStoreTestCase): # pylint: disable=missing-class-docstring
605623

@@ -615,11 +633,18 @@ def setUp(self):
615633
)
616634

617635
def test_retiring_user_deletes_record(self):
618-
is_successful = CourseEnrollmentAllowed.delete_by_user_value(
619-
value=self.email,
620-
field='email'
621-
)
636+
with CaptureQueriesContext(connection) as ctx:
637+
is_successful = CourseEnrollmentAllowed.delete_by_user_value(
638+
value=self.email,
639+
field='email'
640+
)
641+
622642
assert is_successful
643+
assert_redact_before_delete(
644+
[q['sql'] for q in ctx],
645+
table=CourseEnrollmentAllowed._meta.db_table,
646+
expected_redacted_value_list=['redacted-before-delete@safe.com'],
647+
)
623648
user_search_results = CourseEnrollmentAllowed.objects.filter(
624649
email=self.email
625650
)

common/djangoapps/student/views/management.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -963,14 +963,17 @@ def confirm_email_change(request, key):
963963

964964
user.email = pec.new_email
965965
user.save()
966-
pec.delete()
966+
967+
# Redact pending email before deletion.
968+
PendingEmailChange.delete_by_user_value(user, field="user")
969+
967970
# And send it to the new email...
968-
msg.recipient = Recipient(user.id, pec.new_email)
971+
msg.recipient = Recipient(user.id, user.email)
969972
try:
970973
ace.send(msg)
971974
except Exception: # pylint: disable=broad-except
972975
log.warning('Unable to send confirmation email to new address', exc_info=True)
973-
response = render_to_response("email_change_failed.html", {'email': pec.new_email})
976+
response = render_to_response("email_change_failed.html", {'email': user.email})
974977
transaction.set_rollback(True)
975978
return response
976979

openedx/core/djangoapps/course_groups/models.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,3 +311,10 @@ class Meta:
311311
course_user_group = models.ForeignKey(CourseUserGroup, on_delete=models.CASCADE) # noqa: DJ012
312312
email = models.CharField(blank=True, max_length=255, db_index=True)
313313
course_id = CourseKeyField(max_length=255)
314+
315+
@classmethod
316+
def redact_before_delete_fields(cls):
317+
"""
318+
Redact email before deleting records for downstream soft-delete systems.
319+
"""
320+
return {'email': 'redacted-before-delete@safe.com'}

openedx/core/djangoapps/course_groups/tests/test_cohorts.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,17 @@
88
import ddt
99
import pytest
1010
from django.contrib.auth.models import AnonymousUser, User # pylint: disable=imported-auth-user
11-
from django.db import IntegrityError
11+
from django.db import IntegrityError, connection
1212
from django.http import Http404
1313
from django.test import TestCase
14+
from django.test.utils import CaptureQueriesContext
1415
from opaque_keys.edx.keys import CourseKey
1516
from opaque_keys.edx.locator import CourseLocator
1617
from openedx_events.testing import OpenEdxEventsTestMixin
1718

1819
from common.djangoapps.student.models import CourseEnrollment
1920
from common.djangoapps.student.tests.factories import UserFactory
21+
from openedx.core.djangolib.testing.utils import assert_redact_before_delete
2022
from xmodule.modulestore.django import modulestore # pylint: disable=wrong-import-order
2123
from xmodule.modulestore.tests.django_utils import ( # pylint: disable=wrong-import-order
2224
TEST_DATA_SPLIT_MODULESTORE,
@@ -785,12 +787,18 @@ def setUp(self):
785787
)
786788

787789
def test_retired_user_has_deleted_record(self):
788-
was_retired = UnregisteredLearnerCohortAssignments.delete_by_user_value(
789-
value='learner@example.com',
790-
field='email'
791-
)
790+
with CaptureQueriesContext(connection) as ctx:
791+
was_retired = UnregisteredLearnerCohortAssignments.delete_by_user_value(
792+
value='learner@example.com',
793+
field='email'
794+
)
792795

793796
assert was_retired
797+
assert_redact_before_delete(
798+
[q['sql'] for q in ctx],
799+
table=UnregisteredLearnerCohortAssignments._meta.db_table,
800+
expected_redacted_value_list=['redacted-before-delete@safe.com'],
801+
)
794802

795803
search_retired_user_results = \
796804
UnregisteredLearnerCohortAssignments.objects.filter(

openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@
6767
UserRetirementPartnerReportingStatus,
6868
UserRetirementStatus,
6969
)
70-
from openedx.core.djangolib.testing.utils import skip_unless_lms
70+
from openedx.core.djangolib.testing.utils import assert_redact_before_delete, skip_unless_lms
7171
from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
7272
from xmodule.modulestore.tests.factories import CourseFactory
7373

@@ -1469,7 +1469,14 @@ def test_retire_user_where_username_not_provided(self):
14691469
@mock.patch('openedx.core.djangoapps.user_api.accounts.views.remove_profile_images')
14701470
def test_retire_user(self, mock_remove_profile_images, mock_get_profile_image_names):
14711471
data = {'username': self.original_username}
1472-
self.post_and_assert_status(data)
1472+
with CaptureQueriesContext(connection) as ctx:
1473+
self.post_and_assert_status(data)
1474+
1475+
assert_redact_before_delete(
1476+
[q['sql'] for q in ctx],
1477+
table=PendingEmailChange._meta.db_table,
1478+
expected_redacted_value_list=['redacted-before-delete@safe.com'],
1479+
)
14731480

14741481
self.test_user.refresh_from_db()
14751482
self.test_user.profile.refresh_from_db() # pylint: disable=no-member

openedx/core/djangolib/model_mixins.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,22 +20,40 @@ class DeletableByUserValue:
2020
associated with some specified user.
2121
"""
2222

23+
@classmethod
24+
def redact_before_delete_fields(cls):
25+
"""
26+
Returns dict of PII fields and their redacted values.
27+
28+
Always returns an empty dict unless overridden by the inheriting model.
29+
Results are used by ``delete_by_user_value`` to redact PII before delete.
30+
"""
31+
return {}
32+
2333
@classmethod
2434
def delete_by_user_value(cls, value, field):
2535
"""
26-
Deletes instances of this model where ``field`` equals ``value``.
36+
Redacts as-needed and always deletes instances of this model where ``field`` equals ``value``.
2737
2838
e.g.
2939
``delete_by_user_value(value='learner@example.com', field='email')``
3040
41+
If ``redact_before_delete_fields()`` returns a non-empty dict, the
42+
returned PII fields are redacted before any records are deleted.
43+
3144
Returns True if any instances were deleted.
3245
Returns False otherwise.
3346
"""
3447
filter_kwargs = {field: value}
3548
records_matching_user_value = cls.objects.filter(**filter_kwargs)
36-
37-
if not records_matching_user_value.exists():
49+
record_ids_matching_user_value = list(records_matching_user_value.values_list('id', flat=True))
50+
if not record_ids_matching_user_value:
3851
return False
3952

40-
records_matching_user_value.delete()
53+
# Converting to query set by id ensures we redact and delete the appropriate records
54+
user_value_records_by_id = cls.objects.filter(id__in=record_ids_matching_user_value)
55+
redact_fields = cls.redact_before_delete_fields()
56+
if redact_fields:
57+
user_value_records_by_id.update(**redact_fields)
58+
user_value_records_by_id.delete()
4159
return True

openedx/core/djangolib/testing/utils.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,44 @@
3333
]
3434

3535

36+
def assert_redact_before_delete(
37+
sql_list,
38+
table,
39+
expected_redacted_value_list,
40+
num_redact_delete_pairs=1,
41+
):
42+
"""
43+
Assert that PII is redacted before deleted.
44+
45+
Redacting before deleting protects downstream sync of data from holding PII
46+
in soft-deleted records. This helper ensures UPDATE and DELETE queries for
47+
``table`` occur in consecutive pairs, and that each UPDATE contains the
48+
expected redacted values.
49+
"""
50+
assert expected_redacted_value_list
51+
52+
table_key = table.upper()
53+
expected_sql_list = [
54+
sql for sql in sql_list
55+
if table_key in sql.upper() and ('UPDATE' in sql.upper() or 'DELETE' in sql.upper())
56+
]
57+
58+
assert len(expected_sql_list) == num_redact_delete_pairs * 2, (
59+
f'Expected {num_redact_delete_pairs * 2} UPDATE/DELETE queries on {table}, '
60+
f'got {len(expected_sql_list)}'
61+
)
62+
63+
for index in range(0, len(expected_sql_list), 2):
64+
update_sql = expected_sql_list[index]
65+
delete_sql = expected_sql_list[index + 1]
66+
assert 'UPDATE' in update_sql.upper(), f'Expected UPDATE at position {index} for {table}'
67+
assert 'DELETE' in delete_sql.upper(), f'Expected DELETE at position {index + 1} for {table}'
68+
for expected_redacted_value in expected_redacted_value_list:
69+
assert expected_redacted_value.upper() in update_sql.upper(), (
70+
f'Expected UPDATE to set redacted value {expected_redacted_value} for {table}'
71+
)
72+
73+
3674
class CacheIsolationMixin:
3775
"""
3876
This class can be used to enable specific django caches for
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
"""
2+
Tests for model_mixins.py.
3+
"""
4+
5+
from unittest import TestCase, mock
6+
7+
import ddt
8+
9+
from openedx.core.djangolib.model_mixins import DeletableByUserValue
10+
11+
12+
@ddt.ddt
13+
class TestDeletableByUserValue(TestCase):
14+
"""
15+
Unit tests for DeletableByUserValue.
16+
"""
17+
18+
class NonRedactingModel(DeletableByUserValue):
19+
"""
20+
Dummy model that uses default redaction behavior.
21+
"""
22+
23+
class RedactingModel(DeletableByUserValue):
24+
"""
25+
Dummy model that overrides redaction fields.
26+
"""
27+
28+
@classmethod
29+
def redact_before_delete_fields(cls):
30+
return {'email': 'redacted@retired.invalid'}
31+
32+
def _make_queryset(self, exists):
33+
"""
34+
Return a mock queryset with ``exists`` and ``values_list`` pre-configured.
35+
"""
36+
queryset = mock.Mock()
37+
queryset.exists.return_value = exists
38+
queryset.values_list.return_value = [11, 12] if exists else []
39+
return queryset
40+
41+
def test_redact_before_delete_fields_defaults_to_empty_dict(self):
42+
"""
43+
Verify the default redaction hook does not request any field updates.
44+
"""
45+
assert not self.NonRedactingModel.redact_before_delete_fields()
46+
47+
def test_delete_by_user_value_returns_false_when_no_matches(self):
48+
"""
49+
Verify no updates or deletes occur when no rows match the filter.
50+
"""
51+
queryset = self._make_queryset(exists=False)
52+
with mock.patch.object(self.NonRedactingModel, 'objects', create=True) as mock_objects:
53+
mock_objects.filter.return_value = queryset
54+
55+
was_deleted = self.NonRedactingModel.delete_by_user_value(value='missing@example.com', field='email')
56+
57+
assert not was_deleted
58+
mock_objects.filter.assert_called_once_with(email='missing@example.com')
59+
queryset.update.assert_not_called()
60+
queryset.delete.assert_not_called()
61+
62+
@ddt.data(
63+
('NonRedactingModel', None),
64+
('RedactingModel', {'email': 'redacted@retired.invalid'}),
65+
)
66+
@ddt.unpack
67+
def test_delete_by_user_value(self, model_name, expected_redact_fields):
68+
"""
69+
Verify delete behavior with and without redaction configured.
70+
71+
When no redaction hook is set, rows are deleted directly.
72+
When a redaction hook is set, fields are updated before deletion.
73+
"""
74+
model_cls = getattr(self, model_name)
75+
queryset = self._make_queryset(exists=True)
76+
with mock.patch.object(model_cls, 'objects', create=True) as mock_objects:
77+
mock_objects.filter.return_value = queryset
78+
79+
was_deleted = model_cls.delete_by_user_value(value='learner@example.com', field='email')
80+
81+
assert was_deleted
82+
assert mock_objects.filter.call_args_list == [
83+
mock.call(email='learner@example.com'),
84+
mock.call(id__in=[11, 12]),
85+
]
86+
if expected_redact_fields:
87+
queryset.update.assert_called_once_with(**expected_redact_fields)
88+
else:
89+
queryset.update.assert_not_called()
90+
queryset.delete.assert_called_once_with()

0 commit comments

Comments
 (0)