Skip to content

Commit 80dbbfa

Browse files
fix: Redact SSO PII before deletion (#38425)
Implements PII redaction for UserSocialAuth records before deletion to prevent personally identifiable information from persisting if any downstream copy of the data uses soft-deletes.
1 parent 7c68f1d commit 80dbbfa

7 files changed

Lines changed: 330 additions & 11 deletions

File tree

openedx/core/djangoapps/user_api/accounts/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
from django.utils.text import format_lazy
77
from django.utils.translation import gettext_lazy as _
88

9+
# Import signals to ensure they are registered
10+
from . import signals # noqa: F401, pylint: disable=unused-import
11+
912
# The maximum length for the bio ("about me") account field
1013
BIO_MAX_LENGTH = 300
1114

openedx/core/djangoapps/user_api/accounts/signals.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,24 @@
22
Django Signal related functionality for user_api accounts
33
"""
44

5+
import logging
56

6-
from django.dispatch import Signal
7+
from django.db.models.signals import pre_delete
8+
from django.dispatch import Signal, receiver
9+
from social_django.models import UserSocialAuth
10+
11+
from .utils import REDACTED_SOCIAL_AUTH_UID_PREFIX, REDACTED_SOCIAL_AUTH_UID_SUFFIX, redact_and_delete_social_auth
12+
13+
logger = logging.getLogger(__name__)
14+
15+
16+
def get_redacted_social_auth_uid(pk):
17+
"""
18+
Return the redacted uid for a UserSocialAuth record.
19+
20+
This must match the format used in redact_and_delete_social_auth.
21+
"""
22+
return f'{REDACTED_SOCIAL_AUTH_UID_PREFIX}{pk}{REDACTED_SOCIAL_AUTH_UID_SUFFIX}'
723

824
# Signal to retire a user from LMS-initiated mailings (course mailings, etc)
925
# providing_args=["user"]
@@ -16,3 +32,24 @@
1632
# Signal to retire LMS misc information
1733
# providing_args=["user"]
1834
USER_RETIRE_LMS_MISC = Signal()
35+
36+
37+
@receiver(pre_delete, sender=UserSocialAuth)
38+
def redact_social_auth_pii_before_deletion(sender, instance, **kwargs): # pylint: disable=unused-argument
39+
"""
40+
Safety-net signal handler that redacts PII on any UserSocialAuth before deletion.
41+
42+
Records deleted via ``redact_and_delete_social_auth`` will already be redacted;
43+
this handler is a fallback for any missed deletion path.
44+
"""
45+
redacted_uid = get_redacted_social_auth_uid(instance.pk)
46+
47+
# Safety-net in case the record wasn't redacted before delete.
48+
if instance.extra_data or instance.uid != redacted_uid:
49+
logger.warning(
50+
'Social auth link for user_id=%s, provider=%s was deleted without first being redacted.'
51+
' Redacting in pre_delete.',
52+
instance.user_id,
53+
instance.provider,
54+
)
55+
redact_and_delete_social_auth(instance.user_id, skip_delete=True)
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
"""
2+
Tests for user_api accounts signals.
3+
"""
4+
5+
import logging
6+
from unittest.mock import patch
7+
8+
from django.test import TestCase
9+
from social_django.models import UserSocialAuth
10+
11+
from common.djangoapps.student.tests.factories import UserFactory
12+
from openedx.core.djangoapps.user_api.accounts.signals import get_redacted_social_auth_uid
13+
from openedx.core.djangolib.testing.utils import skip_unless_lms
14+
15+
16+
@skip_unless_lms
17+
class RedactSocialAuthPIIOnDeleteSignalTest(TestCase):
18+
"""
19+
Tests for the redact_social_auth_pii_before_deletion pre_delete signal handler.
20+
"""
21+
22+
def setUp(self):
23+
super().setUp()
24+
self.user = UserFactory.create(username='testuser', email='testuser@example.com')
25+
26+
def _create_social_auth(self, uid='user@example.com', extra_data=None):
27+
if extra_data is None:
28+
extra_data = {'email': 'user@example.com', 'name': 'Test User'}
29+
return UserSocialAuth.objects.create(
30+
user=self.user,
31+
provider='google-oauth2',
32+
uid=uid,
33+
extra_data=extra_data,
34+
)
35+
36+
def test_get_redacted_social_auth_uid_format(self):
37+
"""
38+
Test that get_redacted_social_auth_uid returns the expected string format.
39+
40+
This is the single source of truth for the redacted uid format.
41+
"""
42+
assert get_redacted_social_auth_uid(42) == 'redacted-before-delete-42@safe.com'
43+
assert get_redacted_social_auth_uid(1) == 'redacted-before-delete-1@safe.com'
44+
45+
@patch('openedx.core.djangoapps.user_api.accounts.signals.redact_and_delete_social_auth')
46+
def test_signal_warns_and_redacts_when_not_already_redacted(self, mock_redact):
47+
"""
48+
When a UserSocialAuth is deleted without prior redaction, the signal handler
49+
should log a warning and call redact_and_delete_social_auth with skip_delete=True.
50+
"""
51+
social_auth = self._create_social_auth()
52+
53+
with self.assertLogs(
54+
'openedx.core.djangoapps.user_api.accounts.signals', level=logging.WARNING
55+
) as log_ctx:
56+
social_auth.delete()
57+
58+
mock_redact.assert_called_once_with(self.user.id, skip_delete=True)
59+
assert any('was deleted without first being redacted' in msg for msg in log_ctx.output)
60+
61+
@patch('openedx.core.djangoapps.user_api.accounts.signals.redact_and_delete_social_auth')
62+
def test_signal_skips_warning_and_redaction_when_already_redacted(self, mock_redact):
63+
"""
64+
When a UserSocialAuth is already redacted before deletion, the signal handler
65+
should not log a warning and should not call redact_and_delete_social_auth.
66+
"""
67+
social_auth = self._create_social_auth()
68+
social_auth.uid = get_redacted_social_auth_uid(social_auth.pk)
69+
social_auth.extra_data = {}
70+
social_auth.save(update_fields=['uid', 'extra_data'])
71+
social_auth_id = social_auth.id
72+
73+
social_auth.delete()
74+
75+
mock_redact.assert_not_called()
76+
assert not UserSocialAuth.objects.filter(id=social_auth_id).exists()

openedx/core/djangoapps/user_api/accounts/tests/test_utils.py

Lines changed: 111 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,23 @@
11
""" Unit tests for custom UserProfile properties. """
22

3+
from contextlib import contextmanager
34

45
import ddt
56
from completion import models
67
from completion.test_utils import CompletionWaffleTestMixin
8+
from django.db import connection
9+
from django.db.models.signals import pre_delete
710
from django.test import TestCase
8-
from django.test.utils import override_settings
11+
from django.test.utils import CaptureQueriesContext, override_settings
12+
from social_django.models import UserSocialAuth
913

1014
from common.djangoapps.student.models import CourseEnrollment
1115
from common.djangoapps.student.tests.factories import UserFactory
12-
from openedx.core.djangoapps.user_api.accounts.utils import retrieve_last_sitewide_block_completed
16+
from openedx.core.djangoapps.user_api.accounts.signals import redact_social_auth_pii_before_deletion
17+
from openedx.core.djangoapps.user_api.accounts.utils import (
18+
redact_and_delete_social_auth,
19+
retrieve_last_sitewide_block_completed,
20+
)
1321
from openedx.core.djangolib.testing.utils import skip_unless_lms
1422
from xmodule.modulestore.tests.django_utils import (
1523
SharedModuleStoreTestCase, # pylint: disable=wrong-import-order
@@ -22,6 +30,39 @@
2230
from ..utils import format_social_link, validate_social_link
2331

2432

33+
def assert_update_before_delete(sql_list, num_redact_delete_pairs=1, table='social_auth_usersocialauth'):
34+
"""
35+
Assert that UPDATE and DELETE queries for ``table`` occur in consecutive pairs.
36+
"""
37+
table_key = table.upper()
38+
expected_sql_list = [
39+
sql for sql in sql_list
40+
if table_key in sql.upper() and ('UPDATE' in sql.upper() or 'DELETE' in sql.upper())
41+
]
42+
assert len(expected_sql_list) == num_redact_delete_pairs * 2, (
43+
f'Expected {num_redact_delete_pairs * 2} UPDATE/DELETE queries on {table}, '
44+
f'got {len(expected_sql_list)}'
45+
)
46+
47+
for index in range(0, len(expected_sql_list), 2):
48+
update_sql = expected_sql_list[index]
49+
delete_sql = expected_sql_list[index + 1]
50+
assert 'UPDATE' in update_sql.upper(), f'Expected UPDATE at position {index} for {table}'
51+
assert 'DELETE' in delete_sql.upper(), f'Expected DELETE at position {index + 1} for {table}'
52+
53+
# Use a context manager to guarantee signal reconnection between tests.
54+
@contextmanager
55+
def disconnected_social_auth_redaction_signal():
56+
"""
57+
Temporarily disconnect the fallback signal so tests exercise the helper path.
58+
"""
59+
pre_delete.disconnect(redact_social_auth_pii_before_deletion, sender=UserSocialAuth)
60+
try:
61+
yield
62+
finally:
63+
pre_delete.connect(redact_social_auth_pii_before_deletion, sender=UserSocialAuth)
64+
65+
2566
@ddt.ddt
2667
class UserAccountSettingsTest(TestCase):
2768
"""Unit tests for setting Social Media Links."""
@@ -133,3 +174,71 @@ def test_retrieve_last_sitewide_block_completed(self):
133174
)
134175

135176
assert empty_block_url is None
177+
178+
179+
@ddt.ddt
180+
@skip_unless_lms
181+
class RedactAndDeleteSocialAuthTest(TestCase):
182+
"""
183+
Tests for the redact_and_delete_social_auth utility function.
184+
"""
185+
186+
def setUp(self):
187+
super().setUp()
188+
self.user = UserFactory.create(username='testuser', email='testuser@example.com')
189+
190+
def create_social_auth(self, provider='google-oauth2', uid='user@example.com', extra_data=None):
191+
"""
192+
Helper method to create UserSocialAuth instances for testing.
193+
"""
194+
extra_data = extra_data or {
195+
'email': f'{provider}@example.com',
196+
'name': f'{provider.capitalize()} User',
197+
'id': '123456789',
198+
}
199+
return UserSocialAuth.objects.create(
200+
user=self.user,
201+
provider=provider,
202+
uid=uid,
203+
extra_data=extra_data,
204+
)
205+
206+
def test_redact_and_delete_redacts_single_sso_record(self):
207+
"""
208+
Test that redact_and_delete_social_auth redacts and deletes a single SSO record.
209+
"""
210+
social_auth = self.create_social_auth(
211+
provider='google-oauth2',
212+
uid='google@example.com',
213+
extra_data={'email': 'google@example.com', 'name': 'Google User'},
214+
)
215+
social_auth_id = social_auth.pk
216+
217+
with disconnected_social_auth_redaction_signal(), CaptureQueriesContext(connection) as ctx:
218+
redact_and_delete_social_auth(self.user.id)
219+
220+
assert_update_before_delete([query['sql'] for query in ctx])
221+
assert not UserSocialAuth.objects.filter(id=social_auth_id).exists()
222+
223+
def test_redact_and_delete_redacts_multiple_sso_records(self):
224+
"""
225+
Test that redact_and_delete_social_auth redacts and deletes all SSO records for a user.
226+
"""
227+
social_auth_ids = [
228+
self.create_social_auth(
229+
provider='google-oauth2',
230+
uid='google@example.com',
231+
extra_data={'email': 'google@example.com', 'name': 'Google User'},
232+
).pk,
233+
self.create_social_auth(
234+
provider='tpa-saml',
235+
uid='saml@example.com',
236+
extra_data={'email': 'saml@example.com', 'name': 'SAML User', 'uid': 'saml-uid'},
237+
).pk,
238+
]
239+
240+
with disconnected_social_auth_redaction_signal(), CaptureQueriesContext(connection) as ctx:
241+
redact_and_delete_social_auth(self.user.id)
242+
243+
assert_update_before_delete([query['sql'] for query in ctx])
244+
assert not UserSocialAuth.objects.filter(id__in=social_auth_ids).exists()

openedx/core/djangoapps/user_api/accounts/utils.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
from completion.models import BlockCompletion
1212
from completion.waffle import ENABLE_COMPLETION_TRACKING_SWITCH
1313
from django.conf import settings
14+
from django.db.models import CharField, Value
15+
from django.db.models.functions import Cast, Concat
1416
from django.utils.translation import gettext as _
1517
from edx_django_utils.user import generate_password
1618
from social_django.models import UserSocialAuth
@@ -23,6 +25,10 @@
2325

2426
from ..models import UserRetirementStatus
2527

28+
# Prefix and suffix used to build a per-record redacted uid for UserSocialAuth.
29+
REDACTED_SOCIAL_AUTH_UID_PREFIX = 'redacted-before-delete-'
30+
REDACTED_SOCIAL_AUTH_UID_SUFFIX = '@safe.com'
31+
2632
ENABLE_SECONDARY_EMAIL_FEATURE_SWITCH = 'enable_secondary_email_feature'
2733
LOGGER = logging.getLogger(__name__)
2834

@@ -196,6 +202,30 @@ def is_secondary_email_feature_enabled():
196202
return waffle.switch_is_active(ENABLE_SECONDARY_EMAIL_FEATURE_SWITCH)
197203

198204

205+
def redact_and_delete_social_auth(user_id, skip_delete=False):
206+
"""
207+
Redact PII from all UserSocialAuth records for the given user, then delete them.
208+
209+
Downstream copies of data may use soft-deletes, and redacting before deleting
210+
ensures PII for retired users (or future retirements) is not retained.
211+
212+
``skip_delete`` should only be set to True when called from the pre_delete signal
213+
handler, where deletion is already in progress.
214+
"""
215+
social_auth_queryset = UserSocialAuth.objects.filter(user_id=user_id)
216+
# Important: this redacted uid must match the format used by ``get_redacted_social_auth_uid()``.
217+
social_auth_queryset.update(
218+
uid=Concat(
219+
Value(REDACTED_SOCIAL_AUTH_UID_PREFIX),
220+
Cast('id', output_field=CharField()),
221+
Value(REDACTED_SOCIAL_AUTH_UID_SUFFIX),
222+
),
223+
extra_data={},
224+
)
225+
if not skip_delete:
226+
social_auth_queryset.delete()
227+
228+
199229
def create_retirement_request_and_deactivate_account(user):
200230
"""
201231
Adds user to retirement queue, unlinks social auth accounts, changes user passwords
@@ -204,8 +234,8 @@ def create_retirement_request_and_deactivate_account(user):
204234
# Add user to retirement queue.
205235
UserRetirementStatus.create_retirement(user)
206236

207-
# Unlink LMS social auth accounts
208-
UserSocialAuth.objects.filter(user_id=user.id).delete()
237+
# Redact and unlink LMS social auth accounts.
238+
redact_and_delete_social_auth(user.id)
209239

210240
# Change LMS password & email
211241
user.email = get_retired_email_by_email(user.email)

openedx/core/djangoapps/user_api/management/commands/retire_user.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55
from django.contrib.auth.models import User # pylint: disable=imported-auth-user
66
from django.core.management.base import BaseCommand, CommandError
77
from django.db import transaction
8-
from social_django.models import UserSocialAuth
98

109
from common.djangoapps.student.models import AccountRecovery, Registration, get_retired_email_by_email
1110
from openedx.core.djangolib.oauth2_retirement_utils import retire_dot_oauth2_models
1211

12+
from ...accounts.utils import redact_and_delete_social_auth
1313
from ...models import BulkUserRetirementConfig, UserRetirementStatus
1414

1515
logger = logging.getLogger(__name__)
@@ -144,8 +144,8 @@ def handle(self, *args, **options):
144144
for user in users:
145145
# Add user to retirement queue.
146146
UserRetirementStatus.create_retirement(user)
147-
# Unlink LMS social auth accounts
148-
UserSocialAuth.objects.filter(user_id=user.id).delete()
147+
# Redact and unlink LMS social auth accounts.
148+
redact_and_delete_social_auth(user.id)
149149
# Change LMS password & email
150150
user.email = get_retired_email_by_email(user.email)
151151
user.set_unusable_password()

0 commit comments

Comments
 (0)