Skip to content

Commit 2a5bbd8

Browse files
authored
migration: backfill apitoken hashed values (#71728)
Supports getsentry/rfcs#32. We've been hashing tokens as they are used to authenticate (#65941), but the number of tokens being hashed that way has started to level out. This backfill migration fills in the hashed values for all of the remaining tokens. Huge thank you to @markstory @wedamija and @GabeVillalobos for helping with the migration test! 🙏
1 parent 28fe514 commit 2a5bbd8

3 files changed

Lines changed: 208 additions & 1 deletion

File tree

migrations_lockfile.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,5 @@ feedback: 0004_index_together
99
hybridcloud: 0016_add_control_cacheversion
1010
nodestore: 0002_nodestore_no_dictfield
1111
replays: 0004_index_together
12-
sentry: 0725_create_sentry_groupsearchview_table
12+
sentry: 0726_apitoken_backfill_hashes
1313
social_auth: 0002_default_auto_field
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
# Generated by Django 5.0.6 on 2024-05-29 21:28
2+
3+
import hashlib
4+
import logging
5+
from enum import IntEnum
6+
7+
from django.db import migrations, router
8+
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
9+
from django.db.migrations.state import StateApps
10+
11+
from sentry.new_migrations.migrations import CheckedMigration
12+
from sentry.utils.query import RangeQuerySetWrapperWithProgressBar
13+
14+
logger = logging.getLogger(__name__)
15+
16+
17+
def backfill_hash_values(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
    """Fill in missing SHA-256 hashes on ``ApiToken`` rows and enqueue outboxes.

    For every ``ApiToken`` row:

    * ``hashed_token`` is set to the SHA-256 hex digest of ``token`` when it is
      currently ``None``.
    * ``hashed_refresh_token`` is set to the SHA-256 hex digest of
      ``refresh_token`` when a refresh token exists and the stored hash does
      not already equal that digest.

    A row is saved, and one ``ControlOutbox`` row per user region is created,
    only when at least one of the two hash columns actually changed. Rows whose
    hashes are already correct are skipped, so re-running the migration does
    not issue redundant writes or duplicate outbox messages.

    Args:
        apps: Historical app registry used to look up the models.
        schema_editor: Unused; required by the ``RunPython`` callable signature.
    """
    ApiToken = apps.get_model("sentry", "ApiToken")
    ControlOutbox = apps.get_model("sentry", "ControlOutbox")
    OrganizationMemberMapping = apps.get_model("sentry", "OrganizationMemberMapping")
    OrganizationMapping = apps.get_model("sentry", "OrganizationMapping")

    # These symbols are imported lazily; if any of them cannot be imported the
    # migration logs the failure and exits without touching any rows.
    try:
        from collections.abc import Container

        from django.conf import settings

        from sentry.services.hybrid_cloud.util import control_silo_function
        from sentry.silo.base import SiloMode
        from sentry.silo.safety import unguarded_write
    except ImportError:
        logger.exception("Cannot execute migration. Required symbols could not be imported")
        return

    # copied from src/sentry/models/outbox.py
    class OutboxCategory(IntEnum):
        USER_UPDATE = 0
        UNUSED_TWO = 4
        UNUSUED_THREE = 13  # (sic) member name kept exactly as copied
        UNUSED_ONE = 19
        AUTH_IDENTITY_UPDATE = 25
        API_TOKEN_UPDATE = 32

    # copied from src/sentry/models/outbox.py
    _outbox_categories_for_scope: dict[int, set[OutboxCategory]] = {}
    _used_categories: set[OutboxCategory] = set()

    # copied from src/sentry/models/outbox.py
    def scope_categories(enum_value: int, categories: set[OutboxCategory]) -> int:
        """Register ``categories`` under ``enum_value`` and return ``enum_value``.

        Asserts that none of the categories was registered to an earlier scope.
        """
        _outbox_categories_for_scope[enum_value] = categories
        inter = _used_categories.intersection(categories)
        assert not inter, f"OutboxCategories {inter} were already registered to a different scope"
        _used_categories.update(categories)
        return enum_value

    # copied from src/sentry/models/outbox.py
    class OutboxScope(IntEnum):
        USER_SCOPE = scope_categories(
            1,
            {
                OutboxCategory.USER_UPDATE,
                OutboxCategory.API_TOKEN_UPDATE,
                OutboxCategory.UNUSED_ONE,
                OutboxCategory.UNUSED_TWO,
                OutboxCategory.UNUSUED_THREE,
                OutboxCategory.AUTH_IDENTITY_UPDATE,
            },
        )

    # NOTE(review): `control_silo_function` presumably restricts these helpers
    # to control-silo execution -- confirm against sentry.services.hybrid_cloud.util.
    @control_silo_function
    def _find_orgs_for_user(user_id: int) -> set[int]:
        """Return the organization ids that have a member mapping for ``user_id``."""
        return {
            m["organization_id"]
            for m in OrganizationMemberMapping.objects.filter(user_id=user_id).values(
                "organization_id"
            )
        }

    @control_silo_function
    def find_regions_for_orgs(org_ids: Container[int]) -> set[str]:
        """Return the region names mapped to ``org_ids``.

        In monolith mode the single configured monolith region is returned.
        """
        if SiloMode.get_current_mode() == SiloMode.MONOLITH:
            return {settings.SENTRY_MONOLITH_REGION}
        else:
            return set(
                OrganizationMapping.objects.filter(organization_id__in=org_ids).values_list(
                    "region_name", flat=True
                )
            )

    @control_silo_function
    def find_regions_for_user(user_id: int) -> set[str]:
        """Return the region names of every organization ``user_id`` belongs to."""
        if SiloMode.get_current_mode() == SiloMode.MONOLITH:
            return {settings.SENTRY_MONOLITH_REGION}

        org_ids = _find_orgs_for_user(user_id)
        return find_regions_for_orgs(org_ids)

    for api_token in RangeQuerySetWrapperWithProgressBar(ApiToken.objects.all()):
        needs_save = False

        if api_token.hashed_token is None:
            api_token.hashed_token = hashlib.sha256(api_token.token.encode()).hexdigest()
            needs_save = True

        # if there's a refresh token make sure it is hashed as well, but only
        # count it as a change when the stored hash is missing or different
        if api_token.refresh_token:
            hashed_refresh_token = hashlib.sha256(api_token.refresh_token.encode()).hexdigest()
            if api_token.hashed_refresh_token != hashed_refresh_token:
                api_token.hashed_refresh_token = hashed_refresh_token
                needs_save = True

        # only save (and notify regions) if a hash column actually changed
        if not needs_save:
            continue

        # NOTE(review): `unguarded_write` presumably lifts the silo write guard
        # for the given connection -- confirm against sentry.silo.safety.
        with unguarded_write(using=router.db_for_write(ApiToken)):
            api_token.save(update_fields=["hashed_token", "hashed_refresh_token"])
            for region in find_regions_for_user(api_token.user_id):
                ControlOutbox.objects.create(
                    shard_scope=OutboxScope.USER_SCOPE,
                    shard_identifier=api_token.user_id,
                    category=OutboxCategory.API_TOKEN_UPDATE,
                    region_name=region,
                    object_identifier=api_token.id,
                )
123+
124+
125+
class Migration(CheckedMigration):
    """Data migration that runs ``backfill_hash_values`` with a no-op reverse."""

    # `is_post_deployment` marks a migration that must not be run automatically
    # in production. Only use it when running the migration after the code has
    # deployed is safe, which rules out most operations that alter a table's
    # schema. Typical post-deployment candidates:
    # - Large data migrations: these are usually run manually so they can be
    #   monitored and do not block the deploy for a long time while they run.
    # - Adding indexes to large tables: this can take a long time, so it is
    #   preferable to run it outside deployments. Although adding an index is a
    #   schema change, it is completely safe to run after the code has deployed.
    # Once deployed, run these manually via:
    # https://develop.sentry.dev/database-migrations/#migration-deployment
    is_post_deployment = True

    dependencies = [("sentry", "0725_create_sentry_groupsearchview_table")]

    operations = [
        migrations.RunPython(
            code=backfill_hash_values,
            reverse_code=migrations.RunPython.noop,
            hints={"tables": ["sentry_apitoken"]},
        )
    ]
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
from sentry.models.outbox import ControlOutbox, OutboxCategory, OutboxScope
2+
from sentry.testutils.cases import TestMigrations
3+
from sentry.testutils.helpers import override_options
4+
from sentry.testutils.silo import control_silo_test
5+
6+
7+
@control_silo_test
class TestBackfillApiTokenHashesMigration(TestMigrations):
    """Checks that migration 0726 hashes existing tokens and writes outboxes."""

    migrate_from = "0725_create_sentry_groupsearchview_table"
    migrate_to = "0726_apitoken_backfill_hashes"
    connection = "control"

    @override_options({"apitoken.save-hash-on-create": False})
    def setup_initial_state(self):
        """Create a user auth token and a sentry-app token with no hashes stored."""
        owner = self.create_user()
        self.user_auth_token = self.create_user_auth_token(user=owner)

        # The user needs an org so that there is a membership to look up
        org = self.create_organization(owner=owner)

        sentry_app = self.create_sentry_app(user=owner, organization_id=org.id)
        self.app_install = self.create_sentry_app_installation(
            organization=org, user=owner, slug=sentry_app.slug
        )

        assert self.user_auth_token.hashed_token is None
        # user auth tokens do not have refresh tokens
        assert self.user_auth_token.refresh_token is None

        install_token = self.app_install.api_token
        assert install_token.hashed_token is None
        assert install_token.hashed_refresh_token is None
        # tokens related to sentry apps do have refresh tokens
        assert install_token.refresh_token is not None

    def _assert_token_outbox(self, token):
        """Assert exactly one API_TOKEN_UPDATE outbox row matches ``token``."""
        assert ControlOutbox.objects.get(
            shard_scope=OutboxScope.USER_SCOPE,
            category=OutboxCategory.API_TOKEN_UPDATE,
            object_identifier=token.id,
            shard_identifier=token.user_id,
        )

    def test_for_hashed_value(self):
        """Both tokens carry hashes and have an outbox row after migrating."""
        self.user_auth_token.refresh_from_db()
        assert self.user_auth_token.hashed_token is not None
        self._assert_token_outbox(self.user_auth_token)

        self.app_install.refresh_from_db()
        install_token = self.app_install.api_token
        assert install_token.hashed_token is not None
        assert install_token.hashed_refresh_token is not None
        self._assert_token_outbox(install_token)

0 commit comments

Comments
 (0)