Skip to content

Commit dfb41c6

Browse files
authored
fix(Identities): Sanitize identifiers (#6024)
1 parent d519430 commit dfb41c6

11 files changed

Lines changed: 369 additions & 14 deletions

File tree

api/edge_api/identities/serializers.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from rest_framework.exceptions import ValidationError
1919

2020
from environments.dynamodb.types import IdentityOverrideV2
21+
from environments.identities.constants import identifier_regex_validator
2122
from environments.models import Environment
2223
from features.models import Feature, FeatureState, FeatureStateValue
2324
from features.multivariate.models import MultivariateFeatureOption
@@ -52,7 +53,11 @@ def to_internal_value(self, data: typing.Any) -> str:
5253

5354
class EdgeIdentitySerializer(serializers.Serializer): # type: ignore[type-arg]
5455
identity_uuid = serializers.CharField(read_only=True)
55-
identifier = serializers.CharField(required=True, max_length=2000)
56+
identifier = serializers.CharField(
57+
required=True,
58+
max_length=2000,
59+
validators=[identifier_regex_validator],
60+
)
5661
dashboard_alias = LowerCaseCharField(
5762
required=False,
5863
max_length=100,
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import re
2+
3+
from django.core.validators import RegexValidator
4+
5+
RE_VALID_IDENTIFIER = re.compile(r"^[\w!#$%&*+/=?^_`{}|~@.\-]+$")
6+
7+
identifier_regex_validator = RegexValidator(
8+
regex=RE_VALID_IDENTIFIER,
9+
message="Identifier can only contain unicode letters, numbers, and the symbols: ! # $ %% & * + / = ? ^ _ ` { } | ~ @ . -",
10+
)
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Generated by Django 4.2.22 on 2025-09-08 23:30
2+
3+
import re
4+
5+
import django.core.validators
6+
from django.db import migrations, models
7+
8+
9+
class Migration(migrations.Migration):
10+
11+
dependencies = [
12+
("identities", "0002_alter_identity_index_together"),
13+
]
14+
15+
operations = [
16+
migrations.AlterField(
17+
model_name="identity",
18+
name="identifier",
19+
field=models.CharField(
20+
max_length=2000,
21+
validators=[
22+
django.core.validators.RegexValidator(
23+
message="Identifier can only contain unicode letters, numbers, and the symbols: ! # $ %% & * + / = ? ^ _ ` { } | ~ @ . -",
24+
regex=re.compile("^[\\w!#$%&*+/=?^_`{}|~@.\\-]+$"),
25+
)
26+
],
27+
),
28+
),
29+
]

api/environments/identities/models.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from flag_engine.context.mappers import map_environment_identity_to_context
66
from flag_engine.segments.evaluator import is_context_in_segment
77

8+
from environments.identities.constants import identifier_regex_validator
89
from environments.identities.managers import IdentityManager
910
from environments.identities.traits.models import Trait
1011
from environments.models import Environment
@@ -21,7 +22,10 @@
2122

2223

2324
class Identity(models.Model):
24-
identifier = models.CharField(max_length=2000)
25+
identifier = models.CharField(
26+
max_length=2000,
27+
validators=[identifier_regex_validator],
28+
)
2529
created_date = models.DateTimeField("DateCreated", auto_now_add=True)
2630
environment = models.ForeignKey(
2731
Environment, related_name="identities", on_delete=models.CASCADE

api/environments/identities/serializers.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,13 @@ class _TraitSerializer(serializers.Serializer): # type: ignore[type-arg]
6464
traits = serializers.ListSerializer(child=_TraitSerializer()) # type: ignore[var-annotated]
6565

6666

67-
class SDKIdentitiesQuerySerializer(serializers.Serializer): # type: ignore[type-arg]
68-
identifier = serializers.CharField(required=True)
67+
class SDKIdentitiesQuerySerializer(serializers.ModelSerializer[Identity]):
6968
transient = serializers.BooleanField(default=False)
7069

70+
class Meta:
71+
model = Identity
72+
fields = ("identifier", "transient")
73+
7174

7275
class IdentityAllFeatureStatesFeatureSerializer(serializers.Serializer): # type: ignore[type-arg]
7376
id = serializers.IntegerField()

api/environments/identities/views.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from drf_yasg.utils import swagger_auto_schema # type: ignore[import-untyped]
1616
from rest_framework import status, viewsets
1717
from rest_framework.permissions import IsAuthenticated
18+
from rest_framework.request import Request
1819
from rest_framework.response import Response
1920

2021
from app.pagination import CustomPagination
@@ -149,7 +150,7 @@ class SDKIdentities(SDKAPIView):
149150
pagination_class = None # set here to ensure documentation is correct
150151
throttle_classes = []
151152

152-
@swagger_auto_schema(
153+
@swagger_auto_schema( # type: ignore[misc]
153154
responses={200: SDKIdentitiesResponseSerializer()},
154155
query_serializer=SDKIdentitiesQuerySerializer(),
155156
operation_id="identify_user",
@@ -161,14 +162,14 @@ class SDKIdentities(SDKAPIView):
161162
cache=settings.GET_IDENTITIES_ENDPOINT_CACHE_NAME,
162163
)
163164
)
164-
def get(self, request): # type: ignore[no-untyped-def]
165-
identifier = request.query_params.get("identifier")
166-
if not identifier:
167-
return Response(
168-
{"detail": "Missing identifier"}
169-
) # TODO: add 400 status - will this break the clients?
170-
171-
if request.query_params.get("transient"):
165+
def get(self, request: Request) -> Response:
166+
query_serializer = SDKIdentitiesQuerySerializer(data=request.query_params)
167+
if not query_serializer.is_valid():
168+
return Response(query_serializer.errors, status=status.HTTP_400_BAD_REQUEST)
169+
170+
identifier = query_serializer.validated_data["identifier"]
171+
172+
if query_serializer.validated_data["transient"]:
172173
identity = Identity(
173174
created_date=timezone.now(),
174175
identifier=identifier,

api/tests/unit/conftest.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,41 @@
1717
from util.mappers import map_environment_to_environment_document
1818

1919

20+
def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
21+
if metafunc.definition.get_closest_marker("valid_identity_identifiers"):
22+
metafunc.parametrize(
23+
"identifier",
24+
[
25+
"bond...jamesbond",
26+
"ゴジラ",
27+
"ElChapulínColorado",
28+
"dalek#6453@skaro.gov",
29+
"agáta={^_^}=",
30+
"_ツ_/-handless-shrug",
31+
"who+am+i?",
32+
"i_100%_dont_know!",
33+
"~neo|simulation`0065192*75`",
34+
"KacperGustyr$Flagsmat",
35+
],
36+
)
37+
38+
if metafunc.definition.get_closest_marker("invalid_identity_identifiers"):
39+
error_message = "Identifier can only contain unicode letters, numbers, and the symbols: ! # $ % & * + / = ? ^ _ ` { } | ~ @ . -"
40+
metafunc.parametrize(
41+
["identifier", "identifier_error_message"],
42+
[
43+
("", "This field may not be blank."),
44+
(" ", "This field may not be blank."),
45+
("or really anything with a whitespace", error_message),
46+
("<script>alert(1)</script>", error_message),
47+
("'; DROP TABLE users;--", error_message),
48+
("'single-quotes'", error_message),
49+
('"double-quotes"', error_message),
50+
("figaro" * 334, "Ensure this field has no more than 2000 characters."),
51+
],
52+
)
53+
54+
2055
@pytest.fixture()
2156
def organisation_one(db): # type: ignore[no-untyped-def]
2257
return Organisation.objects.create(name="Test organisation 1")

api/tests/unit/edge_api/identities/test_edge_api_identities_views.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import typing
2+
from unittest.mock import Mock
23

4+
import pytest
35
from common.environments.permissions import (
46
MANAGE_IDENTITIES,
57
VIEW_ENVIRONMENT,
@@ -213,3 +215,42 @@ def test_user_cannot_delete_identity_from_another_project(
213215
assert response.status_code == status.HTTP_403_FORBIDDEN
214216

215217
assert flagsmith_identities_table.get_item(Key={"composite_key": composite_key})
218+
219+
220+
@pytest.mark.valid_identity_identifiers
221+
def test_EdgeIdentityViewSet__identifier_sanitization__accepts_valid_identifiers(
222+
admin_client: APIClient,
223+
edge_identity_dynamo_wrapper_mock: Mock,
224+
environment: Environment,
225+
identifier: str,
226+
) -> None:
227+
# Given
228+
edge_identity_dynamo_wrapper_mock.get_item.return_value = None
229+
230+
# When
231+
response = admin_client.post(
232+
f"/api/v1/environments/{environment.api_key}/edge-identities/",
233+
data={"identifier": identifier},
234+
)
235+
236+
# Then
237+
assert response.status_code == status.HTTP_201_CREATED
238+
assert response.json()["identifier"] == identifier
239+
240+
241+
@pytest.mark.invalid_identity_identifiers
242+
def test_EdgeIdentityViewSet__identifier_sanitization__rejects_invalid_identifiers(
243+
admin_client: APIClient,
244+
environment: Environment,
245+
identifier_error_message: str,
246+
identifier: str,
247+
) -> None:
248+
# When
249+
response = admin_client.post(
250+
f"/api/v1/environments/{environment.api_key}/edge-identities/",
251+
data={"identifier": identifier},
252+
)
253+
254+
# Then
255+
assert response.status_code == status.HTTP_400_BAD_REQUEST
256+
assert response.json() == {"identifier": [identifier_error_message]}

api/tests/unit/environments/identities/test_unit_identities_views.py

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import urllib
33
from typing import Any
44
from unittest import mock
5+
from urllib.parse import quote
56

67
import pytest
78
from common.environments.permissions import (
@@ -1431,7 +1432,7 @@ def test_SDKIdentitiesDeprecated__given_identifier__retrieves_identity(
14311432
pytest.param(True, False, True, 4, id="replica_db,old_identity,transient"),
14321433
],
14331434
)
1434-
def test_SDKIdentities_retrieves_identity_feature_states(
1435+
def test_SDKIdentities__retrieves_identity_feature_states(
14351436
api_client: APIClient,
14361437
django_assert_num_queries: DjangoAssertNumQueries,
14371438
environment: Environment,
@@ -1458,3 +1459,105 @@ def test_SDKIdentities_retrieves_identity_feature_states(
14581459

14591460
# Then
14601461
assert response.status_code == status.HTTP_200_OK
1462+
1463+
1464+
@pytest.mark.parametrize(
1465+
["transient_value", "expected_status"],
1466+
[
1467+
("true", status.HTTP_200_OK),
1468+
("false", status.HTTP_200_OK),
1469+
("1", status.HTTP_200_OK),
1470+
("0", status.HTTP_200_OK),
1471+
("yes", status.HTTP_200_OK),
1472+
("no", status.HTTP_200_OK),
1473+
("foo", status.HTTP_400_BAD_REQUEST),
1474+
("123", status.HTTP_400_BAD_REQUEST),
1475+
],
1476+
)
1477+
def test_SDKIdentities__given_transient_value__responds_accordingly(
1478+
api_client: APIClient,
1479+
environment: Environment,
1480+
transient_value: str,
1481+
expected_status: int,
1482+
) -> None:
1483+
# Given
1484+
api_client.credentials(HTTP_X_ENVIRONMENT_KEY=environment.api_key)
1485+
1486+
# When
1487+
response = api_client.get(
1488+
f"/api/v1/identities/?identifier=jamesbond&transient={transient_value}"
1489+
)
1490+
1491+
# Then
1492+
assert response.status_code == expected_status
1493+
1494+
1495+
@pytest.mark.valid_identity_identifiers
1496+
def test_SDKIdentities__identifier_sanitization__accepts_valid_identifiers(
1497+
api_client: APIClient,
1498+
environment: Environment,
1499+
identifier: str,
1500+
) -> None:
1501+
# Given
1502+
api_client.credentials(HTTP_X_ENVIRONMENT_KEY=environment.api_key)
1503+
1504+
# When
1505+
response = api_client.get(f"/api/v1/identities/?identifier={quote(identifier)}")
1506+
1507+
# Then
1508+
assert response.status_code == status.HTTP_200_OK
1509+
assert response.json()["identifier"] == identifier
1510+
1511+
1512+
@pytest.mark.invalid_identity_identifiers
1513+
def test_SDKIdentities__identifier_sanitization__rejects_invalid_identifiers(
1514+
api_client: APIClient,
1515+
environment: Environment,
1516+
identifier: str,
1517+
identifier_error_message: str,
1518+
) -> None:
1519+
# Given
1520+
api_client.credentials(HTTP_X_ENVIRONMENT_KEY=environment.api_key)
1521+
1522+
# When
1523+
response = api_client.get(f"/api/v1/identities/?identifier={quote(identifier)}")
1524+
1525+
# Then
1526+
assert response.status_code == status.HTTP_400_BAD_REQUEST
1527+
assert response.json() == {"identifier": [identifier_error_message]}
1528+
1529+
1530+
@pytest.mark.valid_identity_identifiers
1531+
def test_IdentityViewSet_create__accepts_valid_identifiers(
1532+
admin_client: APIClient,
1533+
environment: Environment,
1534+
identifier: str,
1535+
) -> None:
1536+
# When
1537+
response = admin_client.post(
1538+
f"/api/v1/environments/{environment.api_key}/identities/",
1539+
data={"identifier": identifier},
1540+
)
1541+
1542+
# Then
1543+
assert response.status_code == status.HTTP_201_CREATED
1544+
assert response.json()["identifier"] == identifier
1545+
assert Identity.objects.filter(identifier=identifier).exists()
1546+
1547+
1548+
@pytest.mark.invalid_identity_identifiers
1549+
def test_IdentityViewSet_create__rejects_invalid_identifiers(
1550+
admin_client: APIClient,
1551+
environment: Environment,
1552+
identifier: str,
1553+
identifier_error_message: str,
1554+
) -> None:
1555+
# When
1556+
response = admin_client.post(
1557+
f"/api/v1/environments/{environment.api_key}/identities/",
1558+
data={"identifier": identifier},
1559+
)
1560+
1561+
# Then
1562+
assert response.status_code == status.HTTP_400_BAD_REQUEST
1563+
assert response.json() == {"identifier": [identifier_error_message]}

0 commit comments

Comments
 (0)