scim2-models/scim2_models/utils.py at c48ab6d2063758af472f4e5d0dd4a6c3d02461d4 · python-scim/scim2-models · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import base64
import re
from typing import Annotated
from typing import Literal
from typing import Optional
from typing import Union

from pydantic import EncodedBytes
from pydantic import EncoderProtocol
from pydantic.alias_generators import to_snake
from pydantic_core import PydanticCustomError

# ``Union`` is always available; ``UnionType`` is appended only when the
# running interpreter provides it (Python 3.9 has no UnionType).
UNION_TYPES = [Union]
try:
    from types import UnionType
except ImportError:
    pass
else:
    UNION_TYPES.append(UnionType)


def int_to_str(status: Optional[int]) -> Optional[str]:
    """Return the string form of ``status``, passing ``None`` through unchanged.

    :param status: The integer to convert, or ``None``.
    :return: ``str(status)``, or ``None`` when ``status`` is ``None``.
    """
    if status is None:
        return None
    return str(status)


# Copied from Pydantic 2.10 repository
class Base64Encoder(EncoderProtocol):  # pragma: no cover
    """Encoder using the standard Base64 alphabet (not the URL-safe one)."""

    @classmethod
    def decode(cls, data: bytes) -> bytes:
        """Turn base64 encoded bytes back into the raw bytes they represent.

        Args:
            data: The base64 encoded payload.

        Returns:
            The decoded bytes.

        Raises:
            PydanticCustomError: With error type ``base64_decode`` when
                ``base64.b64decode`` raises a ``ValueError``.

        """
        try:
            decoded = base64.b64decode(data)
        except ValueError as exc:
            # Re-raise as a pydantic error, keeping the original as the cause.
            error_context = {"error": str(exc)}
            raise PydanticCustomError(
                "base64_decode", "Base64 decoding error: '{error}'", error_context
            ) from exc
        return decoded

    @classmethod
    def encode(cls, value: bytes) -> bytes:
        """Turn raw bytes into their base64 encoded representation.

        Args:
            value: The raw bytes to encode.

        Returns:
            The base64 encoded bytes.

        """
        encoded = base64.b64encode(value)
        return encoded

    @classmethod
    def get_json_format(cls) -> Literal["base64"]:
        """Report the JSON format name used for the encoded data.

        Returns:
            The constant string ``"base64"``.

        """
        return "base64"


# Compatibility with Pydantic <2.10
# https://pydantic.dev/articles/pydantic-v2-10-release#use-b64decode-and-b64encode-for-base64bytes-and-base64str-types
# Alias for ``bytes`` annotated with ``EncodedBytes`` configured to use the
# ``Base64Encoder`` class defined in this module.
Base64Bytes = Annotated[bytes, EncodedBytes(encoder=Base64Encoder)]


def to_camel(string: str) -> str:
    """Convert an attribute name to camelCase for serialization.

    The name is first passed through pydantic's ``to_snake``. Each run of
    underscores followed by ASCII letters or digits is then replaced by
    that alphanumeric run in title case, which drops the underscores.
    Characters that are not preceded by an underscore are left as they
    are, so special characters do not trigger an uppercase: for instance
    '$ref' stays '$ref'.

    :param string: The attribute name to convert.
    :return: The camelCase version of the name.
    """

    def _title_case_word(match: "re.Match[str]") -> str:
        # Keep only the alphanumeric run; the leading underscores are dropped.
        return match.group(1).title()

    return re.sub(r"_+([0-9A-Za-z]+)", _title_case_word, to_snake(string))


def normalize_attribute_name(attribute_name: str) -> str:
    """Normalize an attribute name for validation.

    Names containing a colon are treated as extension attributes and are
    only lowercased. For every other name, underscores and all non-word
    characters are stripped before lowercasing; letters and digits are
    kept.

    :param attribute_name: The attribute name to normalize.
    :return: The normalized, lowercased attribute name.
    """
    if ":" in attribute_name:
        # Extension attribute: keep the punctuation untouched.
        return attribute_name.lower()

    stripped = re.sub(r"[\W_]+", "", attribute_name)
    return stripped.lower()


def validate_scim_path_syntax(path: str) -> bool:
    """Tell whether a path is syntactically valid (simplified RFC 7644 rules).

    A path is rejected when it is empty or whitespace-only, starts with a
    digit, contains two consecutive dots, does not start with an ASCII
    letter, or contains a character outside the allowed set (ASCII
    letters and digits, dots, underscores, colons, hyphens, square
    brackets, double quotes, equals signs and whitespace). A path
    containing a colon must additionally pass
    :func:`validate_scim_urn_syntax`.

    :param path: The path to validate
    :type path: str
    :return: True if path syntax is valid, False otherwise
    :rtype: bool
    """
    # Empty and whitespace-only paths are never valid.
    if not (path and path.strip()):
        return False

    starts_with_digit = path[0].isdigit()
    has_consecutive_dots = ".." in path
    if starts_with_digit or has_consecutive_dots:
        return False

    # Basic character check: a leading letter followed only by allowed characters.
    allowed = re.match(r'^[a-zA-Z][a-zA-Z0-9._:\-\[\]"=\s]*$', path)
    if allowed is None:
        return False

    # Any colon means the path must be a well-formed URN path.
    return ":" not in path or validate_scim_urn_syntax(path)


def validate_scim_urn_syntax(path: str) -> bool:
    """Tell whether a URN-based path has a valid format.

    The path must start with ``urn:``. It is split at its last colon into
    a URN part and an attribute part: the URN part must be made of at
    least four colon-separated segments, and the attribute part must be
    non-empty and must not start with a digit.

    :param path: The URN path to validate
    :type path: str
    :return: True if URN path format is valid, False otherwise
    :rtype: bool
    """
    if not path.startswith("urn:"):
        return False

    # The "urn:" prefix guarantees a colon, so the last-colon split always
    # yields both a URN part and an attribute part.
    urn_part, _, attribute = path.rpartition(":")

    # Four segments (urn:namespace:specific:resource) means three separators.
    if urn_part.count(":") < 3:
        return False

    return bool(attribute) and not attribute[0].isdigit()