react-native/scripts/cxx-api/parser/utils/text_resolution.py at ef54b84d387b61956e3d50bbbb189797deafb20f · coado/react-native · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

"""
Functions for resolving text content from Doxygen XML types.
"""

from __future__ import annotations

import re
from enum import Enum

from doxmlparser import compound


def decode_doxygen_template_encoding(encoded: str) -> str:
    """Decode Doxygen's encoding for template specializations in refids.

    Doxygen encodes special characters in refids using underscore-prefixed codes:
    - '_3' = '<' (template open)
    - '_4' = '>' (template close)
    - '_01' = ' ' (space)
    - '_07' = '(' (open paren)
    - '_08' = ')' (close paren)
    - '_8_8_8' = '...' (variadic ellipsis)
    - '_00' = ',' (comma)
    - '_02' = '*' (pointer)
    - '_05' = '=' (equals)
    - '_06' = '&' (reference)

    e.g. 'SyncCallback_3_01R_07Args_8_8_8_08_4' -> 'SyncCallback< R(Args...)>'
    """
    result = encoded

    # Process longer patterns first to avoid partial matches
    result = result.replace("_8_8_8", "...")  # Variadic ellipsis

    # Process two-char patterns (_0X codes)
    result = result.replace("_00", ", ")  # Comma (with space for readability)
    result = result.replace("_01", " ")  # Space
    result = result.replace("_02", "*")  # Pointer
    result = result.replace("_05", "=")  # Equals
    result = result.replace("_06", "&")  # Reference
    result = result.replace("_07", "(")  # Open paren
    result = result.replace("_08", ")")  # Close paren

    # Process single-char patterns last
    result = result.replace("_3", "<")  # Template open
    result = result.replace("_4", ">")  # Template close

    return result


def extract_namespace_from_refid(refid: str) -> str:
    """Extract the namespace prefix from a doxygen refid.
    e.g. 'namespacefacebook_1_1yoga_1a...' -> 'facebook::yoga'
         'structfacebook_1_1react_1_1detail_1_1is__dynamic' -> 'facebook::react::detail::is_dynamic'
         'classfacebook_1_1react_1_1SyncCallback_3_01R_07Args_8_8_8_08_4' -> 'facebook::react::SyncCallback< R(Args...)>'

    Doxygen encoding:
    - '::' is encoded as '_1_1'
    - '_' in identifiers is encoded as '__' (double underscore)
    - Template specializations are encoded with hex-like codes (see decode_doxygen_template_encoding)
    """
    for prefix in ("namespace", "struct", "class", "union"):
        if refid.startswith(prefix):
            compound_part = refid[len(prefix) :]
            idx = compound_part.find("_1a")
            if idx != -1:
                compound_part = compound_part[:idx]
            # First replace '::' encoding (_1_1 -> ::)
            result = compound_part.replace("_1_1", "::")
            # Then replace double underscore with single underscore
            # (Doxygen encodes '_' in identifiers as '__')
            result = result.replace("__", "_")
            # Decode template specialization encodings
            result = decode_doxygen_template_encoding(result)
            return result
    return ""


def normalize_angle_brackets(text: str) -> str:
    """Doxygen adds spaces around < and > to avoid XML ambiguity.
    e.g. "NSArray< id< RCTBridgeMethod > > *" -> "NSArray<id<RCTBridgeMethod>> *"
    """
    text = re.sub(r"<\s+", "<", text)
    text = re.sub(r"\s+>", ">", text)
    return text


def resolve_ref_text_name(type_def: compound.refTextType) -> str:
    """Resolve the text content of a refTextType."""
    if hasattr(type_def, "content_") and type_def.content_:
        name = ""
        for part in type_def.content_:
            if part.category == 1:  # MixedContainer.CategoryText
                name += part.value
            elif part.category == 3:  # MixedContainer.CategoryComplex (ref element)
                if hasattr(part.value, "get_valueOf_"):
                    name += part.value.get_valueOf_()
                elif hasattr(part.value, "valueOf_"):
                    name += part.value.valueOf_
                else:
                    name += str(part.value)
        return normalize_angle_brackets(name)

    if type_def.ref:
        return normalize_angle_brackets(type_def.ref[0].get_valueOf_())

    return normalize_angle_brackets(type_def.get_valueOf_())


class InitializerType(Enum):
    NONE = (0,)
    ASSIGNMENT = (1,)
    BRACE = 2


def resolve_linked_text_name(
    type_def: compound.linkedTextType,
    strip_initializers: bool = False,
) -> (str, InitializerType):
    """
    Resolve the full text content of a linkedTextType, including all text
    fragments and ref elements.
    """
    name = ""
    in_string = False

    if hasattr(type_def, "content_") and type_def.content_:
        for part in type_def.content_:
            if part.category == 1:  # MixedContainer.CategoryText
                in_string = part.value.count('"') % 2 != in_string
                name += part.value
            elif part.category == 3:  # MixedContainer.CategoryComplex (ref element)
                # For ref elements, get the text content and fully qualify using refid
                text = ""
                if hasattr(part.value, "get_valueOf_"):
                    text = part.value.get_valueOf_()
                elif hasattr(part.value, "valueOf_"):
                    text = part.value.valueOf_
                else:
                    text = str(part.value)

                # Don't resolve refs inside string literals - doxygen may
                # incorrectly treat symbols in strings as references
                refid = getattr(part.value, "refid", None)
                if refid and not in_string:
                    text = _qualify_text_with_refid(text, refid)

                name += text
    elif type_def.ref:
        name = type_def.ref[0].get_valueOf_()
    else:
        name = type_def.get_valueOf_()

    initialier_type = InitializerType.NONE
    if strip_initializers:
        if name.startswith("="):
            # Detect assignment initializers: = value
            initialier_type = InitializerType.ASSIGNMENT
            name = name[1:]
        elif name.startswith("{") and name.endswith("}"):
            # Detect brace initializers: {value}
            initialier_type = InitializerType.BRACE
            name = name[1:-1].strip()

    return (normalize_angle_brackets(name.strip()), initialier_type)


def _qualify_text_with_refid(text: str, refid: str) -> str:
    """Qualify a text symbol using the namespace extracted from its doxygen refid.

    For ref elements, doxygen provides a refid that encodes the fully qualified
    path to the referenced symbol. This function extracts the namespace from
    that refid and prepends it to the text, avoiding redundant qualification.

    Args:
        text: The symbol text (e.g., "SyncCallback")
        refid: The doxygen refid (e.g., "classfacebook_1_1react_1_1SyncCallback...")

    Returns:
        The qualified text (e.g., "facebook::react::SyncCallback")
    """
    ns = extract_namespace_from_refid(refid)

    # Skip re-qualification if text is already globally qualified
    # (starts with "::") - it's already an absolute path
    if not ns or text.startswith(ns) or text.startswith("::"):
        return text

    # The text may already start with a trailing portion of the namespace.
    # For example ns="facebook::react::HighResDuration" and
    # text="HighResDuration::zero". We need to find the longest suffix of ns
    # that is a prefix of text (on a "::" boundary) and only prepend the
    # missing part.
    ns_parts = ns.split("::")
    prepend = ns

    for i in range(1, len(ns_parts)):
        suffix = "::".join(ns_parts[i:])
        # Also compare without template args - for template specializations
        # like "SyncCallback< R(Args...)>", text "SyncCallback" should match
        base_suffix = _strip_template_args(ns_parts[i])
        if (
            text.startswith(suffix + "::")
            or text == suffix
            or text.startswith(base_suffix + "::")
            or text == base_suffix
        ):
            prepend = "::".join(ns_parts[:i])
            break

    return prepend + "::" + text


def _strip_template_args(name: str) -> str:
    """Strip template arguments from a type name.

    e.g. 'SyncCallback< R(Args...)>' -> 'SyncCallback'
    """
    angle_idx = name.find("<")
    return name[:angle_idx].rstrip() if angle_idx != -1 else name