forked from facebook/react-native
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtext_resolution.py
More file actions
225 lines (183 loc) · 8.21 KB
/
text_resolution.py
File metadata and controls
225 lines (183 loc) · 8.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""
Functions for resolving text content from Doxygen XML types.
"""
from __future__ import annotations
import re
from enum import Enum
from doxmlparser import compound
def decode_doxygen_template_encoding(encoded: str) -> str:
    """Turn Doxygen's underscore escape codes in a refid back into C++ text.

    Doxygen replaces punctuation in refids with underscore-prefixed codes.
    The codes handled here, and the text each one decodes to, are:

    - '_8_8_8' -> '...'  (variadic ellipsis)
    - '_00'    -> ', '   (comma; a space is appended for readability)
    - '_01'    -> ' '    (space)
    - '_02'    -> '*'    (pointer)
    - '_05'    -> '='    (equals)
    - '_06'    -> '&'    (reference)
    - '_07'    -> '('    (open paren)
    - '_08'    -> ')'    (close paren)
    - '_3'     -> '<'    (template open)
    - '_4'     -> '>'    (template close)

    e.g. 'SyncCallback_3_01R_07Args_8_8_8_08_4' -> 'SyncCallback< R(Args...)>'

    Any text that is not one of the codes above is passed through unchanged.
    """
    # Substitutions run one after another in this exact order: the longest
    # code first, then the '_0X' codes, and the short '_3'/'_4' codes last.
    substitutions = (
        ("_8_8_8", "..."),
        ("_00", ", "),
        ("_01", " "),
        ("_02", "*"),
        ("_05", "="),
        ("_06", "&"),
        ("_07", "("),
        ("_08", ")"),
        ("_3", "<"),
        ("_4", ">"),
    )
    decoded = encoded
    for code, plain in substitutions:
        decoded = decoded.replace(code, plain)
    return decoded
def extract_namespace_from_refid(refid: str) -> str:
    """Extract the qualified compound name encoded in a doxygen refid.

    e.g. 'namespacefacebook_1_1yoga_1a<32 hex digits>' -> 'facebook::yoga'
         'structfacebook_1_1react_1_1detail_1_1is__dynamic' -> 'facebook::react::detail::is_dynamic'
         'classfacebook_1_1react_1_1SyncCallback_3_01R_07Args_8_8_8_08_4' -> 'facebook::react::SyncCallback< R(Args...)>'

    Doxygen encoding:
    - '::' is encoded as '_1_1'
    - '_' in identifiers is encoded as '__' (double underscore)
    - Template specializations are encoded with hex-like codes (see decode_doxygen_template_encoding)

    Args:
        refid: A doxygen refid. Only refids starting with 'namespace',
            'struct', 'class' or 'union' are decoded.

    Returns:
        The decoded '::'-separated name, or '' when the refid does not start
        with one of the recognised compound prefixes.
    """
    for prefix in ("namespace", "struct", "class", "union"):
        if not refid.startswith(prefix):
            continue

        compound_part = refid[len(prefix):]

        # A member refid is the compound refid followed by a member anchor.
        # Only a *trailing* '_1a' + 32 hex digits is treated as that anchor.
        # A bare search for '_1a' would also hit the '_1_1' scope separator
        # whenever the next identifier starts with 'a' (for example
        # 'facebook_1_1react_1_1array') and cut the name short.
        # NOTE(review): assumes member anchors are 'a' followed by a
        # 32-digit lowercase hex digest - confirm for the Doxygen version in use.
        anchor = re.search(r"_1a[0-9a-f]{32}\Z", compound_part)
        if anchor:
            compound_part = compound_part[: anchor.start()]

        # First replace '::' encoding (_1_1 -> ::)
        result = compound_part.replace("_1_1", "::")
        # Then replace double underscore with single underscore
        # (Doxygen encodes '_' in identifiers as '__')
        result = result.replace("__", "_")
        # Finally decode template specialization encodings
        return decode_doxygen_template_encoding(result)

    return ""
def normalize_angle_brackets(text: str) -> str:
    """Remove the padding Doxygen places just inside angle brackets.

    Doxygen adds spaces around < and > to avoid XML ambiguity; this drops any
    whitespace directly after a '<' and directly before a '>'.

    e.g. "NSArray< id< RCTBridgeMethod > > *" -> "NSArray<id<RCTBridgeMethod>> *"
    """
    # Whitespace following an opening bracket goes first ...
    opened = re.sub(r"<\s+", "<", text)
    # ... then whitespace preceding a closing bracket.
    return re.sub(r"\s+>", ">", opened)
def resolve_ref_text_name(type_def: compound.refTextType) -> str:
    """Flatten a refTextType to plain text with angle-bracket padding removed.

    When the element has mixed content, its text fragments and ref elements
    are concatenated in order. Otherwise the text of the first ref is used,
    and if there are no refs the element's own value is used.
    """
    mixed_content = getattr(type_def, "content_", None)
    if mixed_content:
        fragments = []
        for item in mixed_content:
            if item.category == 1:  # MixedContainer.CategoryText
                fragments.append(item.value)
            elif item.category == 3:  # MixedContainer.CategoryComplex (ref element)
                ref = item.value
                if hasattr(ref, "get_valueOf_"):
                    fragments.append(ref.get_valueOf_())
                elif hasattr(ref, "valueOf_"):
                    fragments.append(ref.valueOf_)
                else:
                    fragments.append(str(ref))
        return normalize_angle_brackets("".join(fragments))

    # No mixed content: prefer the first ref, else the element's own text.
    refs = type_def.ref
    raw_text = refs[0].get_valueOf_() if refs else type_def.get_valueOf_()
    return normalize_angle_brackets(raw_text)
class InitializerType(Enum):
    """Kind of initializer that resolve_linked_text_name stripped from a text.

    The values are plain integers. Earlier, stray trailing commas made NONE
    and ASSIGNMENT one-element tuples while BRACE was an int.
    """

    # No initializer was detected or stripped.
    NONE = 0
    # Assignment initializer: "= value".
    ASSIGNMENT = 1
    # Brace initializer: "{value}".
    BRACE = 2
def resolve_linked_text_name(
    type_def: compound.linkedTextType,
    strip_initializers: bool = False,
) -> tuple[str, InitializerType]:
    """
    Resolve the full text content of a linkedTextType, including all text
    fragments and ref elements.

    Ref elements that carry a refid are qualified through
    _qualify_text_with_refid, except while inside a double-quoted string
    literal. When the element has no mixed content, the text of the first ref
    is used, and failing that the element's own value.

    Args:
        type_def: The linkedTextType element to flatten.
        strip_initializers: When True, a leading "=" or an enclosing "{...}"
            is removed from the text and reported in the returned
            InitializerType.

    Returns:
        A (text, initializer_type) tuple. The text is stripped of surrounding
        whitespace and has angle-bracket padding normalized. The
        initializer_type is InitializerType.NONE unless strip_initializers is
        True and an initializer was detected.
    """
    name = ""
    in_string = False

    if hasattr(type_def, "content_") and type_def.content_:
        for part in type_def.content_:
            if part.category == 1:  # MixedContainer.CategoryText
                # An odd number of double quotes in this fragment flips
                # whether we are inside a string literal.
                if part.value.count('"') % 2:
                    in_string = not in_string
                name += part.value
            elif part.category == 3:  # MixedContainer.CategoryComplex (ref element)
                # For ref elements, get the text content and fully qualify using refid
                if hasattr(part.value, "get_valueOf_"):
                    text = part.value.get_valueOf_()
                elif hasattr(part.value, "valueOf_"):
                    text = part.value.valueOf_
                else:
                    text = str(part.value)

                # Don't resolve refs inside string literals - doxygen may
                # incorrectly treat symbols in strings as references
                refid = getattr(part.value, "refid", None)
                if refid and not in_string:
                    text = _qualify_text_with_refid(text, refid)

                name += text
    elif type_def.ref:
        name = type_def.ref[0].get_valueOf_()
    else:
        name = type_def.get_valueOf_()

    initializer_type = InitializerType.NONE
    if strip_initializers:
        if name.startswith("="):
            # Detect assignment initializers: = value
            initializer_type = InitializerType.ASSIGNMENT
            name = name[1:]
        elif name.startswith("{") and name.endswith("}"):
            # Detect brace initializers: {value}
            initializer_type = InitializerType.BRACE
            name = name[1:-1].strip()

    return (normalize_angle_brackets(name.strip()), initializer_type)
def _qualify_text_with_refid(text: str, refid: str) -> str:
    """Prefix a symbol with the scope encoded in its doxygen refid.

    The scope is obtained from extract_namespace_from_refid(refid). Only the
    part of that scope which the text does not already spell out is
    prepended, so the result is not qualified twice.

    Args:
        text: The symbol text (e.g., "SyncCallback")
        refid: The doxygen refid (e.g., "classfacebook_1_1react_1_1SyncCallback...")

    Returns:
        The qualified text (e.g., "facebook::react::SyncCallback"). The text
        is returned unchanged when no scope can be extracted, when it already
        starts with the scope, or when it starts with "::" (already an
        absolute path).
    """
    scope = extract_namespace_from_refid(refid)
    if not scope:
        return text
    if text.startswith((scope, "::")):
        return text

    # The text may already begin with a trailing portion of the scope, e.g.
    # scope "facebook::react::HighResDuration" and text
    # "HighResDuration::zero". Walk the scope from left to right and stop at
    # the first (longest) tail that the text starts with on a "::" boundary;
    # only the segments before that tail are missing.
    segments = scope.split("::")
    for cut, segment in enumerate(segments[1:], start=1):
        tail = "::".join(segments[cut:])
        # Template specializations such as "SyncCallback< R(Args...)>" should
        # also match the plain text "SyncCallback", so the segment is compared
        # without its template arguments as well.
        bare_segment = _strip_template_args(segment)
        candidates = (tail, bare_segment)
        if text in candidates or text.startswith(
            tuple(candidate + "::" for candidate in candidates)
        ):
            return "::".join(segments[:cut]) + "::" + text

    # No overlap: the whole scope is missing.
    return scope + "::" + text
def _strip_template_args(name: str) -> str:
    """Drop everything from the first '<' onward in a type name.

    Whitespace left in front of the removed '<' is trimmed as well. A name
    without any '<' is returned exactly as given.

    e.g. 'SyncCallback< R(Args...)>' -> 'SyncCallback'
    """
    head, bracket, _ = name.partition("<")
    if not bracket:
        return name
    return head.rstrip()