Skip to content

Commit 481134a

Browse files
j-piasecki authored and meta-codesync[bot] committed
Unify doxygen text parsing (#55696)
Summary: Pull Request resolved: #55696 Changelog: [Internal] Unifies different methods of parsing doxygen text outputs into a single method, able to resolve nested references. Reviewed By: cipolleschi Differential Revision: D94077808 fbshipit-source-id: 494ee4da7380e17a60bbdae8e552f1c3827c1130
1 parent 6ea9236 commit 481134a

File tree

6 files changed

+131
-116
lines changed

6 files changed

+131
-116
lines changed

scripts/cxx-api/parser/main.py

Lines changed: 91 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import os
99
import re
10+
from enum import Enum
1011
from pprint import pprint
1112

1213
from doxmlparser import compound, index
@@ -24,127 +25,100 @@
2425
from .utils import Argument, extract_qualifiers, parse_qualified_path
2526

2627

27-
def resolve_ref_text_name(type_def: compound.refTextType) -> str:
28-
"""
29-
Resolve the full text content of a refTextType, including all text
30-
fragments and ref elements.
31-
"""
32-
if hasattr(type_def, "content_") and type_def.content_:
33-
name = ""
34-
for part in type_def.content_:
35-
if part.category == 1: # MixedContainer.CategoryText
36-
name += part.value
37-
elif part.category == 3: # MixedContainer.CategoryComplex (ref element)
38-
if hasattr(part.value, "get_valueOf_"):
39-
name += part.value.get_valueOf_()
40-
elif hasattr(part.value, "valueOf_"):
41-
name += part.value.valueOf_
42-
else:
43-
name += str(part.value)
44-
return name
45-
46-
if type_def.ref:
47-
return type_def.ref[0].get_valueOf_()
48-
49-
return type_def.get_valueOf_()
50-
51-
5228
def extract_namespace_from_refid(refid: str) -> str:
    """Extract the qualified compound name encoded in a doxygen refid.

    e.g. 'namespacefacebook_1_1yoga_1a...' -> 'facebook::yoga'
         'structfacebook_1_1react_1_1detail_1_1is__dynamic' -> 'facebook::react::detail::is_dynamic'

    Doxygen encoding:
    - '::' is encoded as '_1_1'
    - '_' in identifiers is encoded as '__' (double underscore)
    - a member anchor is appended to the compound part as '_1a<hash>'

    The encoded name is decoded in a single left-to-right pass. Decoding
    with independent find/replace calls is ambiguous: '_1a' also occurs
    inside '_1_1a...' (a nested name starting with 'a'), and '_1_1' can
    straddle an escaped underscore followed by a literal '1'.

    Returns an empty string when the refid does not start with one of the
    known compound-kind prefixes.
    """
    for prefix in ("namespace", "struct", "class", "union"):
        if not refid.startswith(prefix):
            continue

        encoded = refid[len(prefix) :]
        decoded = []
        pos = 0
        # Alternation order matters: at any position '__' and '_1_1' must be
        # tried before '_1a' so escapes are never mistaken for the anchor.
        for match in re.finditer(r"__|_1_1|_1a", encoded):
            decoded.append(encoded[pos : match.start()])
            token = match.group()
            if token == "_1a":
                # Start of the member anchor: everything after it is a hash,
                # not part of the compound name.
                pos = len(encoded)
                break
            decoded.append("_" if token == "__" else "::")
            pos = match.end()
        decoded.append(encoded[pos:])
        return "".join(decoded)
    return ""
6450

6551

66-
def resolve_linked_text_name(type_def: compound.linkedTextType) -> (str, bool):
67-
"""
68-
Resolve the full text content of a linkedTextType, including all text
69-
fragments and ref elements.
70-
"""
71-
name = ""
72-
in_string = False
73-
74-
for part in type_def.content_:
75-
if part.category == 1: # MixedContainer.CategoryText
76-
in_string = part.value.count('"') % 2 != in_string
77-
name += part.value
78-
elif part.category == 3: # MixedContainer.CategoryComplex (ref element)
79-
# For ref elements, get the text content and fully qualify using refid
80-
text = ""
81-
if hasattr(part.value, "get_valueOf_"):
82-
text = part.value.get_valueOf_()
83-
elif hasattr(part.value, "valueOf_"):
84-
text = part.value.valueOf_
85-
else:
86-
text = str(part.value)
87-
88-
# Don't resolve refs inside string literals - doxygen may
89-
# incorrectly treat symbols in strings as references
90-
refid = getattr(part.value, "refid", None)
91-
if refid and not in_string:
92-
ns = extract_namespace_from_refid(refid)
93-
if ns and not text.startswith(ns):
94-
# The text may already start with a trailing portion of
95-
# the namespace. For example ns="facebook::react::HighResDuration"
96-
# and text="HighResDuration::zero". We need to find the
97-
# longest suffix of ns that is a prefix of text (on a "::"
98-
# boundary) and only prepend the missing part.
99-
ns_parts = ns.split("::")
100-
prepend = ns
101-
for i in range(1, len(ns_parts)):
102-
suffix = "::".join(ns_parts[i:])
103-
if text.startswith(suffix + "::") or text == suffix:
104-
prepend = "::".join(ns_parts[:i])
105-
break
106-
text = prepend + "::" + text
107-
108-
name += text
109-
110-
is_brace_initializer = False
111-
if name.startswith("="):
112-
# Detect assignment initializers: = value
113-
name = name[1:]
114-
elif name.startswith("{") and name.endswith("}"):
115-
# Detect brace initializers: {value}
116-
is_brace_initializer = True
117-
name = name[1:-1].strip()
118-
119-
return (name.strip(), is_brace_initializer)
52+
class InitializerType(Enum):
    """Kind of initializer syntax detected while resolving a doxygen text value."""

    # No initializer syntax was detected (or stripping was not requested).
    NONE = 0
    # Assignment initializer: `= value`.
    ASSIGNMENT = 1
    # Brace initializer: `{value}`.
    BRACE = 2
12056

12157

122-
def resolve_linked_text_full(type_def: compound.linkedTextType) -> str:
58+
def _qualify_ref_text(text: str, ns: str) -> str:
    """Prefix *text* with whatever part of *ns* it does not already carry."""
    # Compare on a "::" boundary so that e.g. ns="test" is not considered
    # already present in text="testing".
    if text == ns or text.startswith(ns + "::"):
        return text

    # The text may already start with a trailing portion of the namespace.
    # For example ns="facebook::react::HighResDuration" and
    # text="HighResDuration::zero". Find the longest suffix of ns that is a
    # prefix of text (on a "::" boundary) and only prepend the missing part.
    ns_parts = ns.split("::")
    for i in range(1, len(ns_parts)):
        suffix = "::".join(ns_parts[i:])
        if text == suffix or text.startswith(suffix + "::"):
            return "::".join(ns_parts[:i]) + "::" + text
    return ns + "::" + text


def resolve_linked_text_name(
    type_def: compound.linkedTextType,
    strip_initializers: bool = False,
) -> tuple[str, InitializerType]:
    """
    Resolve the full text content of a linkedTextType, including all text
    fragments and ref elements.

    Ref elements are qualified using the namespace encoded in their refid,
    except while inside a string literal. When the node has no mixed content
    the first ref, or else the plain value, is used instead.

    If strip_initializers is set, a leading "=" or a surrounding "{...}" is
    removed from the result and reported through the returned
    InitializerType; otherwise InitializerType.NONE is always returned.

    Returns a (stripped text, initializer type) tuple.
    """
    content = getattr(type_def, "content_", None)
    if content:
        fragments = []
        in_string = False
        for part in content:
            if part.category == 1:  # MixedContainer.CategoryText
                # An odd number of quotes flips whether we are inside a
                # string literal.
                if part.value.count('"') % 2:
                    in_string = not in_string
                fragments.append(part.value)
            elif part.category == 3:  # MixedContainer.CategoryComplex (ref element)
                # For ref elements, get the text content and fully qualify
                # it using the refid.
                if hasattr(part.value, "get_valueOf_"):
                    text = part.value.get_valueOf_()
                elif hasattr(part.value, "valueOf_"):
                    text = part.value.valueOf_
                else:
                    text = str(part.value)

                # Don't resolve refs inside string literals - doxygen may
                # incorrectly treat symbols in strings as references
                refid = getattr(part.value, "refid", None)
                if refid and not in_string:
                    ns = extract_namespace_from_refid(refid)
                    if ns:
                        text = _qualify_ref_text(text, ns)

                fragments.append(text)
        name = "".join(fragments)
    elif type_def.ref:
        name = type_def.ref[0].get_valueOf_() or ""
    else:
        # The plain value may be missing; fall back to an empty string so
        # the stripping below cannot fail.
        name = type_def.get_valueOf_() or ""

    initializer_type = InitializerType.NONE
    if strip_initializers:
        if name.startswith("="):
            # Detect assignment initializers: = value
            initializer_type = InitializerType.ASSIGNMENT
            name = name[1:]
        elif name.startswith("{") and name.endswith("}"):
            # Detect brace initializers: {value}
            initializer_type = InitializerType.BRACE
            name = name[1:-1].strip()

    return (name.strip(), initializer_type)
148122

149123

150124
def get_base_classes(
@@ -191,10 +165,10 @@ def get_template_params(
191165
if compound_object.templateparamlist is not None:
192166
for param in compound_object.templateparamlist.param:
193167
template_value = (
194-
resolve_ref_text_name(param.defval) if param.defval else None
168+
resolve_linked_text_name(param.defval)[0] if param.defval else None
195169
)
196170
template_name = param.defname
197-
template_type = resolve_ref_text_name(param.get_type())
171+
template_type = resolve_linked_text_name(param.get_type())[0]
198172

199173
if template_name is None:
200174
# Split type string and extract name from the end
@@ -235,7 +209,7 @@ def get_variable_member(
235209
# Ignore anonymous variables
236210
return None
237211

238-
variable_type = resolve_ref_text_name(member_def.get_type()).strip()
212+
(variable_type, _) = resolve_linked_text_name(member_def.get_type())
239213
variable_value = None
240214
variable_definition = member_def.definition
241215
variable_argstring = member_def.get_argsstring()
@@ -252,9 +226,12 @@ def get_variable_member(
252226

253227
is_brace_initializer = False
254228
if member_def.initializer is not None:
255-
(variable_value, is_brace_initializer) = resolve_linked_text_name(
256-
member_def.initializer
229+
(variable_value, initializer_type) = resolve_linked_text_name(
230+
member_def.initializer,
231+
strip_initializers=True,
257232
)
233+
if initializer_type == InitializerType.BRACE:
234+
is_brace_initializer = True
258235

259236
return VariableMember(
260237
variable_name,
@@ -287,11 +264,13 @@ def get_doxygen_params(
287264
arguments: list[Argument] = []
288265
for param in params:
289266
param_type = (
290-
resolve_ref_text_name(param.get_type()).strip() if param.get_type() else ""
267+
resolve_linked_text_name(param.get_type())[0].strip()
268+
if param.get_type()
269+
else ""
291270
)
292271
param_name = param.declname or param.defname or None
293272
param_default = (
294-
resolve_ref_text_name(param.defval).strip() if param.defval else None
273+
resolve_linked_text_name(param.defval)[0].strip() if param.defval else None
295274
)
296275

297276
# Doxygen splits array dimensions into a separate <array> element.
@@ -333,7 +312,7 @@ def get_function_member(
333312
Get the function member from a member definition.
334313
"""
335314
function_name = function_def.get_name()
336-
function_type = resolve_ref_text_name(function_def.get_type())
315+
function_type = resolve_linked_text_name(function_def.get_type())[0]
337316
function_arg_string = function_def.get_argsstring()
338317
is_pure_virtual = function_def.get_virt() == "pure-virtual"
339318
function_virtual = function_def.get_virt() == "virtual" or is_pure_virtual
@@ -365,7 +344,7 @@ def get_typedef_member(
365344
typedef_def: compound.memberdefType, visibility: str
366345
) -> TypedefMember:
367346
typedef_name = typedef_def.get_name()
368-
typedef_type = resolve_ref_text_name(typedef_def.get_type())
347+
typedef_type = resolve_linked_text_name(typedef_def.get_type())[0]
369348
typedef_argstring = typedef_def.get_argsstring()
370349
typedef_definition = typedef_def.definition
371350

@@ -402,7 +381,7 @@ def get_concept_member(
402381
if initializer:
403382
# The initializer contains the entire constraint definition.
404383
# We want to extract the constraint part after "="
405-
initializer_text = resolve_linked_text_full(initializer)
384+
initializer_text = resolve_linked_text_name(initializer)[0]
406385
eq_pos = initializer_text.find("=")
407386
if eq_pos != -1:
408387
constraint = initializer_text[eq_pos + 1 :].strip()
@@ -418,7 +397,7 @@ def create_enum_scope(snapshot: Snapshot, enum_def: compound.EnumdefType):
418397
Create an enum scope in the snapshot.
419398
"""
420399
scope = snapshot.create_enum(enum_def.qualifiedname)
421-
scope.kind.type = resolve_ref_text_name(enum_def.get_type())
400+
scope.kind.type = resolve_linked_text_name(enum_def.get_type())[0]
422401
scope.location = enum_def.location.file
423402

424403
for enum_value_def in enum_def.enumvalue:

scripts/cxx-api/tests/snapshots/should_handle_array_param/snapshot.api

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
struct test::Node {
22
public void setArray(int(&arr)[10]);
33
template <size_t N>
4-
public static std::vector< test::PropNameID > names(PropNameID(&&propertyNames)[N]);
4+
public static std::vector< test::PropNameID > names(test::PropNameID(&&propertyNames)[N]);
55
}
66

77
struct test::PropNameID {
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
union test::TypedefUnion {
2-
public FloatType floatValue;
3-
public ValueType intValue;
2+
public test::TypedefUnion::FloatType floatValue;
3+
public test::TypedefUnion::ValueType intValue;
44
public typedef float FloatType;
55
public using ValueType = int32_t;
66
}

scripts/cxx-api/tests/snapshots/should_handle_unnamed_template_param_with_default_value/snapshot.api

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,6 @@ class test::Symbol {
1111
}
1212

1313
struct test::Value {
14-
template <typename T, typename = std::enable_if_t< std::is_base_of<Symbol, T>::value || std::is_base_of<BigInt, T>::value || std::is_base_of<String, T>::value || std::is_base_of<Object, T>::value>>
14+
template <typename T, typename = std::enable_if_t< std::is_base_of<test::Symbol, T>::value || std::is_base_of<test::BigInt, T>::value || std::is_base_of<test::String, T>::value || std::is_base_of<test::Object, T>::value>>
1515
public Value(T && other);
1616
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
template <typename T>
2+
facebook::react::detail::is_dynamic< T >::type & facebook::react::jsArgAsDynamic(T && args, size_t n);
3+
4+
5+
template <typename T>
6+
struct facebook::react::detail::is_dynamic {
7+
public using type = typename std::enable_if< std::is_assignable< folly::dynamic, T >::value, T >::type;
8+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* This source code is licensed under the MIT license found in the
5+
* LICENSE file in the root directory of this source tree.
6+
*/
7+
8+
#pragma once
9+
10+
namespace facebook {
11+
12+
namespace react {
13+
14+
namespace detail {
15+
16+
template <typename T>
17+
struct is_dynamic {
18+
using type = typename std::enable_if<std::is_assignable<folly::dynamic, T>::value, T>::type;
19+
};
20+
21+
} // end namespace detail
22+
23+
template <typename T>
24+
typename detail::is_dynamic<T>::type &jsArgAsDynamic(T &&args, size_t n);
25+
26+
} // namespace react
27+
28+
} // namespace facebook

0 commit comments

Comments
 (0)