Skip to content

Commit 1ce9bc8

Browse files
committed
cuda_bindings: collapse wrapped hyphen spaces in clean_enum_member_docstring
Add _fix_hyphenation_wordwrap_spacing to remove spurious spaces around hyphens from reflowed __doc__ text ([a-z]- [a-z] and [a-z] -[a-z]), applied until stable. Use it in clean_enum_member_docstring after whitespace collapse and in _explanation_dict_text_for_cleaned_doc_compare for symmetric comparison. Add examples to test_clean_enum_member_docstring_examples. Made-with: Cursor
1 parent 4adec22 commit 1ce9bc8

1 file changed

Lines changed: 27 additions & 0 deletions

File tree

cuda_bindings/tests/test_enum_explanations.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,27 @@ def _explanation_dict_text_for_cleaned_doc_compare(value) -> str:
5959
s = _explanation_text_from_dict_value(value)
6060
s = _strip_doxygen_double_colon_prefixes(s)
6161
s = re.sub(r"\s+", " ", s).strip()
62+
s = _fix_hyphenation_wordwrap_spacing(s)
63+
return s
64+
65+
66+
def _fix_hyphenation_wordwrap_spacing(s: str) -> str:
67+
"""Remove spaces around hyphens introduced by line wrapping in generated ``__doc__`` text.
68+
69+
Sphinx/reflow often splits hyphenated words as ``non- linear`` or ``word -word``.
70+
The explanation dicts are usually single-line and do not contain these splits; the
71+
mismatch shows up on the cleaned enum side, so this runs inside
72+
``clean_enum_member_docstring`` (and the same transform is applied to dict text for
73+
comparison parity).
74+
75+
Patterns (all lowercase ASCII letters as in the CUDA blurbs): ``[a-z]- [a-z]`` and
76+
``[a-z] -[a-z]``. Applied repeatedly until stable.
77+
"""
78+
prev = None
79+
while prev != s:
80+
prev = s
81+
s = re.sub(r"([a-z])- ([a-z])", r"\1-\2", s)
82+
s = re.sub(r"([a-z]) -([a-z])", r"\1-\2", s)
6283
return s
6384

6485

@@ -72,6 +93,9 @@ def clean_enum_member_docstring(doc: str | None) -> str | None:
7293
dropped). Does not aim for perfect reST parsing—only patterns that appear on these
7394
enums in practice.
7495
96+
After whitespace collapse, removes spurious spaces around hyphens from line wrapping
97+
(``[a-z]- [a-z]`` and ``[a-z] -[a-z]``) so ``non- linear`` matches dict ``non-linear``.
98+
7599
Returns ``None`` if ``doc`` is ``None``; otherwise returns a non-empty or empty str.
76100
"""
77101
if doc is None:
@@ -89,6 +113,7 @@ def clean_enum_member_docstring(doc: str | None) -> str | None:
89113
s = re.sub(r"\*([^*]+)\*", r"\1", s)
90114
# Collapse whitespace (newlines -> spaces) and trim
91115
s = re.sub(r"\s+", " ", s).strip()
116+
s = _fix_hyphenation_wordwrap_spacing(s)
92117
return s
93118

94119

@@ -109,6 +134,8 @@ def clean_enum_member_docstring(doc: str | None) -> str | None:
109134
),
110135
pytest.param("**Note:** text", "Note: text", id="strip_bold"),
111136
pytest.param("[Deprecated]\n", "[Deprecated]", id="deprecated_line"),
137+
pytest.param("non- linear", "non-linear", id="hyphen_space_after"),
138+
pytest.param("word -word", "word-word", id="hyphen_space_before"),
112139
],
113140
)
114141
def test_clean_enum_member_docstring_examples(raw, expected):

0 commit comments

Comments
 (0)