|
39 | 39 | # --- Locked extractor regexes (verbatim from 09-01-data-shape-spike-SUMMARY) --- |
40 | 40 | # All four HOLD against the spike fixture (A1/A2/sibling probes). Scalar |
41 | 41 | # extractors return None on no-match; _extract_see_also returns []. |
42 | | -_NEW_IN_RE = r"New in version\s+(\d+\.\d+)" |
43 | | -_CHANGED_IN_RE = r"Changed in version\s+(\d+\.\d+)" |
44 | | -_DEPRECATED_IN_RE = r"Deprecated since version\s+(\d+\.\d+)" |
| 42 | +_NEW_IN_RE = re.compile(r"New in version\s+(\d+\.\d+)") |
| 43 | +_CHANGED_IN_RE = re.compile(r"Changed in version\s+(\d+\.\d+)") |
| 44 | +_DEPRECATED_IN_RE = re.compile(r"Deprecated since version\s+(\d+\.\d+)") |
45 | 45 | # Markdown link label extractor; MUST be applied only within a "See also" window |
46 | 46 | # (locate case-insensitive "see also", read forward to next ATX heading / window |
47 | 47 | # end), not against the whole section, or it captures unrelated body links. |
48 | | -_SEE_ALSO_LINK_RE = r"\[([^\]]+)\]\(" |
| 48 | +_SEE_ALSO_LINK_RE = re.compile(r"\[([^\]]+)\]\(") |
49 | 49 |
|
50 | 50 | # M2 note text emitted when a docs page is unfetchable in the both-present branch. |
51 | 51 | _PAGE_UNAVAILABLE_NOTE = "docs page not available for one or both versions" |
|
56 | 56 | _SIGNATURE_LINE_MAX = 80 |
57 | 57 |
|
58 | 58 |
|
59 | | -def _extract_new_in(text: str) -> str | None: |
60 | | - """Extract the 'New in version X.Y' version, or None (locked _NEW_IN_RE).""" |
61 | | - match = re.search(_NEW_IN_RE, text) |
62 | | - return match.group(1) if match else None |
63 | | - |
64 | | - |
65 | | -def _extract_changed_in(text: str) -> str | None: |
66 | | - """Extract the 'Changed in version X.Y' version, or None (_CHANGED_IN_RE).""" |
67 | | - match = re.search(_CHANGED_IN_RE, text) |
68 | | - return match.group(1) if match else None |
69 | | - |
| 59 | +def _extract_version(pattern: re.Pattern[str], text: str) -> str | None: |
| 60 | + """Return the captured version from the first match of ``pattern``, or None. |
70 | 61 |
|
71 | | -def _extract_deprecated_in(text: str) -> str | None: |
72 | | - """Extract the 'Deprecated since version X.Y' version, or None.""" |
73 | | - match = re.search(_DEPRECATED_IN_RE, text) |
| 62 | + Shared by the New-in / Changed-in / Deprecated-since extractions, which differ |
| 63 | + only in the (spike-locked) compiled pattern passed in (``_NEW_IN_RE`` etc.). |
| 64 | + """ |
| 65 | + match = pattern.search(text) |
74 | 66 | return match.group(1) if match else None |
75 | 67 |
|
76 | 68 |
|
@@ -105,7 +97,7 @@ def _extract_see_also(text: str) -> list[str]: |
105 | 97 | elif started: |
106 | 98 | break # blank line ends the admonition block |
107 | 99 |
|
108 | | - return re.findall(_SEE_ALSO_LINK_RE, "\n".join(window)) |
| 100 | + return _SEE_ALSO_LINK_RE.findall("\n".join(window)) |
109 | 101 |
|
110 | 102 |
|
111 | 103 | def _first_nonempty_line(text: str) -> str: |
@@ -205,7 +197,7 @@ def compare(self, symbol: str, v1: str, v2: str) -> CompareVersionsResult: |
205 | 197 | new_in: str | None = None |
206 | 198 | try: |
207 | 199 | text_v2 = self._section_text(sym_v2.uri, sym_v2.anchor, v2) |
208 | | - new_in = _extract_new_in(text_v2) |
| 200 | + new_in = _extract_version(_NEW_IN_RE, text_v2) |
209 | 201 | except PageNotFoundError: |
210 | 202 | new_in = None |
211 | 203 | return CompareVersionsResult( |
@@ -236,8 +228,8 @@ def compare(self, symbol: str, v1: str, v2: str) -> CompareVersionsResult: |
236 | 228 | note=_PAGE_UNAVAILABLE_NOTE, |
237 | 229 | ) |
238 | 230 |
|
239 | | - changed_in = _extract_changed_in(text_v2) |
240 | | - deprecated_in = _extract_deprecated_in(text_v2) |
| 231 | + changed_in = _extract_version(_CHANGED_IN_RE, text_v2) |
| 232 | + deprecated_in = _extract_version(_DEPRECATED_IN_RE, text_v2) |
241 | 233 |
|
242 | 234 | # signature_delta (M1, advisory): first non-empty line comparison. |
243 | 235 | first_v1 = _first_nonempty_line(text_v1) |
|
0 commit comments