Skip to content

Commit 81f689e

Browse files
ayhammouda and claude committed
refactor(09): simplify compare.py extractors (precompile regexes, dedup)
Pre-compile the four locked extractor regexes once at import instead of re-parsing the pattern strings on every call, and collapse the three byte-identical scalar extractors (_extract_new_in / _extract_changed_in / _extract_deprecated_in) into a single _extract_version(pattern, text). Named _*_RE constants are retained for spike provenance; _extract_see_also (the only externally-imported helper) is unchanged. Behavior-preserving: 284 tests pass, ruff + pyright clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent cf7a956 commit 81f689e

1 file changed

Lines changed: 14 additions & 22 deletions

File tree

src/mcp_server_python_docs/services/compare.py

Lines changed: 14 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,13 @@
3939
# --- Locked extractor regexes (verbatim from 09-01-data-shape-spike-SUMMARY) ---
4040
# All four HOLD against the spike fixture (A1/A2/sibling probes). Scalar
4141
# extractors return None on no-match; _extract_see_also returns [].
42-
_NEW_IN_RE = r"New in version\s+(\d+\.\d+)"
43-
_CHANGED_IN_RE = r"Changed in version\s+(\d+\.\d+)"
44-
_DEPRECATED_IN_RE = r"Deprecated since version\s+(\d+\.\d+)"
42+
_NEW_IN_RE = re.compile(r"New in version\s+(\d+\.\d+)")
43+
_CHANGED_IN_RE = re.compile(r"Changed in version\s+(\d+\.\d+)")
44+
_DEPRECATED_IN_RE = re.compile(r"Deprecated since version\s+(\d+\.\d+)")
4545
# Markdown link label extractor; MUST be applied only within a "See also" window
4646
# (locate case-insensitive "see also", read forward to next ATX heading / window
4747
# end), not against the whole section, or it captures unrelated body links.
48-
_SEE_ALSO_LINK_RE = r"\[([^\]]+)\]\("
48+
_SEE_ALSO_LINK_RE = re.compile(r"\[([^\]]+)\]\(")
4949

5050
# M2 note text emitted when a docs page is unfetchable in the both-present branch.
5151
_PAGE_UNAVAILABLE_NOTE = "docs page not available for one or both versions"
@@ -56,21 +56,13 @@
5656
_SIGNATURE_LINE_MAX = 80
5757

5858

59-
def _extract_new_in(text: str) -> str | None:
60-
"""Extract the 'New in version X.Y' version, or None (locked _NEW_IN_RE)."""
61-
match = re.search(_NEW_IN_RE, text)
62-
return match.group(1) if match else None
63-
64-
65-
def _extract_changed_in(text: str) -> str | None:
66-
"""Extract the 'Changed in version X.Y' version, or None (_CHANGED_IN_RE)."""
67-
match = re.search(_CHANGED_IN_RE, text)
68-
return match.group(1) if match else None
69-
59+
def _extract_version(pattern: re.Pattern[str], text: str) -> str | None:
60+
"""Return the captured version from the first match of ``pattern``, or None.
7061
71-
def _extract_deprecated_in(text: str) -> str | None:
72-
"""Extract the 'Deprecated since version X.Y' version, or None."""
73-
match = re.search(_DEPRECATED_IN_RE, text)
62+
Shared by the New-in / Changed-in / Deprecated-since extractors, which differ
63+
only by their (spike-locked) pattern literal (``_NEW_IN_RE`` etc.).
64+
"""
65+
match = pattern.search(text)
7466
return match.group(1) if match else None
7567

7668

@@ -105,7 +97,7 @@ def _extract_see_also(text: str) -> list[str]:
10597
elif started:
10698
break # blank line ends the admonition block
10799

108-
return re.findall(_SEE_ALSO_LINK_RE, "\n".join(window))
100+
return _SEE_ALSO_LINK_RE.findall("\n".join(window))
109101

110102

111103
def _first_nonempty_line(text: str) -> str:
@@ -205,7 +197,7 @@ def compare(self, symbol: str, v1: str, v2: str) -> CompareVersionsResult:
205197
new_in: str | None = None
206198
try:
207199
text_v2 = self._section_text(sym_v2.uri, sym_v2.anchor, v2)
208-
new_in = _extract_new_in(text_v2)
200+
new_in = _extract_version(_NEW_IN_RE, text_v2)
209201
except PageNotFoundError:
210202
new_in = None
211203
return CompareVersionsResult(
@@ -236,8 +228,8 @@ def compare(self, symbol: str, v1: str, v2: str) -> CompareVersionsResult:
236228
note=_PAGE_UNAVAILABLE_NOTE,
237229
)
238230

239-
changed_in = _extract_changed_in(text_v2)
240-
deprecated_in = _extract_deprecated_in(text_v2)
231+
changed_in = _extract_version(_CHANGED_IN_RE, text_v2)
232+
deprecated_in = _extract_version(_DEPRECATED_IN_RE, text_v2)
241233

242234
# signature_delta (M1, advisory): first non-empty line comparison.
243235
first_v1 = _first_nonempty_line(text_v1)

0 commit comments

Comments
 (0)