@@ -86,20 +86,35 @@ def classify_query(
8686 return "fts"
8787
8888
89+ def _build_concept_patterns (
90+ synonyms : dict [str , list [str ]],
91+ ) -> dict [str , re .Pattern [str ]]:
92+ """Pre-compile word-boundary regex patterns for multi-word synonym concepts."""
93+ patterns : dict [str , re .Pattern [str ]] = {}
94+ for concept in synonyms :
95+ if " " in concept :
96+ patterns [concept ] = re .compile (r"\b" + re .escape (concept ) + r"\b" )
97+ return patterns
98+
99+
89100def expand_synonyms (
90101 query : str ,
91102 synonyms : dict [str , list [str ]],
103+ * ,
104+ _concept_patterns : dict [str , re .Pattern [str ]] | None = None ,
92105) -> set [str ]:
93106 """Expand query using synonym table for concept search (RETR-05).
94107
95- Checks each token and the full query against the synonym table.
96- Returns a set of terms including the original tokens plus any
97- expansion values .
108+ Checks each token and multi-word phrases against the synonym table.
109+ Single-word concepts use exact token matching. Multi-word concepts
110+ use word-boundary regex to avoid substring false positives (CR-01) .
98111
99112 Args:
100113 query: User search query.
101114 synonyms: Mapping of concept -> list of expansion terms.
102115 Loaded from synonyms.yaml at startup.
116+ _concept_patterns: Pre-compiled regex patterns for multi-word
117+ concepts. Built lazily on first call if not provided.
103118
104119 Returns:
105120 Set of terms (original + expansions). Empty set if query
@@ -112,15 +127,20 @@ def expand_synonyms(
112127 tokens = query .lower ().split ()
113128 expanded = set (tokens )
114129
115- # Check individual tokens against synonym keys
130+ # Single-word concepts: exact token match (no substring false positives)
116131 for token in tokens :
117132 if token in synonyms :
118133 expanded .update (synonyms [token ])
119134
120- # Check multi-word concepts (e.g., "http requests", "file io")
135+ # Multi-word concepts: word-boundary regex match (CR-01 fix)
136+ if _concept_patterns is None :
137+ _concept_patterns = _build_concept_patterns (synonyms )
121138 query_lower = query .lower ()
122139 for concept , expansions in synonyms .items ():
123- if concept in query_lower :
140+ if " " not in concept :
141+ continue
142+ pattern = _concept_patterns .get (concept )
143+ if pattern and pattern .search (query_lower ):
124144 expanded .update (expansions )
125145
126146 return expanded
@@ -129,6 +149,8 @@ def expand_synonyms(
129149def build_match_expression (
130150 query : str ,
131151 synonyms : dict [str , list [str ]],
152+ * ,
153+ _concept_patterns : dict [str , re .Pattern [str ]] | None = None ,
132154) -> str :
133155 """Build a complete FTS5 MATCH expression with synonym expansion.
134156
@@ -138,6 +160,7 @@ def build_match_expression(
138160 Args:
139161 query: User search query.
140162 synonyms: Synonym mapping for expansion.
163+ _concept_patterns: Pre-compiled regex patterns for multi-word concepts.
141164
142165 Returns:
143166 FTS5-safe MATCH expression string.
@@ -146,7 +169,7 @@ def build_match_expression(
146169 if not query :
147170 return '""'
148171
149- expanded = expand_synonyms (query , synonyms )
172+ expanded = expand_synonyms (query , synonyms , _concept_patterns = _concept_patterns )
150173 original_tokens = set (query .lower ().split ())
151174
152175 # If expansion added new terms, OR-join all terms
0 commit comments