|
| 1 | +"""Edge-case search quality tests — Korean/English crossover, typos, long queries. |
| 2 | +
|
| 3 | +Tests tricky scenarios that stress synonym expansion, fuzzy matching, |
| 4 | +spreading activation, and reinforcement ranking. |
| 5 | +""" |
| 6 | + |
| 7 | +from __future__ import annotations |
| 8 | + |
| 9 | +import pytest |
| 10 | + |
| 11 | +from synaptic.backends.memory import MemoryBackend |
| 12 | +from synaptic.graph import SynapticGraph |
| 13 | +from synaptic.models import NodeKind |
| 14 | + |
| 15 | +pytestmark = pytest.mark.qa |
| 16 | + |
| 17 | + |
class TestKoreanOnlyTerms:
    """Queries written purely in Korean must still surface relevant nodes."""

    async def test_korean_compound_term(self, wiki_graph: SynapticGraph) -> None:
        """Search the compound term '관계형 데이터베이스' (relational database).

        Skips when the fixture graph returns nothing; otherwise at least one
        database-related keyword must occur in the returned titles/contents.
        """
        result = await wiki_graph.search("관계형 데이터베이스", limit=10)
        if not result.nodes:
            pytest.skip("No results — data may not contain relational DB articles")

        # Case-folded concatenation of every hit's title and content.
        haystack = " ".join(
            f"{hit.node.title} {hit.node.content}" for hit in result.nodes
        ).lower()
        keywords = ("데이터베이스", "관계형", "sql", "테이블", "rdbms", "database")
        found = [kw for kw in keywords if kw in haystack]

        assert found, (
            f"'관계형 데이터베이스' search returned no DB-related content. "
            f"Titles: {[hit.node.title for hit in result.nodes[:5]]}"
        )

    async def test_korean_only_no_english_equivalent(self, wiki_graph: SynapticGraph) -> None:
        """Search '운영 체제' (operating system) using Korean keywords only.

        Skips when nothing is returned; otherwise at least one OS-related
        Korean keyword must occur in the returned titles/contents.
        """
        result = await wiki_graph.search("운영 체제", limit=10)
        if not result.nodes:
            pytest.skip("No results for '운영 체제'")

        haystack = " ".join(
            f"{hit.node.title} {hit.node.content}" for hit in result.nodes
        ).lower()
        keywords = ("운영", "체제", "커널", "프로세스", "시스템")

        assert any(kw in haystack for kw in keywords)
| 47 | + |
| 48 | + |
class TestEnglishToKoreanSynonym:
    """English terms should find Korean articles via synonym expansion."""

    async def test_machine_learning_finds_korean(self, wiki_graph: SynapticGraph) -> None:
        """'machine learning' should return ML/AI-related content.

        Skips when the search returns nothing. Otherwise the concatenated,
        lower-cased titles and contents of the hits must contain at least one
        ML-related keyword (Korean or English).
        """
        result = await wiki_graph.search("machine learning", limit=10)

        if not result.nodes:
            pytest.skip("No results for 'machine learning'")

        # NOTE: an earlier check
        #   assert "synonym" in result.stages_used or len(result.nodes) > 0
        # was removed: past the skip guard above ``result.nodes`` is always
        # non-empty, so that assertion could never fail. The content check
        # below is the assertion that actually constrains the result.
        # NOTE(review): if the synonym stage is expected to run for this query,
        # add a strict ``"synonym" in result.stages_used`` check — confirm
        # against the search pipeline first.

        all_text = " ".join(f"{n.node.title} {n.node.content}" for n in result.nodes).lower()
        ml_terms = ["학습", "기계", "인공지능", "machine", "learning", "훈련", "신경망"]
        matches = [t for t in ml_terms if t in all_text]
        assert len(matches) > 0, (
            f"'machine learning' found no ML-related Korean content. "
            f"Titles: {[n.node.title for n in result.nodes[:5]]}"
        )

    async def test_database_finds_korean(self, wiki_graph: SynapticGraph) -> None:
        """'database' should return content mentioning databases.

        Skips when the search returns nothing; otherwise one of the DB
        keywords must be a substring of the lower-cased hit text.
        """
        result = await wiki_graph.search("database", limit=10)

        if not result.nodes:
            pytest.skip("No results for 'database'")

        all_text = " ".join(f"{n.node.title} {n.node.content}" for n in result.nodes).lower()
        assert any(t in all_text for t in ["데이터베이스", "database", "db", "sql"])
| 81 | + |
| 82 | + |
class TestTypoFuzzyMatch:
    """Misspelled or variant-spelled queries should be recovered by the fuzzy stage."""

    async def test_korean_typo_variant(self, wiki_graph: SynapticGraph) -> None:
        """Compare the standard '데이터베이스' with the older variant '데이타베이스'.

        Both queries are issued up front. The test skips when the standard
        spelling yields nothing; otherwise the variant must return at least
        one node and report "fuzzy" among its stages.
        """
        baseline = await wiki_graph.search("데이터베이스", limit=10)
        variant = await wiki_graph.search("데이타베이스", limit=10)

        if not baseline.nodes:
            pytest.skip("No results for standard spelling")

        # The variant spelling has to produce something.
        assert len(variant.nodes) > 0, (
            "Fuzzy search found 0 results for '데이타베이스' — "
            f"standard '데이터베이스' found {len(baseline.nodes)} results. "
            "Fuzzy matching may need improvement."
        )

        # ...and the fuzzy stage must be listed as having been used.
        assert "fuzzy" in variant.stages_used

    async def test_english_typo(self, wiki_graph: SynapticGraph) -> None:
        """Search for 'Pytohn', a transposition typo of 'Python'.

        Skips when the search returns nothing; otherwise only checks that
        "fuzzy" is reported among the stages used.
        """
        outcome = await wiki_graph.search("Pytohn", limit=10)

        if not outcome.nodes:
            pytest.skip("Fuzzy search could not recover 'Pytohn' typo")

        assert "fuzzy" in outcome.stages_used
| 116 | + |
| 117 | + |
class TestLongQuery:
    """Unusually long queries must complete and return a well-formed result."""

    async def test_long_query_no_crash(self, wiki_graph: SynapticGraph) -> None:
        """A query of more than 100 characters returns without raising."""
        fragments = (
            "인공지능과 머신러닝을 활용한 데이터베이스 최적화 방법론에 대한 ",
            "심층적인 분석과 클라우드 컴퓨팅 환경에서의 성능 개선 전략 그리고 ",
            "마이크로서비스 아키텍처에서의 분산 시스템 모니터링과 장애 복구 자동화",
        )
        query = "".join(fragments)
        assert len(query) > 100

        outcome = await wiki_graph.search(query, limit=10)

        # The result must echo the query and carry sane timing and node fields.
        assert outcome.query == query
        assert outcome.search_time_ms >= 0
        assert isinstance(outcome.nodes, list)

    async def test_very_long_query_200_chars(self, wiki_graph: SynapticGraph) -> None:
        """A query of more than 200 characters also completes in bounded time."""
        # 50 repetitions of a 5-character word plus a space: exactly 300 chars.
        repeated = "프로그래밍 " * 50
        assert len(repeated) > 200

        outcome = await wiki_graph.search(repeated.strip(), limit=5)

        assert isinstance(outcome.nodes, list)
        # Treat five seconds or more as a hang.
        assert outcome.search_time_ms < 5000
| 145 | + |
| 146 | + |
class TestSpreadingActivation:
    """Linked nodes should surface via spreading activation."""

    async def test_linked_node_surfaces_in_search(self) -> None:
        """Add two nodes, link them, search for one, and expect both back.

        Node B's title and content do not contain the query terms, so it is
        expected to appear only because it is linked to node A.
        """
        backend = MemoryBackend()
        await backend.connect()
        try:
            graph = SynapticGraph(backend)

            # Node A: clearly about quantum computing.
            node_a = await graph.add(
                title="양자 컴퓨팅 개론",
                content="양자 컴퓨터는 큐비트를 사용하여 계산을 수행하는 새로운 패러다임이다.",
                kind=NodeKind.CONCEPT,
                tags=["quantum"],
            )

            # Node B: about encryption, with no textual overlap with the query.
            node_b = await graph.add(
                title="암호화 알고리즘",
                content="RSA와 AES를 비롯한 현대 암호화 기술의 원리를 설명한다.",
                kind=NodeKind.CONCEPT,
                tags=["encryption"],
            )

            await graph.link(node_a.id, node_b.id)

            result = await graph.search("양자 컴퓨팅", limit=10)
            result_ids = [n.node.id for n in result.nodes]

            assert node_a.id in result_ids, "Primary node not found in search results"
            assert node_b.id in result_ids, (
                f"Linked node not found via spreading activation. Got IDs: {result_ids}"
            )
        finally:
            # Close the backend even when an assertion above fails; previously a
            # failing assert skipped the close call entirely.
            await backend.close()

    async def test_spreading_activation_with_weight(self) -> None:
        """A neighbour on a heavier edge should not get less activation.

        Node A is linked to node B with weight 3.0 and to node C with weight
        0.2. When both neighbours appear in the results, B's activation must
        be greater than or equal to C's. When either is missing the comparison
        cannot be made, so the test is skipped rather than silently passing.
        """
        backend = MemoryBackend()
        await backend.connect()
        try:
            graph = SynapticGraph(backend)

            node_a = await graph.add(
                title="메인 토픽",
                content="이것은 검색의 주요 대상이다.",
                kind=NodeKind.CONCEPT,
            )
            node_b = await graph.add(
                title="강한 연결",
                content="전혀 다른 내용이지만 강하게 연결되어 있다.",
                kind=NodeKind.CONCEPT,
            )
            node_c = await graph.add(
                title="약한 연결",
                content="역시 다른 내용이고 약하게 연결되어 있다.",
                kind=NodeKind.CONCEPT,
            )

            await graph.link(node_a.id, node_b.id, weight=3.0)
            await graph.link(node_a.id, node_c.id, weight=0.2)

            result = await graph.search("메인 토픽", limit=10)
            result_map = {n.node.id: n for n in result.nodes}

            if node_b.id not in result_map or node_c.id not in result_map:
                # Make the "nothing was compared" outcome visible in the report.
                pytest.skip("Linked neighbours did not both surface; cannot compare activation")

            strong = result_map[node_b.id].activation
            weak = result_map[node_c.id].activation
            assert strong >= weak, (
                f"Strong link activation ({strong:.3f}) "
                f"should >= weak link ({weak:.3f})"
            )
        finally:
            # Runs on pass, failure, and skip alike.
            await backend.close()
| 222 | + |
| 223 | + |
class TestReinforcementRanking:
    """Reinforced nodes should rank higher in search results."""

    async def test_reinforcement_boosts_ranking(self) -> None:
        """Reinforce the lowest-ranked hit ten times and expect it to move up.

        Five near-identical nodes are added. The last hit of a baseline search
        is reinforced ten times with ``success=True``. A second search must
        place it at a strictly smaller index and report a strictly larger
        ``resonance`` than before.
        """
        backend = MemoryBackend()
        await backend.connect()
        try:
            graph = SynapticGraph(backend)

            # Several nodes with similar content, so ranking is not decided by
            # text alone. The returned nodes are not needed afterwards.
            for i in range(5):
                await graph.add(
                    title=f"소프트웨어 설계 원칙 {i + 1}",
                    content=f"소프트웨어 공학에서 중요한 설계 원칙 번호 {i + 1}에 대한 설명.",
                    kind=NodeKind.CONCEPT,
                    tags=["설계", "소프트웨어"],
                )

            # Baseline ranking.
            result_before = await graph.search("소프트웨어 설계", limit=5)
            assert len(result_before.nodes) >= 3, "Need at least 3 results for ranking test"

            # Target the lowest-ranked hit.
            target = result_before.nodes[-1]
            target_id = target.node.id
            initial_rank = len(result_before.nodes) - 1

            for _ in range(10):
                await graph.reinforce([target_id], success=True)

            result_after = await graph.search("소프트웨어 설계", limit=5)
            # Falls back to len(...) when the target is absent, which makes the
            # rank comparison below fail.
            new_rank = next(
                (i for i, n in enumerate(result_after.nodes) if n.node.id == target_id),
                len(result_after.nodes),
            )

            # Lower index means higher rank.
            assert new_rank < initial_rank, (
                f"After 10 reinforcements, rank should improve: was #{initial_rank}, now #{new_rank}"
            )

            target_after = next((n for n in result_after.nodes if n.node.id == target_id), None)
            assert target_after is not None
            assert target_after.resonance > target.resonance, (
                f"Resonance should increase after reinforcement: "
                f"was {target.resonance:.3f}, now {target_after.resonance:.3f}"
            )
        finally:
            # Close the backend even when an assertion above fails; previously a
            # failing assert skipped the close call entirely.
            await backend.close()

    async def test_unreinforced_vs_reinforced_ordering(self) -> None:
        """Of two near-identical nodes, the reinforced one should rank first.

        One node is reinforced ten times before searching. Both nodes must
        appear in the results, with the reinforced one at a smaller index.
        """
        backend = MemoryBackend()
        await backend.connect()
        try:
            graph = SynapticGraph(backend)

            node_plain = await graph.add(
                title="데이터 분석 기법",
                content="데이터 분석과 통계적 방법론에 대한 설명이다.",
                kind=NodeKind.CONCEPT,
            )
            node_reinforced = await graph.add(
                title="데이터 분석 방법",
                content="데이터 분석과 통계적 접근법에 대한 설명이다.",
                kind=NodeKind.CONCEPT,
            )

            for _ in range(10):
                await graph.reinforce([node_reinforced.id], success=True)

            result = await graph.search("데이터 분석", limit=5)
            result_ids = [n.node.id for n in result.nodes]

            assert node_reinforced.id in result_ids, "Reinforced node should appear in results"
            assert node_plain.id in result_ids, "Plain node should also appear in results"

            rank_reinforced = result_ids.index(node_reinforced.id)
            rank_plain = result_ids.index(node_plain.id)
            assert rank_reinforced < rank_plain, (
                f"Reinforced node (rank {rank_reinforced}) should rank higher "
                f"than plain node (rank {rank_plain})"
            )
        finally:
            await backend.close()
0 commit comments