Skip to content

Commit fea7c01

Browse files
authored
Revert "feat(retrieve): use tags metadata for cross-subtree retrieval (#1162)" (#1200)
This reverts commit e72b614.
1 parent a566fe2 commit fea7c01

32 files changed

Lines changed: 32 additions & 1293 deletions

openviking/async_client.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -215,7 +215,6 @@ async def add_resource(
215215
build_index: bool = True,
216216
summarize: bool = False,
217217
watch_interval: float = 0,
218-
tags: Optional[Union[str, List[str]]] = None,
219218
telemetry: TelemetryRequest = False,
220219
**kwargs,
221220
) -> Dict[str, Any]:
@@ -248,7 +247,6 @@ async def add_resource(
248247
timeout=timeout,
249248
build_index=build_index,
250249
summarize=summarize,
251-
tags=tags,
252250
telemetry=telemetry,
253251
watch_interval=watch_interval,
254252
**kwargs,
@@ -315,7 +313,6 @@ async def search(
315313
limit: int = 10,
316314
score_threshold: Optional[float] = None,
317315
filter: Optional[Dict] = None,
318-
tags: Optional[Union[str, List[str]]] = None,
319316
telemetry: TelemetryRequest = False,
320317
):
321318
"""
@@ -341,7 +338,6 @@ async def search(
341338
limit=limit,
342339
score_threshold=score_threshold,
343340
filter=filter,
344-
tags=tags,
345341
telemetry=telemetry,
346342
)
347343

@@ -352,7 +348,6 @@ async def find(
352348
limit: int = 10,
353349
score_threshold: Optional[float] = None,
354350
filter: Optional[Dict] = None,
355-
tags: Optional[Union[str, List[str]]] = None,
356351
telemetry: TelemetryRequest = False,
357352
):
358353
"""Semantic search"""
@@ -363,7 +358,6 @@ async def find(
363358
limit=limit,
364359
score_threshold=score_threshold,
365360
filter=filter,
366-
tags=tags,
367361
telemetry=telemetry,
368362
)
369363

openviking/client/local.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
attach_telemetry_payload,
1515
run_with_telemetry,
1616
)
17-
from openviking.utils.tag_utils import canonicalize_user_tags, expand_query_tags
1817
from openviking_cli.client.base import BaseClient
1918
from openviking_cli.session.user_id import UserIdentifier
2019
from openviking_cli.utils import run_async
@@ -84,7 +83,6 @@ async def add_resource(
8483
summarize: bool = False,
8584
telemetry: TelemetryRequest = False,
8685
watch_interval: float = 0,
87-
tags: Optional[Union[str, List[str]]] = None,
8886
**kwargs,
8987
) -> Dict[str, Any]:
9088
"""Add resource to OpenViking."""
@@ -106,7 +104,6 @@ async def add_resource(
106104
build_index=build_index,
107105
summarize=summarize,
108106
watch_interval=watch_interval,
109-
tags=canonicalize_user_tags(tags),
110107
**kwargs,
111108
),
112109
)
@@ -266,7 +263,6 @@ async def find(
266263
limit: int = 10,
267264
score_threshold: Optional[float] = None,
268265
filter: Optional[Dict[str, Any]] = None,
269-
tags: Optional[Union[str, List[str]]] = None,
270266
telemetry: TelemetryRequest = False,
271267
) -> Any:
272268
"""Semantic search without session context."""
@@ -280,7 +276,6 @@ async def find(
280276
limit=limit,
281277
score_threshold=score_threshold,
282278
filter=filter,
283-
tags=expand_query_tags(tags),
284279
),
285280
)
286281
return attach_telemetry_payload(
@@ -296,7 +291,6 @@ async def search(
296291
limit: int = 10,
297292
score_threshold: Optional[float] = None,
298293
filter: Optional[Dict[str, Any]] = None,
299-
tags: Optional[Union[str, List[str]]] = None,
300294
telemetry: TelemetryRequest = False,
301295
) -> Any:
302296
"""Semantic search with optional session context."""
@@ -314,7 +308,6 @@ async def _search():
314308
limit=limit,
315309
score_threshold=score_threshold,
316310
filter=filter,
317-
tags=expand_query_tags(tags),
318311
)
319312

320313
execution = await run_with_telemetry(

openviking/core/context.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from typing import Any, Dict, List, Optional
88
from uuid import uuid4
99

10-
from openviking.utils.tag_utils import parse_tags
1110
from openviking.utils.time_utils import format_iso8601, parse_iso_datetime
1211
from openviking_cli.session.user_id import UserIdentifier
1312
from openviking_cli.utils.uri import VikingURI
@@ -68,7 +67,6 @@ def __init__(
6867
active_count: int = 0,
6968
related_uri: Optional[List[str]] = None,
7069
meta: Optional[Dict[str, Any]] = None,
71-
tags: Optional[List[str] | str] = None,
7270
level: int | ContextLevel | None = None,
7371
session_id: Optional[str] = None,
7472
user: Optional[UserIdentifier] = None,
@@ -92,9 +90,6 @@ def __init__(
9290
self.active_count = active_count
9391
self.related_uri = related_uri or []
9492
self.meta = meta or {}
95-
self.tags = parse_tags(tags if tags is not None else self.meta.get("tags"))
96-
if self.tags and "tags" not in self.meta:
97-
self.meta["tags"] = list(self.tags)
9893
try:
9994
self.level = int(level) if level is not None else None
10095
except (TypeError, ValueError):
@@ -177,7 +172,6 @@ def to_dict(self) -> Dict[str, Any]:
177172
"active_count": self.active_count,
178173
"vector": self.vector,
179174
"meta": self.meta,
180-
"tags": self.tags,
181175
"related_uri": self.related_uri,
182176
"session_id": self.session_id,
183177
"account_id": self.account_id,
@@ -231,13 +225,6 @@ def from_dict(cls, data: Dict[str, Any]) -> "Context":
231225
active_count=data.get("active_count", 0),
232226
related_uri=data.get("related_uri", []),
233227
meta=data.get("meta", {}),
234-
tags=(
235-
data.get("tags")
236-
if data.get("tags") is not None
237-
else data.get("meta", {}).get("tags")
238-
if isinstance(data.get("meta"), dict)
239-
else None
240-
),
241228
level=(
242229
data.get("level")
243230
if data.get("level") is not None

openviking/retrieve/hierarchical_retriever.py

Lines changed: 0 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
from openviking.storage import VikingDBManager, VikingDBManagerProxy
2323
from openviking.storage.viking_fs import get_viking_fs
2424
from openviking.telemetry import get_current_telemetry
25-
from openviking.utils.tag_utils import expand_query_tags, merge_tags, parse_tags
2625
from openviking.utils.time_utils import parse_iso_datetime
2726
from openviking_cli.retrieve.types import (
2827
ContextType,
@@ -51,9 +50,6 @@ class HierarchicalRetriever:
5150
DIRECTORY_DOMINANCE_RATIO = 1.2 # Directory score must exceed max child score
5251
GLOBAL_SEARCH_TOPK = 10 # Global retrieval count (more candidates = better rerank precision)
5352
HOTNESS_ALPHA = 0.2 # Weight for hotness score in final ranking (0 = disabled)
54-
MAX_TAG_EXPANSION_TAGS = 8 # Upper bound on expansion tags collected per query.
55-
TAG_EXPANSION_LIMIT = 8 # Upper bound on extra nodes discovered via tags.
56-
TAG_EXPANSION_SCORE = 0.15 # Lower seed score for tag-expanded nodes.
5753
LEVEL_URI_SUFFIX = {0: ".abstract.md", 1: ".overview.md"}
5854

5955
def __init__(
@@ -117,7 +113,6 @@ async def retrieve(
117113
vector_proxy = VikingDBManagerProxy(self.vector_store, ctx)
118114

119115
target_dirs = [d for d in (query.target_directories or []) if d]
120-
query_tags = expand_query_tags(query.tags)
121116

122117
if not await vector_proxy.collection_exists_bound():
123118
logger.warning(
@@ -151,7 +146,6 @@ async def retrieve(
151146
sparse_query_vector=sparse_query_vector,
152147
context_type=query.context_type.value if query.context_type else None,
153148
target_dirs=target_dirs,
154-
tags=query_tags,
155149
scope_dsl=scope_dsl,
156150
limit=max(limit, self.GLOBAL_SEARCH_TOPK),
157151
)
@@ -173,22 +167,11 @@ async def retrieve(
173167
f" [{i}] URI: {uri}, score: {score:.4f}, level: {level}, account_id: {account_id}"
174168
)
175169

176-
expanded_points, expanded_candidates = await self._expand_starting_points_by_tags(
177-
vector_proxy=vector_proxy,
178-
global_results=global_results,
179-
explicit_tags=query_tags,
180-
context_type=query.context_type.value if query.context_type else None,
181-
target_dirs=target_dirs,
182-
scope_dsl=scope_dsl,
183-
limit=self.TAG_EXPANSION_LIMIT,
184-
)
185-
186170
# Step 3: Merge starting points
187171
starting_points = self._merge_starting_points(
188172
query.query,
189173
root_uris,
190174
global_results,
191-
extra_points=expanded_points,
192175
mode=mode,
193176
)
194177

@@ -200,10 +183,6 @@ async def retrieve(
200183
initial_candidates,
201184
mode=mode,
202185
)
203-
initial_candidates = self._merge_initial_candidates(
204-
initial_candidates,
205-
expanded_candidates,
206-
)
207186

208187
# Step 4: Recursive search
209188
candidates = await self._recursive_search(
@@ -250,7 +229,6 @@ async def _global_vector_search(
250229
sparse_query_vector: Optional[Dict[str, float]],
251230
context_type: Optional[str],
252231
target_dirs: List[str],
253-
tags: List[str],
254232
scope_dsl: Optional[Dict[str, Any]],
255233
limit: int,
256234
) -> List[Dict[str, Any]]:
@@ -260,7 +238,6 @@ async def _global_vector_search(
260238
sparse_query_vector=sparse_query_vector,
261239
context_type=context_type,
262240
target_directories=target_dirs,
263-
tags=tags,
264241
extra_filter=scope_dsl,
265242
limit=limit,
266243
)
@@ -307,7 +284,6 @@ def _merge_starting_points(
307284
query: str,
308285
root_uris: List[str],
309286
global_results: List[Dict[str, Any]],
310-
extra_points: Optional[List[Tuple[str, float]]] = None,
311287
mode: str = "thinking",
312288
) -> List[Tuple[str, float]]:
313289
"""Merge starting points.
@@ -344,111 +320,8 @@ def _merge_starting_points(
344320
points.append((uri, 0.0))
345321
seen.add(uri)
346322

347-
for uri, score in extra_points or []:
348-
if uri not in seen:
349-
points.append((uri, score))
350-
seen.add(uri)
351-
352323
return points
353324

354-
def _merge_initial_candidates(
355-
self,
356-
*candidate_groups: Optional[List[Dict[str, Any]]],
357-
) -> List[Dict[str, Any]]:
358-
merged: Dict[str, Dict[str, Any]] = {}
359-
for group in candidate_groups:
360-
for candidate in group or []:
361-
uri = candidate.get("uri", "")
362-
if not uri:
363-
continue
364-
previous = merged.get(uri)
365-
if previous is None or candidate.get("_score", 0.0) > previous.get("_score", 0.0):
366-
merged[uri] = candidate
367-
return sorted(merged.values(), key=lambda item: item.get("_score", 0.0), reverse=True)
368-
369-
async def _expand_starting_points_by_tags(
370-
self,
371-
vector_proxy: VikingDBManagerProxy,
372-
global_results: List[Dict[str, Any]],
373-
explicit_tags: List[str],
374-
context_type: Optional[str],
375-
target_dirs: List[str],
376-
scope_dsl: Optional[Dict[str, Any]],
377-
limit: int,
378-
) -> Tuple[List[Tuple[str, float]], List[Dict[str, Any]]]:
379-
expansion_tags = self._collect_expansion_tags(global_results, explicit_tags)
380-
if not expansion_tags or limit <= 0:
381-
return [], []
382-
383-
tag_matches = await vector_proxy.search_by_tags_in_tenant(
384-
tags=expansion_tags,
385-
context_type=context_type,
386-
target_directories=target_dirs,
387-
extra_filter=scope_dsl,
388-
levels=[0, 1, 2],
389-
limit=limit,
390-
)
391-
telemetry = get_current_telemetry()
392-
telemetry.count("retrieval.tag_expansion.tags", len(expansion_tags))
393-
telemetry.count("retrieval.tag_expansion.matches", len(tag_matches))
394-
395-
seen_uris = {result.get("uri", "") for result in global_results}
396-
expansion_points: Dict[str, float] = {}
397-
expansion_candidates: Dict[str, Dict[str, Any]] = {}
398-
expansion_tag_set = set(expansion_tags)
399-
400-
for match in tag_matches:
401-
uri = match.get("uri", "")
402-
if not uri or uri in seen_uris:
403-
continue
404-
405-
overlap = expansion_tag_set.intersection(parse_tags(match.get("tags")))
406-
score = self._score_tag_expansion(len(overlap))
407-
408-
if match.get("level", 2) == 2:
409-
candidate = dict(match)
410-
candidate["_score"] = score
411-
previous = expansion_candidates.get(uri)
412-
if previous is None or score > previous.get("_score", 0.0):
413-
expansion_candidates[uri] = candidate
414-
415-
start_uri = self._start_uri_from_record(match)
416-
if start_uri and score > expansion_points.get(start_uri, 0.0):
417-
expansion_points[start_uri] = score
418-
419-
return list(expansion_points.items()), list(expansion_candidates.values())
420-
421-
def _collect_expansion_tags(
422-
self,
423-
global_results: List[Dict[str, Any]],
424-
explicit_tags: List[str],
425-
) -> List[str]:
426-
collected = [explicit_tags]
427-
for result in global_results:
428-
collected.append(parse_tags(result.get("tags")))
429-
return merge_tags(*collected, max_tags=self.MAX_TAG_EXPANSION_TAGS)
430-
431-
def _score_tag_expansion(self, overlap_count: int) -> float:
432-
if overlap_count <= 1:
433-
return self.TAG_EXPANSION_SCORE
434-
return self.TAG_EXPANSION_SCORE * (1.0 + 0.2 * min(overlap_count - 1, 3))
435-
436-
def _start_uri_from_record(self, record: Dict[str, Any]) -> str:
437-
uri = record.get("uri", "")
438-
if not uri:
439-
return ""
440-
if record.get("level", 2) != 2:
441-
return uri
442-
443-
parent_uri = record.get("parent_uri")
444-
if parent_uri:
445-
return parent_uri
446-
447-
normalized = uri.rstrip("/")
448-
if "/" not in normalized:
449-
return ""
450-
return normalized.rsplit("/", 1)[0]
451-
452325
def _prepare_initial_candidates(
453326
self,
454327
query: str,

openviking/server/routers/resources.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
from openviking.server.models import Response
2121
from openviking.server.telemetry import run_operation
2222
from openviking.telemetry import TelemetryRequest
23-
from openviking.utils.tag_utils import canonicalize_user_tags
2423
from openviking_cli.exceptions import InvalidArgumentError
2524
from openviking_cli.utils.config.open_viking_config import get_openviking_config
2625

@@ -51,7 +50,6 @@ class AddResourceRequest(BaseModel):
5150
exclude: Glob pattern for files to exclude during parsing.
5251
directly_upload_media: Whether to directly upload media files. Default is True.
5352
preserve_structure: Whether to preserve directory structure when adding directories.
54-
tags: Optional semicolon-delimited tags to persist on indexed contexts.
5553
watch_interval: Watch interval in minutes for automatic resource monitoring.
5654
- watch_interval > 0: Creates or updates a watch task. The resource will be
5755
automatically re-processed at the specified interval.
@@ -82,7 +80,6 @@ class AddResourceRequest(BaseModel):
8280
exclude: Optional[str] = None
8381
directly_upload_media: bool = True
8482
preserve_structure: Optional[bool] = None
85-
tags: Optional[str] = None
8683
telemetry: TelemetryRequest = False
8784
watch_interval: float = 0
8885

@@ -216,7 +213,6 @@ async def add_resource(
216213
instruction=request.instruction,
217214
wait=request.wait,
218215
timeout=request.timeout,
219-
tags=canonicalize_user_tags(request.tags),
220216
allow_local_path_resolution=allow_local_path_resolution,
221217
**kwargs,
222218
),

0 commit comments

Comments
 (0)