Skip to content

Commit 7ac01c3

Browse files
authored
Remove legacy search indicators logic and config (#166)
1 parent e2fbcf7 commit 7ac01c3

5 files changed

Lines changed: 32 additions & 351 deletions

File tree

packages/datacommons-mcp/.env.sample

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,16 +62,13 @@ DC_TYPE=base
6262
# =============================================================================
6363

6464
# Use these variables to run the server against non-prod (autopush, staging) or local instances
65-
# of the Data Commons API and Search endpoints.
65+
# of the Data Commons API.
6666
# When using a local instance, you may also need to use the
6767
# --skip-api-key-validation command-line flag if running without a DC_API_KEY.
6868

6969
# Root URL for a non-prod or local Data Commons API (mixer) instance
7070
# DC_API_ROOT=http://localhost:8081/v2
7171

72-
# Root URL for a non-prod or local Data Commons Search (website) instance
73-
# DC_SEARCH_ROOT=http://localhost:8080
74-
7572
# Root URL for Data Commons API key validation
7673
# Configure for non-prod environments
7774
# DC_API_KEY_VALIDATION_ROOT=https://api.datacommons.org

packages/datacommons-mcp/datacommons_mcp/clients.py

Lines changed: 14 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import re
2222
from pathlib import Path
2323

24-
import requests
2524
from datacommons_client.client import DataCommonsClient
2625

2726
from datacommons_mcp._constrained_vars import place_statvar_constraint_mapping
@@ -60,35 +59,22 @@ def __init__(
6059
self,
6160
dc: DataCommonsClient,
6261
search_scope: SearchScope = SearchScope.BASE_ONLY,
63-
base_index: str = "base_uae_mem",
64-
custom_index: str | None = None,
65-
sv_search_base_url: str = "https://datacommons.org",
6662
topic_store: TopicStore | None = None,
6763
_place_like_constraints: list[str] | None = None,
68-
*,
69-
use_search_indicators: bool = False,
7064
) -> None:
7165
"""
7266
Initialize the DCClient with a DataCommonsClient and search configuration.
7367
7468
Args:
7569
dc: DataCommonsClient instance
7670
search_scope: SearchScope enum controlling search behavior
77-
base_index: Index to use for base DC searches
78-
custom_index: Index to use for custom DC searches (None for base DC)
79-
sv_search_base_url: Base URL for SV search endpoint
8071
topic_store: Optional TopicStore for caching
8172
8273
# TODO(@jm-rivera): Remove this parameter once new endpoint is live.
8374
_place_like_constraints: Optional list of place-like constraints
8475
"""
8576
self.dc = dc
8677
self.search_scope = search_scope
87-
self.base_index = base_index
88-
self.custom_index = custom_index
89-
# Precompute search indices to validate configuration at instantiation time
90-
self.search_indices = self._compute_search_indices()
91-
self.sv_search_base_url = sv_search_base_url
9278
self.variable_cache = LruCache(128)
9379

9480
if topic_store is None:
@@ -100,31 +86,9 @@ def __init__(
10086
else:
10187
self._place_like_statvar_store = {}
10288

103-
self.use_search_indicators = use_search_indicators
104-
10589
#
10690
# Initialization & Configuration
10791
#
108-
def _compute_search_indices(self) -> list[str]:
109-
"""Compute and validate search indices based on the configured search_scope.
110-
111-
Raises a ValueError immediately for invalid configurations (e.g., CUSTOM_ONLY
112-
without a custom_index).
113-
"""
114-
indices: list[str] = []
115-
116-
if self.search_scope in [SearchScope.CUSTOM_ONLY, SearchScope.BASE_AND_CUSTOM]:
117-
if self.custom_index is not None and self.custom_index != "":
118-
indices.append(self.custom_index)
119-
elif self.search_scope == SearchScope.CUSTOM_ONLY:
120-
raise ValueError(
121-
"Custom index not configured but CUSTOM_ONLY search scope requested"
122-
)
123-
124-
if self.search_scope in [SearchScope.BASE_ONLY, SearchScope.BASE_AND_CUSTOM]:
125-
indices.append(self.base_index)
126-
127-
return indices
12892

12993
def _compute_place_like_statvar_store(self, constraints: list[str]) -> None:
13094
"""Compute and cache place-like to statistical variable mappings.
@@ -275,10 +239,6 @@ def _get_topic_places_with_data(
275239

276240
return places_with_data
277241

278-
#
279-
# New Search Indicators Endpoint (/api/nl/search-indicators)
280-
#
281-
282242
def _check_topic_exists_recursive(
283243
self, topic_dcid: str, place_dcids: list[str]
284244
) -> bool:
@@ -359,60 +319,6 @@ def _expand_topics_to_variables(
359319

360320
return list(expanded_variables.values())
361321

362-
async def _call_search_indicators_temp(
363-
self, queries: list[str], *, max_results: int = 10
364-
) -> dict:
365-
"""
366-
Temporary method that mirrors search_svs but calls the new search-indicators endpoint.
367-
368-
This method takes the same arguments and returns the same structure as search_svs,
369-
but uses the new /api/nl/search-indicators endpoint instead of /api/nl/search-vector.
370-
371-
This method is temporary to create a minimal delta between the two endpoints to minimize the impact of the change.
372-
After the 1.0 release, this method should be removed in favor of a more complete implementation.
373-
374-
Returns:
375-
Dictionary mapping query strings to lists of results with 'SV' and 'CosineScore' keys
376-
"""
377-
results_map = {}
378-
endpoint_url = f"{self.sv_search_base_url}/api/nl/search-indicators"
379-
headers = {"Content-Type": "application/json", **SURFACE_HEADER}
380-
381-
# Use precomputed indices based on configured search scope
382-
indices = self.search_indices
383-
384-
for query in queries:
385-
# Prepare parameters for the new endpoint
386-
params = {
387-
"queries": [query],
388-
"limit_per_index": max_results,
389-
"index": indices,
390-
}
391-
392-
try:
393-
response = await asyncio.to_thread(
394-
requests.get,
395-
endpoint_url,
396-
params=params,
397-
headers=headers, # noqa: S113
398-
)
399-
response.raise_for_status()
400-
api_response = response.json()
401-
402-
# Transform the response to match search_svs format
403-
transformed_results = self._transform_search_indicators_to_svs_format(
404-
api_response, max_results=max_results
405-
)
406-
results_map[query] = transformed_results
407-
408-
except Exception as e: # noqa: BLE001
409-
logger.error(
410-
"An unexpected error occurred for query '%s': %s", query, e
411-
)
412-
results_map[query] = []
413-
414-
return results_map
415-
416322
def _call_fetch_indicators(self, queries: list[str]) -> dict:
417323
"""
418324
Helper method to call the datacommons-client fetch_indicators and transform the response.
@@ -473,43 +379,6 @@ def _call_fetch_indicators(self, queries: list[str]) -> dict:
473379

474380
return results_map
475381

476-
def _transform_search_indicators_to_svs_format(
477-
self, api_response: dict, *, max_results: int = 10
478-
) -> list[dict]:
479-
"""
480-
Transform search-indicators response to match search_svs format.
481-
482-
Returns:
483-
List of dictionaries with 'SV' and 'CosineScore' keys
484-
"""
485-
results = []
486-
query_results = api_response.get("queryResults", [])
487-
488-
for query_result in query_results:
489-
for index_result in query_result.get("indexResults", []):
490-
for indicator in index_result.get("results", []):
491-
dcid = indicator.get("dcid")
492-
if not dcid:
493-
continue
494-
495-
# Extract score (default to 0.0 if not present)
496-
score = indicator.get("score", 0.0)
497-
498-
results.append(
499-
{
500-
"SV": dcid,
501-
"CosineScore": score,
502-
"description": indicator.get("description"),
503-
"alternate_descriptions": indicator.get(
504-
"search_descriptions"
505-
),
506-
}
507-
)
508-
509-
# Sort by score descending, then limit results
510-
results.sort(key=lambda x: x["CosineScore"], reverse=True)
511-
return results[:max_results]
512-
513382
async def fetch_indicators(
514383
self,
515384
query: str,
@@ -616,25 +485,24 @@ async def fetch_indicators(
616485
}
617486

618487
async def _search_vector(
619-
self, query: str, max_results: int = 10, *, include_topics: bool = True
488+
self,
489+
query: str,
490+
# TODO(keyurs): Use max_results once it's supported by the underlying client.
491+
# The noqa: ARG002 is to suppress the unused argument error.
492+
max_results: int = 10, # noqa: ARG002
493+
*,
494+
include_topics: bool = True,
620495
) -> dict:
621496
"""
622-
Search for topics and variables using the search-indicators or search-vector endpoint.
497+
Search for topics and variables using the fetch_indicators library method.
623498
"""
624499
# Always include topics since we need to expand topics to variables.
625-
if self.use_search_indicators:
626-
logger.info("Calling legacy search-indicators endpoint for: '%s'", query)
627-
search_results = await self._call_search_indicators_temp(
628-
queries=[query],
629-
max_results=max_results,
630-
)
631-
else:
632-
logger.info("Calling client library fetch_indicators for: '%s'", query)
633-
# Run the synchronous client method in a thread
634-
search_results = await asyncio.to_thread(
635-
self._call_fetch_indicators,
636-
queries=[query],
637-
)
500+
logger.info("Calling client library fetch_indicators for: '%s'", query)
501+
# Run the synchronous client method in a thread
502+
search_results = await asyncio.to_thread(
503+
self._call_fetch_indicators,
504+
queries=[query],
505+
)
638506

639507
results = search_results.get(query, [])
640508

@@ -839,19 +707,14 @@ def _create_base_dc_client(settings: BaseDCSettings) -> DCClient:
839707
}
840708
if settings.api_root:
841709
logger.info("Using API root for base DC: %s", settings.api_root)
842-
logger.info("Using search root for base DC: %s", settings.search_root)
843710
dc_client_args["url"] = settings.api_root
844711
dc = DataCommonsClient(**dc_client_args)
845712

846713
# Create DCClient
847714
return DCClient(
848715
dc=dc,
849716
search_scope=SearchScope.BASE_ONLY,
850-
base_index=settings.base_index,
851-
custom_index=None,
852-
sv_search_base_url=settings.search_root,
853717
topic_store=topic_store,
854-
use_search_indicators=settings.use_search_indicators,
855718
)
856719

857720

@@ -884,11 +747,7 @@ def _create_custom_dc_client(settings: CustomDCSettings) -> DCClient:
884747
return DCClient(
885748
dc=dc,
886749
search_scope=search_scope,
887-
base_index=settings.base_index,
888-
custom_index=settings.custom_index,
889-
sv_search_base_url=settings.custom_dc_url, # Use custom_dc_url as sv_search_base_url
890750
topic_store=topic_store,
891751
# TODO (@jm-rivera): Remove place-like parameter once new search endpoint is live.
892752
_place_like_constraints=settings.place_like_constraints,
893-
use_search_indicators=settings.use_search_indicators,
894753
)

packages/datacommons-mcp/datacommons_mcp/data_models/settings.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,6 @@ class DCSettings(BaseSettings):
4747
default="", alias="DC_API_KEY", description="API key for Data Commons"
4848
)
4949

50-
use_search_indicators: bool = Field(
51-
default=False,
52-
alias="DC_USE_SEARCH_INDICATORS",
53-
description="Whether to use the legacy search-indicators endpoint (True) or the client library (False) for fetching indicators.",
54-
)
55-
5650
instructions_dir: str | None = Field(
5751
default=None,
5852
alias="DC_INSTRUCTIONS_DIR",
@@ -71,16 +65,6 @@ def __init__(self, **kwargs: dict[str, Any]) -> None:
7165
alias="DC_TYPE",
7266
description="Type of Data Commons (must be 'base')",
7367
)
74-
search_root: str = Field(
75-
default="https://datacommons.org",
76-
alias="DC_SEARCH_ROOT",
77-
description="Search base URL for base DC",
78-
)
79-
base_index: str = Field(
80-
default="base_uae_mem",
81-
alias="DC_BASE_INDEX",
82-
description="Search index for base DC",
83-
)
8468
topic_cache_paths: list[str] | None = Field(
8569
default=None,
8670
alias="DC_TOPIC_CACHE_PATHS",
@@ -130,16 +114,6 @@ def __init__(self, **kwargs: dict[str, Any]) -> None:
130114
alias="DC_SEARCH_SCOPE",
131115
description="Search scope for queries",
132116
)
133-
base_index: str = Field(
134-
default="medium_ft",
135-
alias="DC_BASE_INDEX",
136-
description="Search index for base DC",
137-
)
138-
custom_index: str = Field(
139-
default="user_all_minilm_mem",
140-
alias="DC_CUSTOM_INDEX",
141-
description="Search index for custom DC",
142-
)
143117
root_topic_dcids: list[str] | None = Field(
144118
default=None,
145119
alias="DC_ROOT_TOPIC_DCIDS",

0 commit comments

Comments
 (0)