Skip to content

Commit 313df3f

Browse files
committed
refactor: replace VfbConnect with SolrTermInfoFetcher for term info retrieval
1 parent 361dcf1 commit 313df3f

2 files changed

Lines changed: 75 additions & 2 deletions

File tree

src/test/term_info_queries_test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
import unittest
22
import time
33
from src.vfbquery.term_info_queries import deserialize_term_info, deserialize_term_info_from_dict, process
4-
from vfb_connect.cross_server_tools import VfbConnect
4+
from src.vfbquery.solr_fetcher import SolrTermInfoFetcher
55

66

77
class TermInfoQueriesTest(unittest.TestCase):
88

99
def setUp(self):
10-
self.vc = VfbConnect()
10+
self.vc = SolrTermInfoFetcher()
1111
self.variable = TestVariable("my_id", "my_name")
1212

1313
def test_term_info_deserialization(self):

src/vfbquery/solr_fetcher.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import requests
2+
import json
3+
import logging
4+
from typing import List, Dict, Any, Optional
5+
6+
class SolrTermInfoFetcher:
    """Fetch term information directly from a Solr core instead of using VfbConnect.

    The class exposes ``get_TermInfo`` with the same call shape the tests used
    on ``VfbConnect``, so it can be assigned wherever that client was used for
    term-info lookups.
    """

    def __init__(self, solr_url: str = "https://solr.virtualflybrain.org/solr/vfb_json",
                 *, timeout: Optional[float] = 30.0, session: Optional[Any] = None):
        """Initialize the fetcher.

        Args:
            solr_url: Base URL of the Solr core (``/select`` is appended).
            timeout: Seconds to wait for each HTTP request; ``None`` waits
                forever (the previous, hang-prone behaviour).
            session: Optional object with a ``requests``-compatible
                ``get(url, params=..., timeout=...)`` method, e.g. a
                ``requests.Session`` to reuse connections across lookups.
                When omitted, the ``requests`` module itself is used.
        """
        self.solr_url = solr_url
        self.timeout = timeout
        self.session = session
        self.logger = logging.getLogger(__name__)

    @staticmethod
    def _id_query(short_form: str) -> str:
        """Build an exact-match Solr query for one ID, quoted so that query
        syntax characters (``:``, spaces, ``*`` ...) are treated literally."""
        escaped = str(short_form).replace("\\", "\\\\").replace('"', '\\"')
        return f'id:"{escaped}"'

    @staticmethod
    def _parse_term_info(raw: Any) -> Any:
        """Decode the ``term_info`` field, which holds a JSON document as a
        string, either directly or as the first entry of a multi-valued field."""
        if isinstance(raw, (list, tuple)):
            raw = raw[0]
        return json.loads(raw)

    def _query_docs(self, short_form: str) -> Any:
        """Run the Solr ``/select`` query for one ID and return its ``docs`` list."""
        # Resolve the HTTP client lazily so an injected session is always honoured.
        http = self.session if self.session is not None else requests
        response = http.get(
            f"{self.solr_url.rstrip('/')}/select",
            params={
                "indent": "true",
                "fl": "term_info",
                "q.op": "OR",
                "q": self._id_query(short_form),
            },
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json().get("response", {}).get("docs", [])

    def get_TermInfo(self, short_forms: List[str],
                     return_dataframe: bool = False,
                     summary: bool = False) -> List[Dict[str, Any]]:
        """Fetch term info from Solr directly, mimicking VFBconnect's interface.

        Lookups are best-effort: an ID that cannot be fetched or decoded is
        logged and skipped, so the result may be shorter than ``short_forms``.

        Args:
            short_forms: Term IDs to fetch. A single string is treated as one
                ID rather than being iterated character by character.
            return_dataframe: Accepted for interface compatibility; DataFrame
                conversion is not implemented, so a warning is logged and a
                list is still returned.
            summary: Accepted for interface compatibility; currently ignored.

        Returns:
            List of decoded term info objects, in request order, for the IDs
            that were found.
        """
        if isinstance(short_forms, str):
            short_forms = [short_forms]

        results = []

        for short_form in short_forms:
            self.logger.debug("Querying Solr for %s", short_form)

            # Phase 1: network round-trip.
            try:
                docs = self._query_docs(short_form)
            except requests.RequestException as e:
                self.logger.error("Error fetching data from Solr: %s", e)
                continue
            except json.JSONDecodeError as e:
                self.logger.error("Error decoding JSON for %s: %s", short_form, e)
                continue
            except Exception as e:
                self.logger.error("Unexpected error for %s: %s", short_form, e)
                continue

            if not docs:
                self.logger.warning("No results found for %s", short_form)
                continue

            # Phase 2: extract and decode the embedded JSON document.
            try:
                raw = docs[0].get("term_info")
                if not raw:
                    self.logger.warning("No term_info found for %s", short_form)
                    continue
                results.append(self._parse_term_info(raw))
            except json.JSONDecodeError as e:
                self.logger.error("Error decoding JSON for %s: %s", short_form, e)
            except Exception as e:
                self.logger.error("Unexpected error for %s: %s", short_form, e)

        # DataFrame conversion would need to be implemented here.
        if return_dataframe:
            self.logger.warning("return_dataframe=True not fully implemented")

        return results

0 commit comments

Comments
 (0)