Skip to content

Commit 09583f2

Browse files
committed
Enhance caching logic in SolrResultCache and implement SimpleVFBConnect client to replace VFBConnect dependency
1 parent 9b103eb commit 09583f2

3 files changed

Lines changed: 305 additions & 7 deletions

File tree

src/vfbquery/owlery_client.py

Lines changed: 288 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,288 @@
1+
"""
2+
Simple Owlery REST API client to replace VFBConnect dependency.
3+
4+
This module provides direct HTTP access to the Owlery OWL reasoning service,
5+
eliminating the need for vfb_connect which has problematic GUI dependencies.
6+
"""
7+
8+
import requests
9+
import json
10+
import pandas as pd
11+
import re
12+
from urllib.parse import quote
13+
from typing import List, Optional, Dict, Any, Union
14+
15+
16+
def short_form_to_iri(short_form: str) -> str:
    """
    Build the full OBO library IRI for a term short form.

    :param short_form: Term identifier such as 'FBbt_00003748'
    :return: The OBO base URL followed by the short form, e.g.
             'http://purl.obolibrary.org/obo/FBbt_00003748'
    """
    obo_base = "http://purl.obolibrary.org/obo/"
    # The short form is appended verbatim; OBO IRIs keep the underscore in the ID
    return f"{obo_base}{short_form}"
25+
26+
27+
def gen_short_form(iri: str) -> str:
    """
    Derive a short_form from an IRI string (VFBConnect compatible).

    The IRI is cut at every '/' and '#'; whatever follows the final
    separator is the short form. A string without separators is returned
    unchanged.

    :param iri: An IRI string
    :return: short_form (the trailing segment of the IRI)
    """
    segments = re.split(r'[/#]', iri)
    return segments[-1]
36+
37+
38+
class OwleryClient:
    """
    Simple client for the Owlery OWL reasoning service.

    Provides a minimal interface matching VFBConnect's OWLeryConnect
    functionality for the subclass queries needed by VFBquery.
    """

    def __init__(self, owlery_endpoint: str = "http://owl.virtualflybrain.org/kbs/vfb"):
        """
        Initialize Owlery client.

        :param owlery_endpoint: Base URL for Owlery service (default: VFB public instance).
                                Any trailing '/' is removed.
        """
        self.owlery_endpoint = owlery_endpoint.rstrip('/')

    @staticmethod
    def _expand_short_forms(query: str) -> str:
        """Rewrite every ``<SHORT_FORM>`` token in *query* as ``<FULL_IRI>``."""
        def expand(match):
            token = match.group(1)  # Content between < >
            # Only convert tokens that look like a short form
            # (contain an underscore, no slashes)
            if '_' in token and '/' not in token:
                return f"<{short_form_to_iri(token)}>"
            # Already an IRI or other syntax, leave as-is
            return match.group(0)

        return re.sub(r'<([^>]+)>', expand, query)

    @staticmethod
    def _build_params(iri_query: str, direct: bool) -> Dict[str, str]:
        """Build the query-string parameters for the Owlery ``subclasses`` endpoint."""
        return {
            'object': iri_query,
            'prefixes': json.dumps({
                "FBbt": "http://purl.obolibrary.org/obo/FBbt_",
                "RO": "http://purl.obolibrary.org/obo/RO_",
                "BFO": "http://purl.obolibrary.org/obo/BFO_"
            }),
            # Always state the flag explicitly, as VFBConnect does. The earlier
            # code sent 'False' only when direct=True (inverting the request)
            # and otherwise left the choice to the server default.
            'direct': 'true' if direct else 'false',
        }

    @staticmethod
    def _extract_iris(data: Any, query: str, verbose: bool = False) -> List[str]:
        """Pull the list of class IRIs out of a decoded Owlery response."""
        # Owlery returns: {"superClassOf": ["IRI1", "IRI2", ...]}
        # (VFBConnect uses return_type='superClassOf' for subclass queries)
        if isinstance(data, dict) and 'superClassOf' in data:
            iris = data['superClassOf']
        elif isinstance(data, list):
            # Fallback: simple list response
            iris = data
        else:
            if verbose:
                print(f"Unexpected Owlery response format: {type(data)}")
                print(f"Response: {data}")
            return []

        if not isinstance(iris, list):
            if verbose:
                print("Warning: No results! This is likely due to a query error")
                print(f"Query: {query}")
            return []

        return iris

    def get_subclasses(self, query: str, query_by_label: bool = True,
                       verbose: bool = False, prefixes: bool = False, direct: bool = False) -> List[str]:
        """
        Query Owlery for subclasses matching an OWL class expression.

        This replicates the VFBConnect OWLeryConnect.get_subclasses() method.
        Based on: https://github.com/VirtualFlyBrain/VFB_connect/blob/master/src/vfb_connect/owl/owlery_query_tools.py

        :param query: OWL class expression query string. Short forms in angle
                      brackets (e.g. '<FBbt_00003748>') are expanded to full
                      OBO IRIs before the request is sent.
        :param query_by_label: Accepted for VFBConnect signature compatibility;
                               not used by this implementation.
        :param verbose: If True, print debug information
        :param prefixes: Accepted for VFBConnect signature compatibility;
                         not used by this implementation (short forms are
                         always returned).
        :param direct: Return direct subclasses only. Default False.
        :return: List of class IDs (short forms like 'FBbt_00003748');
                 empty list if the response has an unexpected shape
        :raises requests.RequestException: If the HTTP request fails or
                                           returns an error status
        """
        try:
            iri_query = self._expand_short_forms(query)

            if verbose:
                print(f"Original query: {query}")
                print(f"IRI query: {iri_query}")

            # Make HTTP GET request with longer timeout for complex queries
            response = requests.get(
                f"{self.owlery_endpoint}/subclasses",
                params=self._build_params(iri_query, direct),
                timeout=120
            )

            if verbose:
                print(f"Owlery query: {response.url}")

            response.raise_for_status()
            data = response.json()

            if verbose:
                print(f"Response keys: {data.keys() if isinstance(data, dict) else 'not a dict'}")

            iris = self._extract_iris(data, query, verbose)

            # Convert IRIs to short forms with the module-level helper
            # (splits by '/' or '#' and takes the last part)
            short_forms = [gen_short_form(iri) for iri in iris]

            if verbose:
                print(f"Found {len(short_forms)} subclasses")

            return short_forms

        except requests.RequestException as e:
            print(f"ERROR: Owlery request failed: {e}")
            raise
        except Exception as e:
            # Log and propagate; callers decide how to recover
            print(f"ERROR: Unexpected error in Owlery query: {e}")
            raise
163+
164+
165+
class MockNeo4jClient:
    """
    Stand-in for the Neo4j client that fails loudly on use.

    Neo4j queries need the full vfb_connect installation, which has GUI
    dependencies. This placeholder exists so callers get a clear error
    message instead of an attribute error.
    """

    def commit_list(self, queries):
        """
        Reject any attempt to run Cypher queries.

        :param queries: List of Cypher queries (never executed)
        :raises NotImplementedError: Always - Neo4j requires full vfb_connect
        """
        explanation = (
            "Neo4j queries require full vfb_connect installation. "
            "In development environment without GUI libraries, only Owlery-based "
            "queries are available (e.g., get_neurons_with_part_in, get_parts_of, etc.). "
            "Neo4j-based queries (e.g., get_instances, get_similar_neurons) are not available."
        )
        raise NotImplementedError(explanation)
186+
187+
188+
class SimpleVFBConnect:
    """
    Minimal replacement for VFBConnect that works in headless environments.

    Provides:
    - Owlery client (vc.vfb.oc) for OWL reasoning queries
    - Mock Neo4j client (vc.nc) that raises informative errors
    - SOLR term info fetcher (vc.get_TermInfo) for term metadata

    This eliminates the need for vfb_connect which requires GUI libraries
    (vispy, Quartz.framework on macOS) that aren't available in all dev environments.
    """

    def __init__(self, solr_url: str = "https://solr.virtualflybrain.org/solr/vfb_json"):
        """
        Initialize simple VFB connection with Owlery and SOLR access.

        :param solr_url: Base URL for SOLR server (default: VFB public instance).
                         Any trailing '/' is removed.
        """
        # Both clients are created lazily on first property access
        self._vfb = None
        self._nc = None
        # Strip trailing slashes so "<solr_url>/select" never contains "//"
        self.solr_url = solr_url.rstrip('/')

    @property
    def vfb(self):
        """Get VFB object with Owlery client."""
        if self._vfb is None:
            # Create simple object with oc (Owlery client) attribute
            class VFBObject:
                def __init__(self):
                    self.oc = OwleryClient()
            self._vfb = VFBObject()
        return self._vfb

    @property
    def nc(self):
        """Get Neo4j client (mock that raises errors)."""
        if self._nc is None:
            self._nc = MockNeo4jClient()
        return self._nc

    @staticmethod
    def _as_id_list(short_forms: Union[str, List[str]]) -> List[str]:
        """Normalise *short_forms* to a list, wrapping a bare string as one ID."""
        # Iterating a str would otherwise issue one SOLR query per character
        if isinstance(short_forms, str):
            return [short_forms]
        return list(short_forms)

    def get_TermInfo(self, short_forms: Union[str, List[str]],
                     return_dataframe: bool = False,
                     summary: bool = False) -> Union[List[Dict[str, Any]], pd.DataFrame]:
        """
        Fetch term info from SOLR directly.

        This replicates VFBConnect's get_TermInfo method using direct SOLR queries.
        One request is made per ID. IDs that cannot be fetched or parsed are
        reported with a printed message and left out of the result.

        :param short_forms: List of term IDs to fetch (e.g., ['FBbt_00003748']);
                            a single ID string is also accepted
        :param return_dataframe: If True, return as pandas DataFrame. When no
                                 term could be fetched, or the conversion
                                 fails, the plain list is returned instead.
        :param summary: If True, return summarized version (currently ignored)
        :return: List of term info dictionaries or DataFrame
        """
        results = []
        url = f"{self.solr_url}/select"

        for short_form in self._as_id_list(short_forms):
            try:
                params = {
                    "indent": "true",
                    "fl": "term_info",
                    "q.op": "OR",
                    "q": f"id:{short_form}"
                }

                response = requests.get(url, params=params, timeout=30)
                response.raise_for_status()

                data = response.json()
                docs = data.get("response", {}).get("docs", [])

                if not docs:
                    print(f"WARNING: No results found for {short_form}")
                    continue

                if "term_info" not in docs[0] or not docs[0]["term_info"]:
                    print(f"WARNING: No term_info found for {short_form}")
                    continue

                # Extract and parse the term_info string which is itself JSON
                term_info_str = docs[0]["term_info"][0]
                results.append(json.loads(term_info_str))

            # Best-effort per ID: report the failure and move on to the next one
            except requests.RequestException as e:
                print(f"ERROR: Error fetching data from SOLR: {e}")
            except json.JSONDecodeError as e:
                print(f"ERROR: Error decoding JSON for {short_form}: {e}")
            except Exception as e:
                print(f"ERROR: Unexpected error for {short_form}: {e}")

        # Convert to DataFrame if requested
        if return_dataframe and results:
            try:
                return pd.json_normalize(results)
            except Exception as e:
                print(f"ERROR: Error converting to DataFrame: {e}")
                return results

        return results

src/vfbquery/solr_result_cache.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,14 @@ def wrapper(*args, **kwargs):
596596
preview = kwargs.get('preview', True) # Default is True
597597
cache_term_id = f"{term_id}_preview_{preview}"
598598

599+
# Include return_dataframe parameter in cache key for queries that support it
600+
# This ensures DataFrame and dict formats are cached separately
601+
if query_type in ['instances', 'neurons_part_here', 'neurons_synaptic',
602+
'neurons_presynaptic', 'neurons_postsynaptic',
603+
'components_of', 'parts_of', 'subclasses_of']:
604+
return_dataframe = kwargs.get('return_dataframe', True) # Default is True
605+
cache_term_id = f"{cache_term_id}_df_{return_dataframe}"
606+
599607
cache = get_solr_cache()
600608

601609
# Clear cache if force_refresh is True

src/vfbquery/vfb_queries.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import pysolr
22
from .term_info_queries import deserialize_term_info
3-
# Replace VfbConnect import with our new SolrTermInfoFetcher
4-
from .solr_fetcher import SolrTermInfoFetcher
3+
# Replace VfbConnect import with our new SimpleVFBConnect
4+
from .owlery_client import SimpleVFBConnect
55
# Keep dict_cursor if it's used elsewhere - lazy import to avoid GUI issues
66
from marshmallow import Schema, fields, post_load
77
from typing import List, Tuple, Dict, Any, Union
@@ -59,8 +59,8 @@ def get_dict_cursor():
5959
# Connect to the VFB SOLR server
6060
vfb_solr = pysolr.Solr('http://solr.virtualflybrain.org/solr/vfb_json/', always_commit=False, timeout=990)
6161

62-
# Replace VfbConnect with SolrTermInfoFetcher
63-
vc = SolrTermInfoFetcher()
62+
# Replace VfbConnect with SimpleVFBConnect
63+
vc = SimpleVFBConnect()
6464

6565
def initialize_vfb_connect():
6666
"""
@@ -2096,7 +2096,7 @@ def _owlery_query_to_results(owl_query_string: str, short_form: str, return_data
20962096

20972097
except Exception as e:
20982098
# Construct the Owlery URL for debugging failed queries
2099-
owlery_base = "https://owl.virtualflybrain.org/kbs/vfb" # Default
2099+
owlery_base = "http://owl.virtualflybrain.org/kbs/vfb" # Default
21002100
try:
21012101
if hasattr(vc.vfb, 'oc') and hasattr(vc.vfb.oc, 'owlery_endpoint'):
21022102
owlery_base = vc.vfb.oc.owlery_endpoint.rstrip('/')
@@ -2107,8 +2107,10 @@ def _owlery_query_to_results(owl_query_string: str, short_form: str, return_data
21072107
query_encoded = quote(owl_query_string, safe='')
21082108
owlery_url = f"{owlery_base}/subclasses?object={query_encoded}"
21092109

2110-
print(f"ERROR: Owlery query failed: {e}")
2111-
print(f" Test URL: {owlery_url}")
2110+
# Always use stderr for error messages to ensure they are visible
2111+
import sys
2112+
print(f"ERROR: Owlery query failed: {e}", file=sys.stderr)
2113+
print(f" Test URL: {owlery_url}", file=sys.stderr)
21122114
import traceback
21132115
traceback.print_exc()
21142116
# Return error indication with count=-1

0 commit comments

Comments
 (0)