|
| 1 | +""" |
| 2 | +Simple Owlery REST API client to replace VFBConnect dependency. |
| 3 | +
|
| 4 | +This module provides direct HTTP access to the Owlery OWL reasoning service, |
| 5 | +eliminating the need for vfb_connect which has problematic GUI dependencies. |
| 6 | +""" |
| 7 | + |
| 8 | +import requests |
| 9 | +import json |
| 10 | +import pandas as pd |
| 11 | +import re |
| 12 | +from urllib.parse import quote |
| 13 | +from typing import List, Optional, Dict, Any, Union |
| 14 | + |
| 15 | + |
def short_form_to_iri(short_form: str) -> str:
    """
    Build the full OBO library IRI for a short form.

    The short form is appended unchanged (underscore included) to the
    OBO PURL base.

    :param short_form: Short form like 'FBbt_00003748'
    :return: Full IRI like 'http://purl.obolibrary.org/obo/FBbt_00003748'
    """
    obo_base = "http://purl.obolibrary.org/obo/"
    return "{}{}".format(obo_base, short_form)
| 25 | + |
| 26 | + |
def gen_short_form(iri: str) -> str:
    """
    Derive the short_form of an IRI (VFBConnect compatible).

    The IRI is split on every '/' and '#'; the final segment is the
    short form. A string containing neither separator is returned as-is.

    :param iri: An IRI string
    :return: short_form
    """
    *_, last_segment = re.split('/|#', iri)
    return last_segment
| 36 | + |
| 37 | + |
class OwleryClient:
    """
    Simple client for the Owlery OWL reasoning service.

    Provides a minimal interface matching VFBConnect's OWLeryConnect
    functionality for the subclass queries needed by VFBquery.
    """

    # CURIE prefix map sent (JSON-encoded) with every subclasses request.
    _PREFIXES = {
        "FBbt": "http://purl.obolibrary.org/obo/FBbt_",
        "RO": "http://purl.obolibrary.org/obo/RO_",
        "BFO": "http://purl.obolibrary.org/obo/BFO_",
    }

    # Anything wrapped in angle brackets: <FBbt_00003748> or <http://...>.
    _BRACKETED = re.compile(r'<([^>]+)>')

    def __init__(self, owlery_endpoint: str = "http://owl.virtualflybrain.org/kbs/vfb",
                 timeout: float = 120):
        """
        Initialize Owlery client.

        :param owlery_endpoint: Base URL for Owlery service (default: VFB public instance).
            A trailing '/' is stripped.
        :param timeout: Seconds to wait for an Owlery response. The default is
            long because complex class expressions can be slow to answer.
        """
        self.owlery_endpoint = owlery_endpoint.rstrip('/')
        self.timeout = timeout

    @staticmethod
    def _expand_short_forms(query: str) -> str:
        """Rewrite every <SHORT_FORM> in *query* as <FULL_IRI>; leave other bracketed tokens alone."""
        def _replace(match):
            token = match.group(1)
            # Heuristic: a short form has an underscore and no slash.
            # Tokens with a slash are taken to be IRIs already.
            if '_' in token and '/' not in token:
                return f"<{short_form_to_iri(token)}>"
            return match.group(0)

        return OwleryClient._BRACKETED.sub(_replace, query)

    @classmethod
    def _build_params(cls, iri_query: str, direct: bool) -> Dict[str, str]:
        """Assemble the query-string parameters for the subclasses endpoint."""
        return {
            'object': iri_query,
            'prefixes': json.dumps(cls._PREFIXES),
            # Always send the flag explicitly so the result never depends on
            # the server-side default. The earlier code sent 'False' only when
            # direct was True, i.e. the opposite of what was requested.
            'direct': 'true' if direct else 'false',
        }

    @staticmethod
    def _extract_iris(data: Any, query: str, verbose: bool) -> List[str]:
        """Pull the list of IRIs out of a decoded Owlery response; [] if the shape is unexpected."""
        if isinstance(data, dict) and 'superClassOf' in data:
            # Key used by VFBConnect for subclass results.
            iris = data['superClassOf']
        elif isinstance(data, list):
            # Fallback: simple list response
            iris = data
        else:
            if verbose:
                print(f"Unexpected Owlery response format: {type(data)}")
                print(f"Response: {data}")
            return []

        if not isinstance(iris, list):
            if verbose:
                print(f"Warning: No results! This is likely due to a query error")
                print(f"Query: {query}")
            return []

        return iris

    def get_subclasses(self, query: str, query_by_label: bool = True,
                       verbose: bool = False, prefixes: bool = False, direct: bool = False) -> List[str]:
        """
        Query Owlery for subclasses matching an OWL class expression.

        This replicates the VFBConnect OWLeryConnect.get_subclasses() method.
        Based on: https://github.com/VirtualFlyBrain/VFB_connect/blob/master/src/vfb_connect/owl/owlery_query_tools.py

        :param query: OWL class expression query string (with short forms like '<FBbt_00003748>').
            Short forms in angle brackets are expanded to full OBO IRIs before sending.
        :param query_by_label: Accepted for signature compatibility with VFBConnect;
            not used by this implementation (no label lookup is performed).
        :param verbose: If True, print debug information
        :param prefixes: Accepted for signature compatibility; not used by this
            implementation. Short forms are always returned.
        :param direct: Return direct subclasses only. Default False.
        :return: List of class IDs (short forms like 'FBbt_00003748'); an empty
            list when the response has an unexpected shape.
        :raises requests.RequestException: If the HTTP request fails or returns an error status.
        """
        try:
            iri_query = self._expand_short_forms(query)

            if verbose:
                print(f"Original query: {query}")
                print(f"IRI query: {iri_query}")

            response = requests.get(
                f"{self.owlery_endpoint}/subclasses",
                params=self._build_params(iri_query, direct),
                timeout=self.timeout,
            )

            if verbose:
                print(f"Owlery query: {response.url}")

            response.raise_for_status()

            # Expected payload shape: {"superClassOf": ["IRI1", "IRI2", ...]}
            data = response.json()

            if verbose:
                print(f"Response keys: {data.keys() if isinstance(data, dict) else 'not a dict'}")

            iris = self._extract_iris(data, query, verbose)

            # Reuse the module-level helper rather than redefining it locally.
            short_forms = [gen_short_form(iri) for iri in iris]

            if verbose:
                print(f"Found {len(short_forms)} subclasses")

            return short_forms

        except requests.RequestException as e:
            print(f"ERROR: Owlery request failed: {e}")
            raise
        except Exception as e:
            # Report and re-raise: callers still see the original exception.
            print(f"ERROR: Unexpected error in Owlery query: {e}")
            raise
| 163 | + |
| 164 | + |
class MockNeo4jClient:
    """
    Stand-in for the Neo4j client whose only job is to fail loudly.

    Neo4j queries require full vfb_connect installation which has
    GUI dependencies. This mock provides clear error messages.
    """

    def commit_list(self, queries):
        """
        Refuse to run Cypher queries; always raises.

        :param queries: List of Cypher queries (never executed)
        :raises NotImplementedError: Always - Neo4j requires full vfb_connect
        """
        explanation = "".join((
            "Neo4j queries require full vfb_connect installation. ",
            "In development environment without GUI libraries, only Owlery-based ",
            "queries are available (e.g., get_neurons_with_part_in, get_parts_of, etc.). ",
            "Neo4j-based queries (e.g., get_instances, get_similar_neurons) are not available.",
        ))
        raise NotImplementedError(explanation)
| 186 | + |
| 187 | + |
class SimpleVFBConnect:
    """
    Minimal replacement for VFBConnect that works in headless environments.

    Provides:
    - Owlery client (vc.vfb.oc) for OWL reasoning queries
    - Mock Neo4j client (vc.nc) that raises informative errors
    - SOLR term info fetcher (vc.get_TermInfo) for term metadata

    This eliminates the need for vfb_connect which requires GUI libraries
    (vispy, Quartz.framework on macOS) that aren't available in all dev environments.
    """

    def __init__(self, solr_url: str = "https://solr.virtualflybrain.org/solr/vfb_json"):
        """
        Initialize simple VFB connection with Owlery and SOLR access.

        :param solr_url: Base URL for SOLR server (default: VFB public instance).
            A trailing '/' is stripped so request URLs never contain '//select'.
        """
        # Both clients are created lazily on first property access.
        self._vfb = None
        self._nc = None
        self.solr_url = solr_url.rstrip('/')

    @property
    def vfb(self):
        """Get VFB object with Owlery client (created on first access, then cached)."""
        if self._vfb is None:
            # Tiny holder object so callers can write vc.vfb.oc as with VFBConnect.
            class VFBObject:
                def __init__(self):
                    self.oc = OwleryClient()
            self._vfb = VFBObject()
        return self._vfb

    @property
    def nc(self):
        """Get Neo4j client (mock that raises errors); created on first access, then cached."""
        if self._nc is None:
            self._nc = MockNeo4jClient()
        return self._nc

    @staticmethod
    def _as_id_list(short_forms) -> List[str]:
        """Normalize the *short_forms* argument to a list, wrapping a bare string."""
        # A bare string would otherwise be iterated character by character,
        # issuing one SOLR request per character.
        if isinstance(short_forms, str):
            return [short_forms]
        return list(short_forms)

    def _fetch_term_info(self, short_form: str) -> Optional[Dict[str, Any]]:
        """Fetch and decode the term_info document for one ID; None if SOLR has nothing for it."""
        response = requests.get(
            f"{self.solr_url}/select",
            params={
                "indent": "true",
                "fl": "term_info",
                "q.op": "OR",
                "q": f"id:{short_form}",
            },
            timeout=30,
        )
        response.raise_for_status()

        docs = response.json().get("response", {}).get("docs", [])

        if not docs:
            print(f"WARNING: No results found for {short_form}")
            return None

        if "term_info" not in docs[0] or not docs[0]["term_info"]:
            print(f"WARNING: No term_info found for {short_form}")
            return None

        # The first term_info entry is a string that is itself JSON.
        return json.loads(docs[0]["term_info"][0])

    def get_TermInfo(self, short_forms: List[str],
                     return_dataframe: bool = False,
                     summary: bool = False) -> Union[List[Dict[str, Any]], pd.DataFrame]:
        """
        Fetch term info from SOLR directly.

        This replicates VFBConnect's get_TermInfo method using direct SOLR queries.
        One request is issued per ID. Failures for an individual ID are reported
        on stdout and that ID is skipped; they do not abort the whole call.

        :param short_forms: List of term IDs to fetch (e.g., ['FBbt_00003748']).
            A single ID given as a plain string is treated as a one-element list.
        :param return_dataframe: If True, return as pandas DataFrame. When no
            term info was found, or the conversion fails, the plain list is
            returned instead.
        :param summary: If True, return summarized version (currently ignored)
        :return: List of term info dictionaries or DataFrame
        """
        results = []

        for short_form in self._as_id_list(short_forms):
            # Best-effort per ID: report the problem and move on to the next one.
            try:
                term_info = self._fetch_term_info(short_form)
            except requests.RequestException as e:
                print(f"ERROR: Error fetching data from SOLR: {e}")
            except json.JSONDecodeError as e:
                print(f"ERROR: Error decoding JSON for {short_form}: {e}")
            except Exception as e:
                print(f"ERROR: Unexpected error for {short_form}: {e}")
            else:
                if term_info is not None:
                    results.append(term_info)

        # Convert to DataFrame if requested
        if return_dataframe and results:
            try:
                return pd.json_normalize(results)
            except Exception as e:
                print(f"ERROR: Error converting to DataFrame: {e}")
                return results

        return results
0 commit comments