Skip to content

Commit 300ce4d

Browse files
committed
Implement parallel fetching for term info in SimpleVFBConnect to improve performance
1 parent b6e763e commit 300ce4d

1 file changed

Lines changed: 33 additions & 16 deletions

File tree

src/vfbquery/owlery_client.py

Lines changed: 33 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
import re
1212
from urllib.parse import quote
1313
from typing import List, Optional, Dict, Any, Union
14+
import concurrent.futures
1415

1516

1617
def short_form_to_iri(short_form: str) -> str:
@@ -399,9 +400,11 @@ def get_TermInfo(self, short_forms: List[str],
399400
:param summary: If True, return summarized version (currently ignored)
400401
:return: List of term info dictionaries or DataFrame
401402
"""
402-
results = []
403-
404-
for short_form in short_forms:
403+
# Fetch term info entries in parallel to speed up multiple short_form requests.
404+
# We preserve input order in the returned list.
405+
results_map = {}
406+
407+
def fetch(short_form: str):
405408
try:
406409
url = f"{self.solr_url}/select"
407410
params = {
@@ -410,32 +413,46 @@ def get_TermInfo(self, short_forms: List[str],
410413
"q.op": "OR",
411414
"q": f"id:{short_form}"
412415
}
413-
416+
414417
response = requests.get(url, params=params, timeout=30)
415418
response.raise_for_status()
416-
419+
417420
data = response.json()
418421
docs = data.get("response", {}).get("docs", [])
419-
422+
420423
if not docs:
421-
print(f"WARNING: No results found for {short_form}")
422-
continue
423-
424+
# no result for this id
425+
return None
426+
424427
if "term_info" not in docs[0] or not docs[0]["term_info"]:
425-
print(f"WARNING: No term_info found for {short_form}")
426-
continue
427-
428-
# Extract and parse the term_info string which is itself JSON
428+
return None
429+
429430
term_info_str = docs[0]["term_info"][0]
430431
term_info_obj = json.loads(term_info_str)
431-
results.append(term_info_obj)
432-
432+
return term_info_obj
433+
433434
except requests.RequestException as e:
434-
print(f"ERROR: Error fetching data from SOLR: {e}")
435+
print(f"ERROR: Error fetching data from SOLR for {short_form}: {e}")
435436
except json.JSONDecodeError as e:
436437
print(f"ERROR: Error decoding JSON for {short_form}: {e}")
437438
except Exception as e:
438439
print(f"ERROR: Unexpected error for {short_form}: {e}")
440+
return None
441+
442+
max_workers = min(10, max(1, len(short_forms)))
443+
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as exc:
444+
# as_completed yields futures in completion order, not input order, so each result is stored under its short form and the list is reassembled in input order below
445+
future_to_sf = {exc.submit(fetch, sf): sf for sf in short_forms}
446+
for fut in concurrent.futures.as_completed(future_to_sf):
447+
sf = future_to_sf[fut]
448+
try:
449+
res = fut.result()
450+
results_map[sf] = res
451+
except Exception as e:
452+
print(f"ERROR: Exception while fetching {sf}: {e}")
453+
454+
# Build results list in the same order as short_forms input, skipping None results
455+
results = [results_map[sf] for sf in short_forms if sf in results_map and results_map[sf] is not None]
439456

440457
# Convert to DataFrame if requested
441458
if return_dataframe and results:

0 commit comments

Comments
 (0)