Skip to content

Commit 25d81a9

Browse files
committed
Implement NeuronsPartHere query and corresponding test script
1 parent 15f86ad commit 25d81a9

3 files changed

Lines changed: 308 additions & 1 deletion

File tree

src/vfbquery/solr_result_cache.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -635,7 +635,19 @@ def wrapper(*args, **kwargs):
635635
result = func(*args, **kwargs)
636636

637637
# Cache the result asynchronously to avoid blocking
638-
if result:
638+
# Handle DataFrame, dict, and other result types properly
639+
result_is_valid = False
640+
if result is not None:
641+
if hasattr(result, 'empty'): # DataFrame
642+
result_is_valid = not result.empty
643+
elif isinstance(result, dict):
644+
result_is_valid = bool(result)
645+
elif isinstance(result, (list, str)):
646+
result_is_valid = len(result) > 0
647+
else:
648+
result_is_valid = True
649+
650+
if result_is_valid:
639651
# Validate result before caching for term_info
640652
if query_type == 'term_info':
641653
if (result and isinstance(result, dict) and

src/vfbquery/vfb_queries.py

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,15 @@ def term_info_parse_object(results, short_form):
658658
q = NeuronInputsTo_to_schema(termInfo["Name"], {"neuron_short_form": vfbTerm.term.core.short_form})
659659
queries.append(q)
660660

661+
# NeuronsPartHere query - for Class+Anatomy terms (synaptic neuropils, etc.)
662+
# Matches XMI criteria: Class + Synaptic_neuropil, or other anatomical regions
663+
if contains_all_tags(termInfo["SuperTypes"], ["Class"]) and (
664+
"Synaptic_neuropil" in termInfo["SuperTypes"] or
665+
"Anatomy" in termInfo["SuperTypes"]
666+
):
667+
q = NeuronsPartHere_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
668+
queries.append(q)
669+
661670
# Add Publications to the termInfo object
662671
if vfbTerm.pubs and len(vfbTerm.pubs) > 0:
663672
publications = []
@@ -824,6 +833,30 @@ def ListAllAvailableImages_to_schema(name, take_default):
824833

825834
return Query(query=query, label=label, function=function, takes=takes, preview=preview, preview_columns=preview_columns)
826835

836+
def NeuronsPartHere_to_schema(name, take_default):
    """
    Build the Query schema entry for the NeuronsPartHere query.

    The query lists neuron classes that have some part overlapping the
    given anatomical region.

    Matching criteria from XMI:
    - Class + Synaptic_neuropil (types.1 + types.5)
    - Additional type matches for comprehensive coverage

    Query chain: Owlery subclass query → process → SOLR
    OWL query: "Neuron and overlaps some $ID"

    :param name: display name of the term, interpolated into the query label
    :param take_default: value stored under the "default" key of ``takes``
    :return: Query describing how to run and preview NeuronsPartHere
    """
    # Everything is passed straight to Query; the function named here,
    # get_neurons_with_part_in, is the one that executes the query.
    return Query(
        query="NeuronsPartHere",
        label=f"Neurons with some part in {name}",
        function="get_neurons_with_part_in",
        takes={
            "short_form": {"$and": ["Class", "Anatomy"]},
            "default": take_default,
        },
        preview=5,
        preview_columns=["id", "label", "tags", "thumbnail"],
    )
859+
827860
def serialize_solr_output(results):
828861
# Create a copy of the document and remove Solr-specific fields
829862
doc = dict(results.docs[0])
@@ -1544,6 +1577,184 @@ def contains_all_tags(lst: List[str], tags: List[str]) -> bool:
15441577
"""
15451578
return all(tag in lst for tag in tags)
15461579

1580+
def _empty_neurons_part_here_result(return_dataframe):
    """Return an empty NeuronsPartHere result in the shape chosen by *return_dataframe*."""
    if return_dataframe:
        return pd.DataFrame()
    return {
        "headers": _get_neurons_part_here_headers(),
        "rows": [],
        "count": 0
    }


def _neurons_part_here_thumbnail(anat_data, neuron_short_form, fallback_label):
    """Build thumbnail markdown from the first anatomy channel image, or '' if unavailable."""
    from urllib.parse import unquote

    anatomy_images = anat_data.get('anatomy_channel_image', [])
    if not anatomy_images:
        return ''

    # Only the first anatomy channel image (example instance) is used.
    first_img = anatomy_images[0]
    image_info = first_img.get('channel_image', {}).get('image', {})
    thumbnail_url = image_info.get('image_thumbnail', '')
    if not thumbnail_url:
        return ''

    # Convert to HTTPS and use the non-transparent version.
    thumbnail_url = thumbnail_url.replace('http://', 'https://').replace('thumbnailT.png', 'thumbnail.png')

    # Markdown is only produced when template information is present,
    # because the alt text is "<anatomy> aligned to <template>".
    template_anatomy = image_info.get('template_anatomy', {})
    if not template_anatomy:
        return ''

    template_label = unquote(template_anatomy.get('symbol') or template_anatomy.get('label', ''))
    anatomy_info = first_img.get('anatomy', {})
    anatomy_label = unquote(anatomy_info.get('symbol') or anatomy_info.get('label', fallback_label))
    alt_text = f"{anatomy_label} aligned to {template_label}"
    return f"[![{alt_text}]({thumbnail_url} '{alt_text}')]({neuron_short_form})"


def _neurons_part_here_source(anat_data):
    """Return (source, source_id) markdown taken from the first data-source xref."""
    source = ''
    source_id = ''
    for xref in anat_data.get('xrefs') or []:
        if not xref.get('is_data_source', False):
            continue

        site_info = xref.get('site', {})
        site_label = site_info.get('symbol') or site_info.get('label', '')
        site_short_form = site_info.get('short_form', '')
        if site_label and site_short_form:
            source = f"[{site_label}]({site_short_form})"

        accession = xref.get('accession', '')
        link_base = xref.get('link_base', '')
        if accession and link_base:
            source_id = f"[{accession}]({link_base}{accession})"
        # Stop at the first data-source xref even if it yielded no links.
        break
    return source, source_id


def _neurons_part_here_row(anat_data, neuron_id):
    """Convert one parsed ``anat_query`` document into a result-table row dict."""
    from urllib.parse import unquote

    term_core = anat_data.get('term', {}).get('core', {})
    neuron_short_form = term_core.get('short_form', neuron_id)

    # Prefer symbol over label; the values are URL-decoded before use.
    label_text = unquote(term_core.get('symbol') or term_core.get('label', 'Unknown'))

    source, source_id = _neurons_part_here_source(anat_data)
    return {
        'id': neuron_short_form,
        'label': f"[{label_text}]({neuron_short_form})",
        'tags': '|'.join(term_core.get('unique_facets', [])),
        'source': source,
        'source_id': source_id,
        'thumbnail': _neurons_part_here_thumbnail(anat_data, neuron_short_form, label_text)
    }


@with_solr_cache('neurons_part_here')
def get_neurons_with_part_in(short_form: str, return_dataframe=True, limit: int = -1):
    """
    Retrieves neuron classes that have some part overlapping with the specified anatomical region.

    This implements the NeuronsPartHere query from the VFB XMI specification.
    Query chain (from XMI): Owlery (Index 1) → Process → SOLR (Index 3)
    OWL query: "'Neuron' that 'overlaps' some '<anatomical_region>'"

    Errors are reported with ``print`` and never propagate: a failure for a
    single neuron skips that row, any other failure yields an empty result.

    :param short_form: short form of the anatomical region (Class)
    :param return_dataframe: Returns pandas dataframe if true, otherwise returns formatted dict
    :param limit: maximum number of results to return (default -1, returns all results)
    :return: Neuron classes with parts in the specified region. The dict form has
        "headers", "rows" and "count"; "count" is the number of classes matched
        by the Owlery query before ``limit`` is applied.
    """
    try:
        # Step 1: ask Owlery for the neuron classes overlapping this region.
        # NOTE(review): query_by_label=True is used although the quoted term is
        # a short form - confirm the Owlery client resolves IDs as well as labels.
        neuron_class_ids = vc.vfb.oc.get_subclasses(
            query=f"'Neuron' that 'overlaps' some '{short_form}'",
            query_by_label=True,
            verbose=False
        )

        if not neuron_class_ids:
            return _empty_neurons_part_here_result(return_dataframe)

        # Record the total BEFORE truncating so "count" reports every match,
        # not just the number of rows that survive the limit.
        total_count = len(neuron_class_ids)
        if limit > 0:
            # Truncate before the SOLR lookups to save processing.
            neuron_class_ids = neuron_class_ids[:limit]

        # Step 2: one SOLR lookup per class, fetching only the 'anat_query'
        # field, which holds the result-row metadata as a JSON string.
        rows = []
        for neuron_id in neuron_class_ids:
            try:
                results = vfb_solr.search(
                    q=f'id:{neuron_id}',
                    fl='anat_query',
                    rows=1
                )
                if not (results.hits > 0 and results.docs and 'anat_query' in results.docs[0]):
                    continue

                anat_data = json.loads(results.docs[0]['anat_query'][0])
                rows.append(_neurons_part_here_row(anat_data, neuron_id))
            except Exception as e:
                # Best effort: one bad document must not abort the whole query.
                print(f"Error fetching SOLR data for {neuron_id}: {e}")
                continue

        if return_dataframe:
            df = pd.DataFrame(rows)
            if df.empty:
                # A frame built from no rows has no 'label'/'thumbnail'
                # columns, so there is nothing to encode.
                return df
            return encode_markdown_links(df, ['label', 'thumbnail'])

        return {
            "headers": _get_neurons_part_here_headers(),
            "rows": rows,
            "count": total_count
        }

    except Exception as e:
        print(f"Error in get_neurons_with_part_in: {e}")
        import traceback
        traceback.print_exc()
        # Return empty results with proper structure
        return _empty_neurons_part_here_result(return_dataframe)
1745+
1746+
def _get_neurons_part_here_headers():
1747+
"""Return standard headers for get_neurons_with_part_in results"""
1748+
return {
1749+
"id": {"title": "Add", "type": "selection_id", "order": -1},
1750+
"label": {"title": "Name", "type": "markdown", "order": 0, "sort": {0: "Asc"}},
1751+
"tags": {"title": "Tags", "type": "tags", "order": 2},
1752+
"source": {"title": "Data Source", "type": "metadata", "order": 3},
1753+
"source_id": {"title": "Data Source ID", "type": "metadata", "order": 4},
1754+
"thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9}
1755+
}
1756+
1757+
15471758
def fill_query_results(term_info):
15481759
for query in term_info['Queries']:
15491760
# print(f"Query Keys:{query.keys()}")

test_neurons_part_here.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Test script for NeuronsPartHere query implementation.
4+
Tests with medulla [FBbt_00003748] which should return 471 results per the screenshot.
5+
"""
6+
7+
import sys
8+
import os
9+
10+
# Add src to path
11+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
12+
13+
from vfbquery.vfb_queries import get_neurons_with_part_in
14+
15+
def test_neurons_part_here():
    """Run the NeuronsPartHere query for medulla and report the outcome.

    Prints a human-readable report and returns True when the query yields a
    non-empty DataFrame, False when it yields nothing or raises. A count
    other than 471 only produces a warning; it does not change the result.
    """
    rule = "=" * 80

    print(rule)
    print("Testing NeuronsPartHere query with medulla [FBbt_00003748]")
    print("Expected: 471 results (from screenshot)")
    print(rule)
    print()

    # Test with medulla - should return 471 results
    medulla_id = "FBbt_00003748"

    try:
        print(f"Querying neurons with parts in medulla ({medulla_id})...")
        print()

        # Get results as dataframe
        results_df = get_neurons_with_part_in(medulla_id, return_dataframe=True, limit=-1)

        # Guard clause: nothing came back, so report the failure and stop.
        if results_df is None or results_df.empty:
            print("✗ FAILED: No results returned")
            print()
            print(rule)
            print("QUERY FAILED - No results")
            print(rule)
            return False

        count = len(results_df)
        print(f"✓ SUCCESS: Found {count} neuron classes")
        print()

        # Show first few results
        print("First 5 results:")
        print("-" * 80)
        for position, row in results_df.head(5).iterrows():
            print(f" {position+1}. {row.get('label', 'N/A')[:60]}")
            print(f" ID: {row.get('id', 'N/A')}")
            print(f" Tags: {row.get('tags', 'N/A')[:60]}")
            print()

        # Verify count matches expected
        if count == 471:
            print("✓✓ PERFECT MATCH: Got exactly 471 results as expected!")
        elif 450 < count < 500:
            print(f"⚠ CLOSE: Got {count} results (expected 471)")
            print(" This might be due to data updates in VFB")
        else:
            print(f"⚠ WARNING: Expected 471 results but got {count}")

        print()
        print(rule)
        print("QUERY SUCCESSFUL")
        print(rule)
        return True

    except Exception as e:
        print(f"✗ ERROR: {type(e).__name__}: {e}")
        print()
        import traceback
        traceback.print_exc()
        print()
        print(rule)
        print("QUERY FAILED - Exception occurred")
        print(rule)
        return False
81+
82+
if __name__ == "__main__":
    # Exit status 0 when the check reports success, 1 otherwise.
    exit_code = 0 if test_neurons_part_here() else 1
    sys.exit(exit_code)

0 commit comments

Comments
 (0)