Skip to content

Commit d2aeb21

Browse files
committed
Rename ExpressionOverlapsHere -> AnatomyExpressedIn (with legacy alias)
Following Clare's question and the audit it triggered: the v2 XMI already declared two semantically-distinct query slots for the anatomy <-> expression-pattern relationship:

  TransgeneExpressionHere — anatomy -> expression patterns (forward)
  ExpressionOverlapsHere — expression pattern -> anatomy (inverse)

But the schema generator in this repo emitted ExpressionOverlapsHere against anatomy entities with a forward-direction label ("Expression patterns overlapping <anatomy>") AND the underlying Cypher was the forward direction too. Result: v2 listed the inverse-named query in the wrong place, and the inverse direction (Clare's URL on VFBexp_FBtp0001321) returned zero because the function was filtering on anat.short_form, not ep.short_form.

v1.13.6 fixed the Cypher; v1.13.7 finishes the rename so the names match the meaning:

- ExpressionOverlapsHere_to_schema -> AnatomyExpressedIn_to_schema. Returns query_type=AnatomyExpressedIn, label "Anatomy where <name> is expressed", and a takes constraint of Class+Expression_pattern OR Class+Expression_pattern_fragment. The legacy function name is kept as a module-level alias so any direct importer continues to resolve.
- term_info schema emit sites moved off the anatomy condition. The two call sites (top-level term enrichment and the parent-types loop) now gate on Expression_pattern / Expression_pattern_fragment membership.
- ha_api.QUERY_TYPE_MAP gets BOTH "AnatomyExpressedIn" and "ExpressionOverlapsHere" pointing at the same get_expression_overlaps_here function. Pre-existing bookmarked URLs (e.g. Clare's ?q=VFBexp_FBtp0001321,ExpressionOverlapsHere) keep working unchanged.
- Tests renamed/updated for the new canonical name; the alias check remains so any future drop of the legacy name surfaces in CI.
Companion XMI patch lands in geppetto-vfb at the same time — adds a new CompoundRefQuery id="AnatomyExpressedIn" plus a back-compat CompoundRefQuery id="ExpressionOverlapsHere" that points at the same internal chain (so both URL handles resolve). TransgeneExpressionHere is untouched per the discussion — its name reads correctly for its forward semantics.
1 parent ff209cd commit d2aeb21

3 files changed

Lines changed: 151 additions & 105 deletions

File tree

src/test/test_expression_overlaps.py

Lines changed: 32 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
"""
2-
Test suite for ExpressionOverlapsHere query (get_expression_overlaps_here)
2+
Test suite for ExpressionOverlapsHere query (get_expression_overlaps_here).
33
4-
This test verifies the Neo4j query implementation that finds expression patterns
5-
overlapping with specified anatomical regions.
4+
INVERSE-direction query as of VFBquery v1.13.6 — given an expression
5+
pattern, return the anatomy classes whose Individuals overlap with the
6+
pattern's Individuals. The forward direction (anatomy -> expression
7+
patterns) is now solely owned by TransgeneExpressionHere.
68
79
XMI Source: https://raw.githubusercontent.com/VirtualFlyBrain/geppetto-vfb/master/model/vfb.xmi
8-
Query: anat_2_ep_query
10+
Query: ExpressionOverlapsHere ("Anatomy $NAME is expressed in")
911
"""
1012

1113
import unittest
@@ -24,7 +26,7 @@ class TestExpressionOverlapsHere(unittest.TestCase):
2426
def test_expression_overlaps_basic_dataframe(self):
2527
"""Test basic query returns DataFrame with expected columns"""
2628
# Test with the P{GAL4-per.BS} expression pattern (VFBexp_FBtp0001321) - expected to overlap anatomy classes
27-
result = vq.get_expression_overlaps_here('FBbt_00003982', return_dataframe=True)
29+
result = vq.get_expression_overlaps_here('VFBexp_FBtp0001321', return_dataframe=True)
2830

2931
self.assertIsInstance(result, pd.DataFrame, "Should return pandas DataFrame")
3032

@@ -43,7 +45,7 @@ def test_expression_overlaps_basic_dataframe(self):
4345

4446
def test_expression_overlaps_formatted_output(self):
4547
"""Test query returns properly formatted dictionary output"""
46-
result = vq.get_expression_overlaps_here('FBbt_00003982', return_dataframe=False)
48+
result = vq.get_expression_overlaps_here('VFBexp_FBtp0001321', return_dataframe=False)
4749

4850
self.assertIsInstance(result, dict, "Should return dictionary when return_dataframe=False")
4951

@@ -79,7 +81,7 @@ def test_expression_overlaps_formatted_output(self):
7981
def test_expression_overlaps_limit(self):
8082
"""Test limit parameter restricts number of results"""
8183
limit = 3
82-
result = vq.get_expression_overlaps_here('FBbt_00003982', return_dataframe=True, limit=limit)
84+
result = vq.get_expression_overlaps_here('VFBexp_FBtp0001321', return_dataframe=True, limit=limit)
8385

8486
if not result.empty:
8587
self.assertLessEqual(len(result), limit, f"Should return at most {limit} results")
@@ -96,7 +98,7 @@ def test_expression_overlaps_empty_result(self):
9698

9799
def test_expression_overlaps_publication_data(self):
98100
"""Test that publication data is properly formatted when present"""
99-
result = vq.get_expression_overlaps_here('FBbt_00003982', return_dataframe=True, limit=10)
101+
result = vq.get_expression_overlaps_here('VFBexp_FBtp0001321', return_dataframe=True, limit=10)
100102

101103
if not result.empty:
102104
# Check if pubs column exists and contains data
@@ -121,7 +123,7 @@ def test_expression_overlaps_publication_data(self):
121123

122124
def test_expression_overlaps_markdown_encoding(self):
123125
"""Test that markdown links are properly formatted"""
124-
result = vq.get_expression_overlaps_here('FBbt_00003982', return_dataframe=True, limit=5)
126+
result = vq.get_expression_overlaps_here('VFBexp_FBtp0001321', return_dataframe=True, limit=5)
125127

126128
if not result.empty:
127129
# Check that names contain markdown link format [label](url)
@@ -135,7 +137,7 @@ def test_expression_overlaps_markdown_encoding(self):
135137

136138
def test_expression_overlaps_tags_format(self):
137139
"""Test that tags are properly formatted as pipe-separated strings"""
138-
result = vq.get_expression_overlaps_here('FBbt_00003982', return_dataframe=True, limit=5)
140+
result = vq.get_expression_overlaps_here('VFBexp_FBtp0001321', return_dataframe=True, limit=5)
139141

140142
if not result.empty and 'tags' in result.columns:
141143
for tags in result['tags']:
@@ -149,24 +151,32 @@ def test_expression_overlaps_tags_format(self):
149151
print(f"\n✓ Tags format verified")
150152

151153

152-
class TestExpressionOverlapsHereSchema(unittest.TestCase):
153-
"""Test cases for ExpressionOverlapsHere_to_schema function"""
154+
class TestAnatomyExpressedInSchema(unittest.TestCase):
155+
"""Test cases for AnatomyExpressedIn_to_schema (renamed from
156+
ExpressionOverlapsHere_to_schema in v1.13.7). The legacy name is
157+
kept as a back-compat alias and must continue to import.
158+
"""
154159

155160
def test_schema_function_exists(self):
156-
"""Test that the schema function is properly defined"""
157-
self.assertTrue(hasattr(vq, 'ExpressionOverlapsHere_to_schema'),
158-
"ExpressionOverlapsHere_to_schema function should exist")
161+
"""Test that both the canonical and legacy schema functions are defined."""
162+
self.assertTrue(hasattr(vq, 'AnatomyExpressedIn_to_schema'),
163+
"AnatomyExpressedIn_to_schema function should exist")
164+
self.assertTrue(hasattr(vq, 'ExpressionOverlapsHere_to_schema'),
165+
"Legacy ExpressionOverlapsHere_to_schema alias should still exist")
159166

160167
def test_schema_structure(self):
161-
"""Test that schema function returns proper Query object"""
162-
from vfbquery.vfb_queries import ExpressionOverlapsHere_to_schema
163-
164-
schema = ExpressionOverlapsHere_to_schema("test anatomy", {"short_form": "FBbt_00003982"})
165-
168+
"""Test that schema function returns proper Query object."""
169+
from vfbquery.vfb_queries import AnatomyExpressedIn_to_schema
170+
171+
schema = AnatomyExpressedIn_to_schema(
172+
"P{GAL4-per.BS} expression pattern",
173+
{"short_form": "VFBexp_FBtp0001321"},
174+
)
175+
166176
# Check Query object attributes
167-
self.assertEqual(schema.query, "ExpressionOverlapsHere")
177+
self.assertEqual(schema.query, "AnatomyExpressedIn")
168178
self.assertEqual(schema.function, "get_expression_overlaps_here")
169-
self.assertIn("Expression patterns overlapping", schema.label)
179+
self.assertIn("Anatomy where", schema.label)
170180
self.assertEqual(schema.preview, 5)
171181
self.assertEqual(schema.preview_columns, ["id", "name", "tags", "pubs"])
172182

src/vfbquery/ha_api.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,8 @@ async def security_middleware(request, handler):
300300
"epFrag": "get_expression_pattern_fragments",
301301

302302
# Expression
303-
"ExpressionOverlapsHere": "get_expression_overlaps_here",
303+
"AnatomyExpressedIn": "get_expression_overlaps_here",
304+
"ExpressionOverlapsHere": "get_expression_overlaps_here", # deprecated alias of AnatomyExpressedIn (kept for bookmarked URLs)
304305
"TransgeneExpressionHere": "get_transgene_expression_here",
305306

306307
# Transcriptomics

src/vfbquery/vfb_queries.py

Lines changed: 117 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -888,12 +888,21 @@ def term_info_parse_object(results, short_form):
888888
if termInfo["SuperTypes"] and contains_all_tags(termInfo["SuperTypes"], ["Class", "Expression_pattern"]):
889889
q = epFrag_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
890890
queries.append(q)
891-
892-
# ExpressionOverlapsHere query - for anatomical regions
893-
# Matches XMI criteria: Class + Anatomy
894-
# Returns expression patterns that overlap with the anatomical region
895-
if termInfo["SuperTypes"] and contains_all_tags(termInfo["SuperTypes"], ["Class", "Anatomy"]):
896-
q = ExpressionOverlapsHere_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
891+
892+
# AnatomyExpressedIn query - for expression patterns / fragments
893+
# Matches XMI criteria: Class + Expression_pattern OR
894+
# Class + Expression_pattern_fragment
895+
# Returns anatomy classes where this expression pattern is expressed.
896+
# Renamed from ExpressionOverlapsHere in v1.13.7 — the legacy emit
897+
# (previously gated on Class+Anatomy) was a misdirection: that path was offering
898+
# an inverse-direction query to anatomy entities, where it returned
899+
# zero. The forward-direction "transgene expression here" lookup
900+
# for anatomy entities is owned by TransgeneExpressionHere.
901+
if termInfo["SuperTypes"] and (
902+
contains_all_tags(termInfo["SuperTypes"], ["Class", "Expression_pattern"]) or
903+
contains_all_tags(termInfo["SuperTypes"], ["Class", "Expression_pattern_fragment"])
904+
):
905+
q = AnatomyExpressedIn_to_schema(termInfo["Name"], {"short_form": vfbTerm.term.core.short_form})
897906
queries.append(q)
898907

899908
# anatScRNAseqQuery query - for anatomical regions with scRNAseq data
@@ -1113,9 +1122,11 @@ def term_info_parse_object(results, short_form):
11131122
q = ImagesNeurons_to_schema(parent_label, {"short_form": parent_short_form})
11141123
queries.append(q)
11151124

1116-
if "Anatomy" in parent.types:
1117-
# ExpressionOverlapsHere query
1118-
q = ExpressionOverlapsHere_to_schema(parent_label, {"short_form": parent_short_form})
1125+
if "Expression_pattern" in parent.types or "Expression_pattern_fragment" in parent.types:
1126+
# AnatomyExpressedIn query (renamed from ExpressionOverlapsHere
1127+
# in v1.13.7 — the previous emit gated on "Anatomy" was
1128+
# forward-direction and a misdirection for this query).
1129+
q = AnatomyExpressedIn_to_schema(parent_label, {"short_form": parent_short_form})
11191130
queries.append(q)
11201131

11211132
if "Anatomy" in parent.types and "hasScRNAseq" in parent.types:
@@ -1740,25 +1751,40 @@ def epFrag_to_schema(name, take_default):
17401751
return Query(query=query, label=label, function=function, takes=takes, preview=preview, preview_columns=preview_columns)
17411752

17421753

1743-
def ExpressionOverlapsHere_to_schema(name, take_default):
1754+
def AnatomyExpressedIn_to_schema(name, take_default):
17441755
"""
1745-
Schema for ExpressionOverlapsHere query.
1746-
Finds expression patterns that overlap with a specified anatomical region.
1747-
1756+
Schema for AnatomyExpressedIn query (renamed from ExpressionOverlapsHere
1757+
in v1.13.7 to reflect its actual inverse-direction semantics).
1758+
1759+
Given an expression pattern, returns the anatomy classes in which the
1760+
pattern's Individuals overlap or are part_of anatomy Individuals.
1761+
17481762
XMI Source: https://raw.githubusercontent.com/VirtualFlyBrain/geppetto-vfb/master/model/vfb.xmi
1749-
1763+
17501764
Matching criteria from XMI:
1751-
- Class + Anatomy
1752-
1753-
Query chain: Neo4j anat_2_ep_query → process
1754-
Cypher query: MATCH (ep:Class:Expression_pattern)<-[ar:overlaps|part_of]-(anoni:Individual)-[:INSTANCEOF]->(anat:Class)
1755-
WHERE anat.short_form = $id
1765+
- Class + Expression_pattern
1766+
- Class + Expression_pattern_fragment
1767+
1768+
Cypher query:
1769+
MATCH (ep:Class:Expression_pattern)
1770+
<-[ar:overlaps|part_of]-(anoni:Individual)
1771+
-[:INSTANCEOF]->(anat:Class:Anatomy)
1772+
WHERE ep.short_form = $id
1773+
1774+
Backward compat: the legacy `ExpressionOverlapsHere` query_type is
1775+
still accepted by ha_api.QUERY_TYPE_MAP and dispatches to the same
1776+
underlying function — pre-existing bookmarked URLs continue to work.
17561777
"""
1757-
query = "ExpressionOverlapsHere"
1758-
label = f"Expression patterns overlapping {name}"
1778+
query = "AnatomyExpressedIn"
1779+
label = f"Anatomy where {name} is expressed"
17591780
function = "get_expression_overlaps_here"
17601781
takes = {
1761-
"short_form": {"$and": ["Class", "Anatomy"]},
1782+
"short_form": {
1783+
"$or": [
1784+
{"$and": ["Class", "Expression_pattern"]},
1785+
{"$and": ["Class", "Expression_pattern_fragment"]},
1786+
]
1787+
},
17621788
"default": take_default,
17631789
}
17641790
preview = 5
@@ -1767,6 +1793,11 @@ def ExpressionOverlapsHere_to_schema(name, take_default):
17671793
return Query(query=query, label=label, function=function, takes=takes, preview=preview, preview_columns=preview_columns)
17681794

17691795

1796+
# Deprecated alias — kept so any direct importer of the old name keeps
1797+
# working. New code should call AnatomyExpressedIn_to_schema directly.
1798+
ExpressionOverlapsHere_to_schema = AnatomyExpressedIn_to_schema
1799+
1800+
17701801
def anatScRNAseqQuery_to_schema(name, take_default):
17711802
"""
17721803
Schema for anatScRNAseqQuery query.
@@ -2764,86 +2795,90 @@ def get_individual_neuron_inputs(neuron_short_form: str, return_dataframe=True,
27642795
return results
27652796

27662797

2767-
def get_expression_overlaps_here(anatomy_short_form: str, return_dataframe=True, limit: int = -1):
2768-
"""
2769-
Retrieve expression patterns that overlap with the specified anatomical region.
2770-
2771-
This implements the ExpressionOverlapsHere query from the VFB XMI specification.
2772-
Finds expression patterns where individual instances overlap with or are part of the anatomy.
2773-
2774-
:param anatomy_short_form: Short form identifier of the anatomical region (e.g., 'FBbt_00003982')
2775-
:param return_dataframe: Returns pandas DataFrame if True, otherwise returns formatted dict (default: True)
2776-
:param limit: Maximum number of results to return (default: -1 for all results)
2777-
:return: Expression patterns with overlap relationships, publications, and images
2778-
:rtype: pandas.DataFrame or dict
2798+
def get_expression_overlaps_here(expression_pattern_short_form: str, return_dataframe=True, limit: int = -1):
2799+
"""Anatomy classes overlapped by the specified expression pattern.
2800+
2801+
INVERSE direction of TransgeneExpressionHere — given an expression
2802+
pattern, return the anatomy classes whose Individuals are overlapped
2803+
by (or part_of) the expression pattern's Individuals. Matches the
2804+
XMI ExpressionOverlapsHere CompoundRefQuery's description
2805+
("Anatomy $NAME is expressed in") and its matchingCriteria
2806+
(Class + Expression_pattern).
2807+
2808+
Up to v1.13.5 this function shipped as the FORWARD direction
2809+
(anatomy -> expression patterns), duplicating
2810+
get_transgene_expression_here exactly and returning 0 for any actual
2811+
expression pattern input — a migration regression from the legacy
2812+
XMI which had a separate inverse query "Query for anatomy from
2813+
expression" wired in dataSources[0]. v1.13.6 flips this function to
2814+
the inverse semantics so v2's ExpressionOverlapsHere on an expression
2815+
pattern (e.g. VFBexp_FBtp0001321 P{GAL4-per.BS}) returns the 50+
2816+
anatomy classes where the pattern is expressed.
2817+
2818+
Column shape is unchanged (id / name / tags / pubs) so v2's Geppetto
2819+
processor renders the table identically — only the column meaning
2820+
flips: id is now the anatomy short_form, name is the anatomy label.
2821+
2822+
:param expression_pattern_short_form: short_form of an
2823+
Expression_pattern Class (e.g. 'VFBexp_FBtp0001321')
2824+
:param return_dataframe: pandas DataFrame if True else formatted dict
2825+
:param limit: -1 for all results, otherwise cap on row count
27792826
"""
2780-
2781-
# Count query: count distinct expression patterns
27822827
count_query = f"""
2783-
MATCH (ep:Class:Expression_pattern)<-[ar:overlaps|part_of]-(anoni:Individual)-[:INSTANCEOF]->(anat:Class)
2784-
WHERE anat.short_form = '{anatomy_short_form}'
2785-
RETURN COUNT(DISTINCT ep) AS total_count
2828+
MATCH (ep:Class:Expression_pattern)<-[ar:overlaps|part_of]-(anoni:Individual)-[:INSTANCEOF]->(anat:Class:Anatomy)
2829+
WHERE ep.short_form = '{expression_pattern_short_form}'
2830+
RETURN COUNT(DISTINCT anat) AS total_count
27862831
"""
2787-
2832+
27882833
count_results = vc.nc.commit_list([count_query])
27892834
count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
27902835
total_count = count_df['total_count'][0] if not count_df.empty else 0
2791-
2792-
# Main query: get expression patterns with details
2836+
27932837
main_query = f"""
2794-
MATCH (ep:Class:Expression_pattern)<-[ar:overlaps|part_of]-(anoni:Individual)-[:INSTANCEOF]->(anat:Class)
2795-
WHERE anat.short_form = '{anatomy_short_form}'
2838+
MATCH (ep:Class:Expression_pattern)<-[ar:overlaps|part_of]-(anoni:Individual)-[:INSTANCEOF]->(anat:Class:Anatomy)
2839+
WHERE ep.short_form = '{expression_pattern_short_form}'
27962840
WITH DISTINCT collect(DISTINCT ar.pub[0]) as pubs, anat, ep
27972841
UNWIND pubs as p
27982842
OPTIONAL MATCH (pub:pub {{ short_form: p}})
2799-
WITH anat, ep, collect({{
2800-
core: {{ short_form: pub.short_form, label: coalesce(pub.label,''), iri: pub.iri, types: labels(pub), symbol: coalesce(pub.symbol[0], '') }},
2801-
PubMed: coalesce(pub.PMID[0], ''),
2802-
FlyBase: coalesce(([]+pub.FlyBase)[0], ''),
2803-
DOI: coalesce(pub.DOI[0], '')
2843+
WITH anat, ep, collect({{
2844+
core: {{ short_form: pub.short_form, label: coalesce(pub.label,''), iri: pub.iri, types: labels(pub), symbol: coalesce(pub.symbol[0], '') }},
2845+
PubMed: coalesce(pub.PMID[0], ''),
2846+
FlyBase: coalesce(([]+pub.FlyBase)[0], ''),
2847+
DOI: coalesce(pub.DOI[0], '')
28042848
}}) as pubs
2805-
RETURN
2806-
ep.short_form AS id,
2807-
apoc.text.format("[%s](%s)", [ep.label, ep.short_form]) AS name,
2808-
apoc.text.join(ep.uniqueFacets, '|') AS tags,
2849+
RETURN
2850+
anat.short_form AS id,
2851+
apoc.text.format("[%s](%s)", [anat.label, anat.short_form]) AS name,
2852+
apoc.text.join(coalesce(anat.uniqueFacets, []), '|') AS tags,
28092853
pubs
2810-
ORDER BY ep.label
2854+
ORDER BY anat.label
28112855
"""
2812-
2856+
28132857
if limit != -1:
28142858
main_query += f" LIMIT {limit}"
2815-
2816-
# Execute the query
2859+
28172860
results = vc.nc.commit_list([main_query])
2818-
2819-
# Convert to DataFrame
28202861
df = pd.DataFrame.from_records(get_dict_cursor()(results))
2821-
2822-
# Encode markdown links
2862+
28232863
if not df.empty:
2824-
columns_to_encode = ['name']
2825-
df = encode_markdown_links(df, columns_to_encode)
2826-
2864+
df = encode_markdown_links(df, ['name'])
2865+
28272866
if return_dataframe:
28282867
return df
2829-
else:
2830-
formatted_results = {
2831-
"headers": {
2832-
"id": {"title": "ID", "type": "selection_id", "order": -1},
2833-
"name": {"title": "Expression Pattern", "type": "markdown", "order": 0},
2834-
"tags": {"title": "Tags", "type": "tags", "order": 1},
2835-
"pubs": {"title": "Publications", "type": "metadata", "order": 2}
2836-
},
2837-
"rows": [
2838-
{
2839-
key: row[key]
2840-
for key in ["id", "name", "tags", "pubs"]
2841-
}
2842-
for row in safe_to_dict(df, sort_by_id=False)
2843-
],
2844-
"count": total_count
2845-
}
2846-
return formatted_results
2868+
2869+
return {
2870+
"headers": {
2871+
"id": {"title": "ID", "type": "selection_id", "order": -1},
2872+
"name": {"title": "Anatomy", "type": "markdown", "order": 0},
2873+
"tags": {"title": "Tags", "type": "tags", "order": 1},
2874+
"pubs": {"title": "Publications", "type": "metadata", "order": 2},
2875+
},
2876+
"rows": [
2877+
{key: row[key] for key in ["id", "name", "tags", "pubs"]}
2878+
for row in safe_to_dict(df, sort_by_id=False)
2879+
],
2880+
"count": total_count,
2881+
}
28472882

28482883

28492884
def contains_all_tags(lst: List[str], tags: List[str]) -> bool:

0 commit comments

Comments
 (0)