Skip to content

Commit 1ec1b74

Browse files
committed
Merge feature/termsforpub-reference-type: TermsForPub expression refs + Reference type column (v1.20.0)
2 parents 3a2bfc8 + f7a2146 commit 1ec1b74

2 files changed

Lines changed: 71 additions & 29 deletions

File tree

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33

44
here = path.abspath(path.dirname(__file__))
55

6-
__version__ = "1.19.0"
6+
__version__ = "1.20.0"
77

88
# Get the long description from the README file
99
with open(path.join(here, 'README.md')) as f:

src/vfbquery/vfb_queries.py

Lines changed: 70 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -1971,7 +1971,7 @@ def AllDatasets_to_schema(name, take_default):
19711971

19721972
def TermsForPub_to_schema(name, take_default):
19731973
"""Schema for TermsForPub query."""
1974-
return Query(query="TermsForPub", label=f"Terms referencing {name}", function="get_terms_for_pub", takes={"short_form": {"$and": ["Individual", "pub"]}, "default": take_default}, preview=10, preview_columns=["id", "name", "tags", "type"])
1974+
return Query(query="TermsForPub", label=f"Terms referencing {name}", function="get_terms_for_pub", takes={"short_form": {"$and": ["Individual", "pub"]}, "default": take_default}, preview=10, preview_columns=["id", "name", "reference_type", "tags", "type"])
19751975

19761976

19771977
def TransgeneExpressionHere_to_schema(name, take_default):
@@ -5551,27 +5551,30 @@ def get_terms_for_pub(pub_short_form: str, return_dataframe=True, limit: int = -
55515551
these cells empty — matches v2 prod which shows the columns blank
55525552
on dataset rows (e.g. Wolff2018).
55535553
"""
5554-
count_query = f"MATCH (:pub:Individual {{short_form:'{pub_short_form}'}})<-[:has_reference]-(primary:Individual) RETURN count(DISTINCT primary) AS count"
5555-
count_results = vc.nc.commit_list([count_query])
5556-
total_count = get_dict_cursor()(count_results)[0]['count'] if count_results else 0
5557-
5558-
# Apply LIMIT before the CALL subquery fires so the multi-hop walk
5559-
# only runs on the rows we actually return — same pattern as
5560-
# AnatomyExpressedIn / TransgeneExpressionHere.
5561-
limit_clause = f"LIMIT {limit}" if limit != -1 else ""
5562-
main_query = f"""
5554+
# A publication is cited two different ways in the graph, and the legacy
5555+
# TermsForPub only saw the first:
5556+
# 1. Reference — a term has a direct (:term)-[:has_reference]->(:pub)
5557+
# edge (datasets, images, anatomy the paper is the
5558+
# source/citation for).
5559+
# 2. Expression — the pub is recorded as a `pub` array PROPERTY on an
5560+
# overlaps/part_of relationship of an expression-pattern
5561+
# individual (the same model AnatomyExpressedIn /
5562+
# TransgeneExpressionHere read via `r.pub`). Expression-
5563+
# data papers (e.g. FBrf0232433, VT-GAL4 lines) have NO
5564+
# has_reference edges at all, so the old query returned
5565+
# nothing despite thousands of referenced patterns.
5566+
# We surface both and add a "Reference type" column so users can tell why
5567+
# each term is listed. NB: the Expression branch scans overlaps/part_of by
5568+
# the relationship `pub` property (no node path exists — the pub node has
5569+
# no edges), so it is the expensive leg; the whole query is cached.
5570+
5571+
# Source 1: direct has_reference terms, with image enrichment for
5572+
# channel-image primaries (one representative image via the CALL).
5573+
ref_query = f"""
55635574
MATCH (:pub:Individual {{short_form:'{pub_short_form}'}})<-[:has_reference]-(primary:Individual)
55645575
OPTIONAL MATCH (primary)-[:INSTANCEOF]->(typ:Class)
55655576
WITH DISTINCT primary, typ
5566-
ORDER BY primary.label
5567-
{limit_clause}
55685577
CALL {{
5569-
// primary is the channel itself when it's a channel_image —
5570-
// walk to its template alignment + imaging technique.
5571-
// For non-image primaries (dataset, EP, anatomy) these
5572-
// OPTIONAL MATCHes return null and the row's
5573-
// template / technique / thumbnail cells render empty,
5574-
// matching v2 prod's behaviour on dataset rows.
55755578
WITH primary
55765579
OPTIONAL MATCH (primary)-[irw:in_register_with]->(template:Individual)-[:depicts]->(template_anat:Individual)
55775580
OPTIONAL MATCH (primary)-[:is_specified_output_of]->(technique:Class)
@@ -5584,13 +5587,51 @@ def get_terms_for_pub(pub_short_form: str, return_dataframe=True, limit: int = -
55845587
apoc.text.format("[%s](%s)", [primary.label, primary.short_form]) AS name,
55855588
apoc.text.join(coalesce(primary.uniqueFacets, []), '|') AS tags,
55865589
REPLACE(apoc.text.format("[%s](%s)", [typ.label, typ.short_form]), '[null](null)', '') AS type,
5590+
'Reference' AS reference_type,
55875591
REPLACE(apoc.text.format("[%s](%s)", [CASE WHEN template_anat.symbol[0] <> '' THEN template_anat.symbol[0] ELSE template_anat.label END, template_anat.short_form]), '[null](null)', '') AS template,
55885592
coalesce(technique.label, '') AS technique,
55895593
REPLACE(apoc.text.format("[![%s](%s '%s')](%s)", [coalesce(primary.label, 'image') + " aligned to " + CASE WHEN template_anat.symbol[0] <> '' THEN template_anat.symbol[0] ELSE template_anat.label END, REPLACE(COALESCE(irw.thumbnail[0], ''), 'thumbnailT.png', 'thumbnail.png'), coalesce(primary.label, 'image') + " aligned to " + CASE WHEN template_anat.symbol[0] <> '' THEN template_anat.symbol[0] ELSE template_anat.label END, template_anat.short_form + "," + primary.short_form]), "[![null]( 'null')](null)", "") AS thumbnail
55905594
"""
55915595

5592-
results = vc.nc.commit_list([main_query])
5593-
df = pd.DataFrame.from_records(get_dict_cursor()(results))
5596+
# Source 2: expression patterns whose overlaps/part_of edges cite this pub.
5597+
exp_query = f"""
5598+
MATCH (:Individual)-[r:overlaps|part_of]->(b:Class:Expression_pattern)
5599+
WHERE '{pub_short_form}' IN r.pub
5600+
WITH DISTINCT b
5601+
RETURN
5602+
b.short_form AS id,
5603+
apoc.text.format("[%s](%s)", [b.label, b.short_form]) AS name,
5604+
apoc.text.join(coalesce(b.uniqueFacets, []), '|') AS tags,
5605+
'' AS type,
5606+
'Expression' AS reference_type,
5607+
'' AS template,
5608+
'' AS technique,
5609+
'' AS thumbnail
5610+
"""
5611+
5612+
df_ref = pd.DataFrame.from_records(get_dict_cursor()(vc.nc.commit_list([ref_query])))
5613+
df_exp = pd.DataFrame.from_records(get_dict_cursor()(vc.nc.commit_list([exp_query])))
5614+
df = pd.concat([df_ref, df_exp], ignore_index=True, sort=False)
5615+
5616+
if not df.empty:
5617+
# A term could be cited both ways — collapse to one row per term and
5618+
# join its reference types (e.g. "Expression; Reference").
5619+
df = (df.groupby('id', as_index=False, sort=False)
5620+
.agg({
5621+
'name': 'first',
5622+
'tags': 'first',
5623+
'type': 'first',
5624+
'reference_type': lambda s: '; '.join(sorted({x for x in s if x})),
5625+
'template': 'first',
5626+
'technique': 'first',
5627+
'thumbnail': 'first',
5628+
}))
5629+
df = df.sort_values('name', kind='stable').reset_index(drop=True)
5630+
5631+
total_count = len(df)
5632+
if limit != -1:
5633+
df = df.head(limit)
5634+
55945635
if not df.empty:
55955636
df = encode_markdown_links(df, ['name', 'template', 'thumbnail'])
55965637

@@ -5599,16 +5640,17 @@ def get_terms_for_pub(pub_short_form: str, return_dataframe=True, limit: int = -
55995640

56005641
return {
56015642
"headers": {
5602-
"id": {"title": "ID", "type": "selection_id", "order": -1},
5603-
"name": {"title": "Term", "type": "markdown", "order": 0},
5604-
"tags": {"title": "Tags", "type": "tags", "order": 1},
5605-
"type": {"title": "Type", "type": "text", "order": 2},
5606-
"template": {"title": "Template", "type": "markdown", "order": 3},
5607-
"technique": {"title": "Imaging Technique", "type": "text", "order": 4},
5608-
"thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9},
5643+
"id": {"title": "ID", "type": "selection_id", "order": -1},
5644+
"name": {"title": "Term", "type": "markdown", "order": 0},
5645+
"reference_type": {"title": "Reference type", "type": "text", "order": 1},
5646+
"tags": {"title": "Tags", "type": "tags", "order": 2},
5647+
"type": {"title": "Type", "type": "text", "order": 3},
5648+
"template": {"title": "Template", "type": "markdown", "order": 4},
5649+
"technique": {"title": "Imaging Technique", "type": "text", "order": 5},
5650+
"thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9},
56095651
},
56105652
"rows": [
5611-
{k: row[k] for k in ["id", "name", "tags", "type", "template", "technique", "thumbnail"]}
5653+
{k: row[k] for k in ["id", "name", "reference_type", "tags", "type", "template", "technique", "thumbnail"]}
56125654
for row in safe_to_dict(df, sort_by_id=False)
56135655
],
56145656
"count": total_count,

0 commit comments

Comments
 (0)