@@ -1971,7 +1971,7 @@ def AllDatasets_to_schema(name, take_default):
19711971
def TermsForPub_to_schema(name, take_default):
    """Schema for TermsForPub query.

    Args:
        name: Display name of the publication; interpolated into the
            query label.
        take_default: Value stored under the ``"default"`` key of the
            ``takes`` mapping.

    Returns:
        The ``Query`` describing the TermsForPub query, backed by
        ``get_terms_for_pub``.
    """
    return Query(
        query="TermsForPub",
        label=f"Terms referencing {name}",
        function="get_terms_for_pub",
        takes={
            "short_form": {"$and": ["Individual", "pub"]},
            "default": take_default,
        },
        preview=10,
        # Column keys must match the header keys emitted by
        # get_terms_for_pub exactly ("tags", not " tags"), otherwise the
        # preview cannot find the column.
        preview_columns=["id", "name", "reference_type", "tags", "type"],
    )
19751975
19761976
19771977def TransgeneExpressionHere_to_schema (name , take_default ):
@@ -5551,27 +5551,30 @@ def get_terms_for_pub(pub_short_form: str, return_dataframe=True, limit: int = -
55515551 these cells empty — matches v2 prod which shows the columns blank
55525552 on dataset rows (e.g. Wolff2018).
55535553 """
5554- count_query = f"MATCH (:pub:Individual {{short_form:'{ pub_short_form } '}})<-[:has_reference]-(primary:Individual) RETURN count(DISTINCT primary) AS count"
5555- count_results = vc .nc .commit_list ([count_query ])
5556- total_count = get_dict_cursor ()(count_results )[0 ]['count' ] if count_results else 0
5557-
5558- # Apply LIMIT before the CALL subquery fires so the multi-hop walk
5559- # only runs on the rows we actually return — same pattern as
5560- # AnatomyExpressedIn / TransgeneExpressionHere.
5561- limit_clause = f"LIMIT { limit } " if limit != - 1 else ""
5562- main_query = f"""
5554+ # A publication is cited two different ways in the graph, and the legacy
5555+ # TermsForPub only saw the first:
5556+ # 1. Reference — a term has a direct (:term)-[:has_reference]->(:pub)
5557+ # edge (datasets, images, anatomy the paper is the
5558+ # source/citation for).
5559+ # 2. Expression — the pub is recorded as a `pub` array PROPERTY on an
5560+ # overlaps/part_of relationship of an expression-pattern
5561+ # individual (the same model AnatomyExpressedIn /
5562+ # TransgeneExpressionHere read via `r.pub`). Expression-
5563+ # data papers (e.g. FBrf0232433, VT-GAL4 lines) have NO
5564+ # has_reference edges at all, so the old query returned
5565+ # nothing despite thousands of referenced patterns.
5566+ # We surface both and add a "Reference type" column so users can tell why
5567+ # each term is listed. NB: the Expression branch scans overlaps/part_of by
5568+ # the relationship `pub` property (no node path exists — the pub node has
5569+ # no edges), so it is the expensive leg; the whole query is cached.
5570+
5571+ # Source 1: direct has_reference terms, with image enrichment for
5572+ # channel-image primaries (one representative image via the CALL).
5573+ ref_query = f"""
55635574 MATCH (:pub:Individual {{short_form:'{ pub_short_form } '}})<-[:has_reference]-(primary:Individual)
55645575 OPTIONAL MATCH (primary)-[:INSTANCEOF]->(typ:Class)
55655576 WITH DISTINCT primary, typ
5566- ORDER BY primary.label
5567- { limit_clause }
55685577 CALL {{
5569- // primary is the channel itself when it's a channel_image —
5570- // walk to its template alignment + imaging technique.
5571- // For non-image primaries (dataset, EP, anatomy) these
5572- // OPTIONAL MATCHes return null and the row's
5573- // template / technique / thumbnail cells render empty,
5574- // matching v2 prod's behaviour on dataset rows.
55755578 WITH primary
55765579 OPTIONAL MATCH (primary)-[irw:in_register_with]->(template:Individual)-[:depicts]->(template_anat:Individual)
55775580 OPTIONAL MATCH (primary)-[:is_specified_output_of]->(technique:Class)
@@ -5584,13 +5587,51 @@ def get_terms_for_pub(pub_short_form: str, return_dataframe=True, limit: int = -
55845587 apoc.text.format("[%s](%s)", [primary.label, primary.short_form]) AS name,
55855588 apoc.text.join(coalesce(primary.uniqueFacets, []), '|') AS tags,
55865589 REPLACE(apoc.text.format("[%s](%s)", [typ.label, typ.short_form]), '[null](null)', '') AS type,
5590+ 'Reference' AS reference_type,
55875591 REPLACE(apoc.text.format("[%s](%s)", [CASE WHEN template_anat.symbol[0] <> '' THEN template_anat.symbol[0] ELSE template_anat.label END, template_anat.short_form]), '[null](null)', '') AS template,
55885592 coalesce(technique.label, '') AS technique,
55895593 REPLACE(apoc.text.format("[](%s)", [coalesce(primary.label, 'image') + " aligned to " + CASE WHEN template_anat.symbol[0] <> '' THEN template_anat.symbol[0] ELSE template_anat.label END, REPLACE(COALESCE(irw.thumbnail[0], ''), 'thumbnailT.png', 'thumbnail.png'), coalesce(primary.label, 'image') + " aligned to " + CASE WHEN template_anat.symbol[0] <> '' THEN template_anat.symbol[0] ELSE template_anat.label END, template_anat.short_form + "," + primary.short_form]), "[](null)", "") AS thumbnail
55905594 """
55915595
5592- results = vc .nc .commit_list ([main_query ])
5593- df = pd .DataFrame .from_records (get_dict_cursor ()(results ))
5596+ # Source 2: expression patterns whose overlaps/part_of edges cite this pub.
5597+ exp_query = f"""
5598+ MATCH (:Individual)-[r:overlaps|part_of]->(b:Class:Expression_pattern)
5599+ WHERE '{ pub_short_form } ' IN r.pub
5600+ WITH DISTINCT b
5601+ RETURN
5602+ b.short_form AS id,
5603+ apoc.text.format("[%s](%s)", [b.label, b.short_form]) AS name,
5604+ apoc.text.join(coalesce(b.uniqueFacets, []), '|') AS tags,
5605+ '' AS type,
5606+ 'Expression' AS reference_type,
5607+ '' AS template,
5608+ '' AS technique,
5609+ '' AS thumbnail
5610+ """
5611+
5612+ df_ref = pd .DataFrame .from_records (get_dict_cursor ()(vc .nc .commit_list ([ref_query ])))
5613+ df_exp = pd .DataFrame .from_records (get_dict_cursor ()(vc .nc .commit_list ([exp_query ])))
5614+ df = pd .concat ([df_ref , df_exp ], ignore_index = True , sort = False )
5615+
5616+ if not df .empty :
5617+ # A term could be cited both ways — collapse to one row per term and
5618+ # join its reference types (e.g. "Expression; Reference").
5619+ df = (df .groupby ('id' , as_index = False , sort = False )
5620+ .agg ({
5621+ 'name' : 'first' ,
5622+ 'tags' : 'first' ,
5623+ 'type' : 'first' ,
5624+ 'reference_type' : lambda s : '; ' .join (sorted ({x for x in s if x })),
5625+ 'template' : 'first' ,
5626+ 'technique' : 'first' ,
5627+ 'thumbnail' : 'first' ,
5628+ }))
5629+ df = df .sort_values ('name' , kind = 'stable' ).reset_index (drop = True )
5630+
5631+ total_count = len (df )
5632+ if limit != - 1 :
5633+ df = df .head (limit )
5634+
55945635 if not df .empty :
55955636 df = encode_markdown_links (df , ['name' , 'template' , 'thumbnail' ])
55965637
@@ -5599,16 +5640,17 @@ def get_terms_for_pub(pub_short_form: str, return_dataframe=True, limit: int = -
55995640
56005641 return {
56015642 "headers" : {
5602- "id" : {"title" : "ID" , "type" : "selection_id" , "order" : - 1 },
5603- "name" : {"title" : "Term" , "type" : "markdown" , "order" : 0 },
5604- "tags" : {"title" : "Tags" , "type" : "tags" , "order" : 1 },
5605- "type" : {"title" : "Type" , "type" : "text" , "order" : 2 },
5606- "template" : {"title" : "Template" , "type" : "markdown" , "order" : 3 },
5607- "technique" : {"title" : "Imaging Technique" , "type" : "text" , "order" : 4 },
5608- "thumbnail" : {"title" : "Thumbnail" , "type" : "markdown" , "order" : 9 },
5643+ "id" : {"title" : "ID" , "type" : "selection_id" , "order" : - 1 },
5644+ "name" : {"title" : "Term" , "type" : "markdown" , "order" : 0 },
5645+ "reference_type" : {"title" : "Reference type" , "type" : "text" , "order" : 1 },
5646+ "tags" : {"title" : "Tags" , "type" : "tags" , "order" : 2 },
5647+ "type" : {"title" : "Type" , "type" : "text" , "order" : 3 },
5648+ "template" : {"title" : "Template" , "type" : "markdown" , "order" : 4 },
5649+ "technique" : {"title" : "Imaging Technique" , "type" : "text" , "order" : 5 },
5650+ "thumbnail" : {"title" : "Thumbnail" , "type" : "markdown" , "order" : 9 },
56095651 },
56105652 "rows" : [
5611- {k : row [k ] for k in ["id" , "name" , "tags" , "type" , "template" , "technique" , "thumbnail" ]}
5653+ {k : row [k ] for k in ["id" , "name" , "reference_type" , " tags" , "type" , "template" , "technique" , "thumbnail" ]}
56125654 for row in safe_to_dict (df , sort_by_id = False )
56135655 ],
56145656 "count" : total_count ,
0 commit comments