@@ -4872,6 +4872,13 @@ def _dataset_return_clause(ds_var: str = "ds") -> str:
48724872 get_all_datasets. Matches v2 prod columns:
48734873 id, name, pubs(Reference), tags(Gross_Type), license, template,
48744874 technique, thumbnail, image_count.
4875+
4876+ NB: no ORDER BY here — the caller applies LIMIT (and any ORDER BY)
4877+ after ``WITH DISTINCT ds`` and BEFORE the CALL subqueries fire, so
4878+ we only enrich the rows we actually need. Otherwise 130 datasets
4879+ × 4 CALL subqueries (one of which counts edges over millions of
4880+ ``has_source`` relationships) easily breaches the 3 s perf-test
4881+ threshold.
48754882 """
48764883 return f"""
48774884 RETURN
@@ -4884,7 +4891,6 @@ def _dataset_return_clause(ds_var: str = "ds") -> str:
48844891 coalesce(technique.label, '') AS technique,
48854892 REPLACE(apoc.text.format("[](%s)", [COALESCE(i.symbol[0], coalesce(i.label, 'image')) + " aligned to " + COALESCE(templ.symbol[0], templ.label), REPLACE(COALESCE(irw.thumbnail[0], ''), 'thumbnailT.png', 'thumbnail.png'), COALESCE(i.symbol[0], coalesce(i.label, 'image')) + " aligned to " + COALESCE(templ.symbol[0], templ.label), templ.short_form + "," + coalesce(i.short_form, { ds_var } .short_form)]), "[](null)", "") AS thumbnail,
48864893 image_count
4887- ORDER BY name
48884894 """
48894895
48904896
@@ -4921,11 +4927,18 @@ def get_aligned_datasets(template_short_form: str, return_dataframe=True, limit:
49214927 count_results = vc .nc .commit_list ([count_query ])
49224928 total_count = get_dict_cursor ()(count_results )[0 ]['count' ] if count_results else 0
49234929
4930+ # LIMIT applied AFTER DISTINCT and BEFORE the CALL subqueries — otherwise
4931+ # all 86 (AlignedDatasets) / 130 (AllDatasets) datasets get enriched
4932+ # through 4 CALL subqueries (one of which counts has_source edges) and
4933+ # the limit only trims afterwards. That blew past the THRESHOLD_MEDIUM
4934+ # (3 s) perf-test budget on CI.
4935+ limit_clause = f"LIMIT { limit } " if limit != - 1 else ""
49244936 main_query = f"""MATCH (ds:DataSet:Individual) WHERE NOT ds:Deprecated AND (:Template:Individual {{short_form:'{ template_short_form } '}})<-[:depicts]-(:Template:Individual)-[:in_register_with]-(:Individual)-[:depicts]->(:Individual)-[:has_source]->(ds)
49254937 WITH DISTINCT ds
4938+ ORDER BY coalesce(ds.label, ds.short_form)
4939+ { limit_clause }
49264940 { _dataset_enrichment_cypher ('ds' )}
49274941 { _dataset_return_clause ('ds' )} """
4928- if limit != - 1 : main_query += f" LIMIT { limit } "
49294942
49304943 results = vc .nc .commit_list ([main_query ])
49314944 df = pd .DataFrame .from_records (get_dict_cursor ()(results ))
@@ -4945,11 +4958,13 @@ def get_all_datasets(return_dataframe=True, limit: int = -1):
49454958 count_results = vc .nc .commit_list ([count_query ])
49464959 total_count = get_dict_cursor ()(count_results )[0 ]['count' ] if count_results else 0
49474960
4961+ limit_clause = f"LIMIT { limit } " if limit != - 1 else ""
49484962 main_query = f"""MATCH (ds:DataSet:Individual) WHERE NOT ds:Deprecated AND (:Template:Individual)<-[:depicts]-(:Template:Individual)-[:in_register_with]-(:Individual)-[:depicts]->(:Individual)-[:has_source]->(ds)
49494963 WITH DISTINCT ds
4964+ ORDER BY coalesce(ds.label, ds.short_form)
4965+ { limit_clause }
49504966 { _dataset_enrichment_cypher ('ds' )}
49514967 { _dataset_return_clause ('ds' )} """
4952- if limit != - 1 : main_query += f" LIMIT { limit } "
49534968
49544969 results = vc .nc .commit_list ([main_query ])
49554970 df = pd .DataFrame .from_records (get_dict_cursor ()(results ))
@@ -5016,10 +5031,17 @@ def get_transgene_expression_here(anatomy_short_form: str, return_dataframe=True
50165031 count_df = pd .DataFrame .from_records (get_dict_cursor ()(count_results ))
50175032 total_count = count_df ['total_count' ][0 ] if not count_df .empty else 0
50185033
5034+ # Same as get_aligned_datasets: apply LIMIT before the CALL subqueries
5035+ # fire so we only enrich the rows we actually need. With 2,340
5036+ # mushroom-body EPs and a 5-hop thumbnail join inside the CALL, the
5037+ # naive "append LIMIT at the end" form ran for tens of seconds.
5038+ limit_clause = f"LIMIT { limit } " if limit != - 1 else ""
50195039 main_query = f"""
50205040 MATCH (ep:Class:Expression_pattern)<-[ar:overlaps|part_of]-(:Individual)-[:INSTANCEOF]->(anat:Class)
50215041 WHERE anat.short_form = '{ anatomy_short_form } '
50225042 WITH DISTINCT ep
5043+ ORDER BY ep.label
5044+ { limit_clause }
50235045 CALL {{
50245046 WITH ep
50255047 OPTIONAL MATCH (ep)<-[:overlaps|part_of]-(:Individual)-[:has_reference|pub]->(p:pub)
@@ -5040,10 +5062,7 @@ def get_transgene_expression_here(anatomy_short_form: str, return_dataframe=True
50405062 REPLACE(apoc.text.format("[%s](%s)", [COALESCE(templ.symbol[0], templ.label), templ.short_form]), '[null](null)', '') AS template,
50415063 coalesce(technique.label, '') AS technique,
50425064 REPLACE(apoc.text.format("[](%s)", [COALESCE(i.symbol[0], coalesce(i.label, 'image')) + " aligned to " + COALESCE(templ.symbol[0], templ.label), REPLACE(COALESCE(irw.thumbnail[0], ''), 'thumbnailT.png', 'thumbnail.png'), COALESCE(i.symbol[0], coalesce(i.label, 'image')) + " aligned to " + COALESCE(templ.symbol[0], templ.label), templ.short_form + "," + coalesce(i.short_form, ep.short_form)]), "[](null)", "") AS thumbnail
5043- ORDER BY ep.label
50445065 """
5045- if limit != - 1 :
5046- main_query += f" LIMIT { limit } "
50475066
50485067 results = vc .nc .commit_list ([main_query ])
50495068 df = pd .DataFrame .from_records (get_dict_cursor ()(results ))
0 commit comments