Skip to content

Commit f8d8618

Browse files
authored
Merge pull request #44 from VirtualFlyBrain/fix/perf-limit-before-call
Hotfix: apply LIMIT before CALL subqueries in dataset/transgene queries
2 parents 15a6a4a + ea64a35 commit f8d8618

1 file changed

Lines changed: 25 additions & 6 deletions

File tree

src/vfbquery/vfb_queries.py

Lines changed: 25 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -4872,6 +4872,13 @@ def _dataset_return_clause(ds_var: str = "ds") -> str:
48724872
get_all_datasets. Matches v2 prod columns:
48734873
id, name, pubs(Reference), tags(Gross_Type), license, template,
48744874
technique, thumbnail, image_count.
4875+
4876+
NB: no ORDER BY here — the caller applies LIMIT (and any ORDER BY)
4877+
after ``WITH DISTINCT ds`` and BEFORE the CALL subqueries fire, so
4878+
we only enrich the rows we actually need. Otherwise 130 datasets
4879+
× 4 CALL subqueries (one of which counts edges over millions of
4880+
``has_source`` relationships) easily breaches the 3 s perf-test
4881+
threshold.
48754882
"""
48764883
return f"""
48774884
RETURN
@@ -4884,7 +4891,6 @@ def _dataset_return_clause(ds_var: str = "ds") -> str:
48844891
coalesce(technique.label, '') AS technique,
48854892
REPLACE(apoc.text.format("[![%s](%s '%s')](%s)", [COALESCE(i.symbol[0], coalesce(i.label, 'image')) + " aligned to " + COALESCE(templ.symbol[0], templ.label), REPLACE(COALESCE(irw.thumbnail[0], ''), 'thumbnailT.png', 'thumbnail.png'), COALESCE(i.symbol[0], coalesce(i.label, 'image')) + " aligned to " + COALESCE(templ.symbol[0], templ.label), templ.short_form + "," + coalesce(i.short_form, {ds_var}.short_form)]), "[![null]( 'null')](null)", "") AS thumbnail,
48864893
image_count
4887-
ORDER BY name
48884894
"""
48894895

48904896

@@ -4921,11 +4927,18 @@ def get_aligned_datasets(template_short_form: str, return_dataframe=True, limit:
49214927
count_results = vc.nc.commit_list([count_query])
49224928
total_count = get_dict_cursor()(count_results)[0]['count'] if count_results else 0
49234929

4930+
# LIMIT applied AFTER DISTINCT and BEFORE the CALL subqueries — otherwise
4931+
# all 86 (AlignedDatasets) / 130 (AllDatasets) datasets get enriched
4932+
# through 4 CALL subqueries (one of which counts has_source edges) and
4933+
# the limit only trims afterwards. That blew past the THRESHOLD_MEDIUM
4934+
# (3 s) perf-test budget on CI.
4935+
limit_clause = f"LIMIT {limit}" if limit != -1 else ""
49244936
main_query = f"""MATCH (ds:DataSet:Individual) WHERE NOT ds:Deprecated AND (:Template:Individual {{short_form:'{template_short_form}'}})<-[:depicts]-(:Template:Individual)-[:in_register_with]-(:Individual)-[:depicts]->(:Individual)-[:has_source]->(ds)
49254937
WITH DISTINCT ds
4938+
ORDER BY coalesce(ds.label, ds.short_form)
4939+
{limit_clause}
49264940
{_dataset_enrichment_cypher('ds')}
49274941
{_dataset_return_clause('ds')}"""
4928-
if limit != -1: main_query += f" LIMIT {limit}"
49294942

49304943
results = vc.nc.commit_list([main_query])
49314944
df = pd.DataFrame.from_records(get_dict_cursor()(results))
@@ -4945,11 +4958,13 @@ def get_all_datasets(return_dataframe=True, limit: int = -1):
49454958
count_results = vc.nc.commit_list([count_query])
49464959
total_count = get_dict_cursor()(count_results)[0]['count'] if count_results else 0
49474960

4961+
limit_clause = f"LIMIT {limit}" if limit != -1 else ""
49484962
main_query = f"""MATCH (ds:DataSet:Individual) WHERE NOT ds:Deprecated AND (:Template:Individual)<-[:depicts]-(:Template:Individual)-[:in_register_with]-(:Individual)-[:depicts]->(:Individual)-[:has_source]->(ds)
49494963
WITH DISTINCT ds
4964+
ORDER BY coalesce(ds.label, ds.short_form)
4965+
{limit_clause}
49504966
{_dataset_enrichment_cypher('ds')}
49514967
{_dataset_return_clause('ds')}"""
4952-
if limit != -1: main_query += f" LIMIT {limit}"
49534968

49544969
results = vc.nc.commit_list([main_query])
49554970
df = pd.DataFrame.from_records(get_dict_cursor()(results))
@@ -5016,10 +5031,17 @@ def get_transgene_expression_here(anatomy_short_form: str, return_dataframe=True
50165031
count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
50175032
total_count = count_df['total_count'][0] if not count_df.empty else 0
50185033

5034+
# Same as get_aligned_datasets: apply LIMIT before the CALL subqueries
5035+
# fire so we only enrich the rows we actually need. With 2,340
5036+
# mushroom-body EPs and a 5-hop thumbnail join inside the CALL, the
5037+
# naive "append LIMIT at the end" form ran for tens of seconds.
5038+
limit_clause = f"LIMIT {limit}" if limit != -1 else ""
50195039
main_query = f"""
50205040
MATCH (ep:Class:Expression_pattern)<-[ar:overlaps|part_of]-(:Individual)-[:INSTANCEOF]->(anat:Class)
50215041
WHERE anat.short_form = '{anatomy_short_form}'
50225042
WITH DISTINCT ep
5043+
ORDER BY ep.label
5044+
{limit_clause}
50235045
CALL {{
50245046
WITH ep
50255047
OPTIONAL MATCH (ep)<-[:overlaps|part_of]-(:Individual)-[:has_reference|pub]->(p:pub)
@@ -5040,10 +5062,7 @@ def get_transgene_expression_here(anatomy_short_form: str, return_dataframe=True
50405062
REPLACE(apoc.text.format("[%s](%s)", [COALESCE(templ.symbol[0], templ.label), templ.short_form]), '[null](null)', '') AS template,
50415063
coalesce(technique.label, '') AS technique,
50425064
REPLACE(apoc.text.format("[![%s](%s '%s')](%s)", [COALESCE(i.symbol[0], coalesce(i.label, 'image')) + " aligned to " + COALESCE(templ.symbol[0], templ.label), REPLACE(COALESCE(irw.thumbnail[0], ''), 'thumbnailT.png', 'thumbnail.png'), COALESCE(i.symbol[0], coalesce(i.label, 'image')) + " aligned to " + COALESCE(templ.symbol[0], templ.label), templ.short_form + "," + coalesce(i.short_form, ep.short_form)]), "[![null]( 'null')](null)", "") AS thumbnail
5043-
ORDER BY ep.label
50445065
"""
5045-
if limit != -1:
5046-
main_query += f" LIMIT {limit}"
50475066

50485067
results = vc.nc.commit_list([main_query])
50495068
df = pd.DataFrame.from_records(get_dict_cursor()(results))

0 commit comments

Comments
 (0)