Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 25 additions & 6 deletions src/vfbquery/vfb_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -4872,6 +4872,13 @@ def _dataset_return_clause(ds_var: str = "ds") -> str:
get_all_datasets. Matches v2 prod columns:
id, name, pubs(Reference), tags(Gross_Type), license, template,
technique, thumbnail, image_count.

NB: no ORDER BY here — the caller applies LIMIT (and any ORDER BY)
after ``WITH DISTINCT ds`` and BEFORE the CALL subqueries fire, so
we only enrich the rows we actually need. Otherwise 130 datasets
× 4 CALL subqueries (one of which counts edges over millions of
``has_source`` relationships) easily breaches the 3 s perf-test
threshold.
"""
return f"""
RETURN
Expand All @@ -4884,7 +4891,6 @@ def _dataset_return_clause(ds_var: str = "ds") -> str:
coalesce(technique.label, '') AS technique,
REPLACE(apoc.text.format("[![%s](%s '%s')](%s)", [COALESCE(i.symbol[0], coalesce(i.label, 'image')) + " aligned to " + COALESCE(templ.symbol[0], templ.label), REPLACE(COALESCE(irw.thumbnail[0], ''), 'thumbnailT.png', 'thumbnail.png'), COALESCE(i.symbol[0], coalesce(i.label, 'image')) + " aligned to " + COALESCE(templ.symbol[0], templ.label), templ.short_form + "," + coalesce(i.short_form, {ds_var}.short_form)]), "[![null]( 'null')](null)", "") AS thumbnail,
image_count
ORDER BY name
"""


Expand Down Expand Up @@ -4921,11 +4927,18 @@ def get_aligned_datasets(template_short_form: str, return_dataframe=True, limit:
count_results = vc.nc.commit_list([count_query])
total_count = get_dict_cursor()(count_results)[0]['count'] if count_results else 0

# LIMIT applied AFTER DISTINCT and BEFORE the CALL subqueries — otherwise
# all 86 (AlignedDatasets) / 130 (AllDatasets) datasets get enriched
# through 4 CALL subqueries (one of which counts has_source edges) and
# the limit only trims afterwards. That blew past the THRESHOLD_MEDIUM
# (3 s) perf-test budget on CI.
limit_clause = f"LIMIT {limit}" if limit != -1 else ""
main_query = f"""MATCH (ds:DataSet:Individual) WHERE NOT ds:Deprecated AND (:Template:Individual {{short_form:'{template_short_form}'}})<-[:depicts]-(:Template:Individual)-[:in_register_with]-(:Individual)-[:depicts]->(:Individual)-[:has_source]->(ds)
WITH DISTINCT ds
ORDER BY coalesce(ds.label, ds.short_form)
{limit_clause}
{_dataset_enrichment_cypher('ds')}
{_dataset_return_clause('ds')}"""
if limit != -1: main_query += f" LIMIT {limit}"

results = vc.nc.commit_list([main_query])
df = pd.DataFrame.from_records(get_dict_cursor()(results))
Expand All @@ -4945,11 +4958,13 @@ def get_all_datasets(return_dataframe=True, limit: int = -1):
count_results = vc.nc.commit_list([count_query])
total_count = get_dict_cursor()(count_results)[0]['count'] if count_results else 0

limit_clause = f"LIMIT {limit}" if limit != -1 else ""
main_query = f"""MATCH (ds:DataSet:Individual) WHERE NOT ds:Deprecated AND (:Template:Individual)<-[:depicts]-(:Template:Individual)-[:in_register_with]-(:Individual)-[:depicts]->(:Individual)-[:has_source]->(ds)
WITH DISTINCT ds
ORDER BY coalesce(ds.label, ds.short_form)
{limit_clause}
{_dataset_enrichment_cypher('ds')}
{_dataset_return_clause('ds')}"""
if limit != -1: main_query += f" LIMIT {limit}"

results = vc.nc.commit_list([main_query])
df = pd.DataFrame.from_records(get_dict_cursor()(results))
Expand Down Expand Up @@ -5016,10 +5031,17 @@ def get_transgene_expression_here(anatomy_short_form: str, return_dataframe=True
count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results))
total_count = count_df['total_count'][0] if not count_df.empty else 0

# Same as get_aligned_datasets: apply LIMIT before the CALL subqueries
# fire so we only enrich the rows we actually need. With 2,340
# mushroom-body EPs and a 5-hop thumbnail join inside the CALL, the
# naive "append LIMIT at the end" form ran for tens of seconds.
limit_clause = f"LIMIT {limit}" if limit != -1 else ""
main_query = f"""
MATCH (ep:Class:Expression_pattern)<-[ar:overlaps|part_of]-(:Individual)-[:INSTANCEOF]->(anat:Class)
WHERE anat.short_form = '{anatomy_short_form}'
WITH DISTINCT ep
ORDER BY ep.label
{limit_clause}
CALL {{
WITH ep
OPTIONAL MATCH (ep)<-[:overlaps|part_of]-(:Individual)-[:has_reference|pub]->(p:pub)
Expand All @@ -5040,10 +5062,7 @@ def get_transgene_expression_here(anatomy_short_form: str, return_dataframe=True
REPLACE(apoc.text.format("[%s](%s)", [COALESCE(templ.symbol[0], templ.label), templ.short_form]), '[null](null)', '') AS template,
coalesce(technique.label, '') AS technique,
REPLACE(apoc.text.format("[![%s](%s '%s')](%s)", [COALESCE(i.symbol[0], coalesce(i.label, 'image')) + " aligned to " + COALESCE(templ.symbol[0], templ.label), REPLACE(COALESCE(irw.thumbnail[0], ''), 'thumbnailT.png', 'thumbnail.png'), COALESCE(i.symbol[0], coalesce(i.label, 'image')) + " aligned to " + COALESCE(templ.symbol[0], templ.label), templ.short_form + "," + coalesce(i.short_form, ep.short_form)]), "[![null]( 'null')](null)", "") AS thumbnail
ORDER BY ep.label
"""
if limit != -1:
main_query += f" LIMIT {limit}"

results = vc.nc.commit_list([main_query])
df = pd.DataFrame.from_records(get_dict_cursor()(results))
Expand Down
Loading