Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 62 additions & 7 deletions .github/workflows/performance-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,12 @@ jobs:
name: "Performance Test"
runs-on: ubuntu-latest
timeout-minutes: 60 # Set a timeout to prevent jobs from running indefinitely

defaults:
run:
# pipefail so `python -m unittest ... | tee` propagates unittest's exit
# status instead of always returning tee's 0.
shell: bash -o pipefail -e {0}

steps:
- uses: actions/checkout@v4

Expand All @@ -41,15 +46,36 @@ jobs:
run: |
python -m unittest src.test.test_query_performance -v 2>&1 | tee performance_test_output.log

- name: Run Legacy Performance Test
- name: Run Legacy Performance Test
# Always run, even if the previous test step failed, so we still get
# the report data and don't mask additional regressions.
if: always()
env:
VFBQUERY_CACHE_ENABLED: 'true'
MPLBACKEND: 'Agg'
VISPY_GL_LIB: 'osmesa'
VISPY_USE_EGL: '0'
run: |
python -m unittest -v src.test.term_info_queries_test.TermInfoQueriesTest.test_term_info_performance 2>&1 | tee -a performance_test_output.log


- name: Run Connectivity Tests
if: always()
env:
VFBQUERY_CACHE_ENABLED: 'true'
MPLBACKEND: 'Agg'
VISPY_GL_LIB: 'osmesa'
VISPY_USE_EGL: '0'
run: |
# These files are pytest-style (plain classes + @pytest.mark.integration).
# Run with pytest so the markers are honoured and collection works.
pytest -v \
src/test/test_neuron_neuron_connectivity.py \
src/test/test_neuron_region_connectivity.py \
src/test/test_upstream_class_connectivity.py \
src/test/test_downstream_class_connectivity.py \
src/test/test_vfb_connectivity.py \
2>&1 | tee -a performance_test_output.log

- name: Create Performance Report
if: always() # Always run this step, even if the test fails
run: |
Expand Down Expand Up @@ -148,9 +174,21 @@ jobs:

EOF

# Check overall test status
if grep -q "OK" performance_test_output.log || grep -q "Ran.*test" performance_test_output.log; then
echo "✅ **Test Status**: Performance tests completed" >> performance.md
# Check overall test status. Note: matching "OK" or "ok" would
# false-positive on per-test "test_foo ... ok" lines emitted by
# unittest -v even when other tests failed. Use the absence of
# FAIL:/ERROR: lines as the truth source (mirrors the final
# "Fail job on test failures" step).
# unittest summary: "Ran N tests in Xs".
# pytest summary line ends with " in X.XXs" prefixed by " passed", " failed",
# " error", or "no tests ran". Match either runner's summary markers.
if grep -q "Ran .* test\| passed in \| failed in \| error in \|no tests ran" performance_test_output.log; then
# unittest emits "FAIL:" / "ERROR:"; pytest emits "FAILED " / "ERROR " (no colon).
if grep -q "FAIL:\|ERROR:\|FAILED\b\|^ERROR\b" performance_test_output.log; then
echo "❌ **Test Status**: Performance tests ran but reported failures" >> performance.md
else
echo "✅ **Test Status**: Performance tests completed" >> performance.md
fi
echo "" >> performance.md

# Count successes and failures
Expand All @@ -177,7 +215,7 @@ jobs:
echo "|-------|----------|--------|" >> performance.md

# Parse timing information
grep -E "^(get_term_info|NeuronsPartHere|NeuronsSynaptic|NeuronsPresynapticHere|NeuronsPostsynapticHere|ComponentsOf|PartsOf|SubclassesOf|NeuronClassesFasciculatingHere|TractsNervesInnervatingHere|LineageClonesIn|ListAllAvailableImages):" performance_test_output.log | while read line; do
grep -E "^(get_term_info|NeuronsPartHere|NeuronsSynaptic|NeuronsPresynapticHere|NeuronsPostsynapticHere|ComponentsOf|PartsOf|SubclassesOf|NeuronClassesFasciculatingHere|TractsNervesInnervatingHere|LineageClonesIn|ListAllAvailableImages|NeuronNeuronConnectivityQuery|NeuronRegionConnectivityQuery|NeuronInputsTo|DownstreamClassConnectivity|UpstreamClassConnectivity|QueryConnectivity):" performance_test_output.log | while read line; do
QUERY=$(echo "$line" | sed 's/:.*//')
DURATION=$(echo "$line" | sed 's/.*: \([0-9.]*\)s.*/\1/')
if echo "$line" | grep -q "✅"; then
Expand Down Expand Up @@ -233,3 +271,20 @@ jobs:
git add performance.md
git diff --staged --quiet || git commit -m "Update performance test results [skip ci]"
git push origin HEAD:main

- name: Fail job on test failures
  # Belt-and-braces: pipefail on the test steps should already make the
  # job red on any unittest failure. This grep is a safety net in case a
  # future test runner emits FAIL/ERROR lines without a non-zero exit
  # (e.g. partial runs, swallowed pipelines). Runs after the report and
  # commit so those still happen.
  if: always()
  run: |
    log=performance_test_output.log
    # Match both unittest format ("FAIL:" / "ERROR:") and pytest format
    # ("FAILED " / "ERROR " — no colon) so this catches either runner.
    pattern='FAIL:\|ERROR:\|FAILED\b\|^ERROR\b'

    # A missing or empty log means no test output was captured. Without
    # this check grep exits 2 on the missing file, the `if` below treats
    # that as "no match", and the safety net reports success having
    # verified nothing.
    if [[ ! -s "$log" ]]; then
      echo "::error::$log is missing or empty; no test output to verify"
      exit 1
    fi

    if grep -q -- "$pattern" "$log"; then
      echo "::error::Test run reported FAIL or ERROR lines in performance_test_output.log"
      grep -- "$pattern" "$log"
      exit 1
    fi
    echo "No FAIL/ERROR lines detected."
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ requests
pysolr
get_version
aiohttp
psycopg[binary]>=3.0
psycopg[binary]>=3.0
pytest
99 changes: 99 additions & 0 deletions src/test/test_downstream_class_connectivity.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,105 @@ def test_empty_class_returns_empty_dataframe(self):
assert df.empty


class TestDownstreamClassConnectivityHierarchyRollup:
    """Regression tests for the partner-side hierarchy rollup behaviour:
    connections to a child class also count toward each ancestor class within
    the Neuron subtree, without double-counting under FBbt multi-inheritance.
    """

    @pytest.fixture(scope='class')
    def result(self):
        """Return the downstream connectivity result for ``TEST_CLASS``.

        Class-scoped, so the query (called with ``force_refresh=True`` and
        ``return_dataframe=False``) runs once and is shared by every test in
        this class. The tests below read ``result["rows"]``.
        """
        return get_downstream_class_connectivity(
            TEST_CLASS, return_dataframe=False, force_refresh=True,
        )

    @pytest.mark.integration
    def test_parent_class_appears_with_sensible_counts(self, result):
        """A row keyed on a parent class should have connected_n at least as
        large as any of its descendant rows (set-union semantics) and at most
        the sum of descendant connected_n (no double-counting beyond what
        multi-inheritance forces).
        """
        from vfbquery.vfb_queries import vc, get_dict_cursor

        rows = result["rows"]
        ids = [r["id"] for r in rows]
        assert ids, "Expected at least one row to test against"

        # Find any (parent, child) pair among the row ids. The Python list
        # repr of `ids` is interpolated directly as the Cypher list literal.
        q = (
            "MATCH (p:Class)<-[:SUBCLASSOF*1..]-(c:Class) "
            "WHERE p.short_form IN %s AND c.short_form IN %s "
            "RETURN p.short_form AS parent, c.short_form AS child LIMIT 1"
            % (ids, ids)
        )
        pairs = get_dict_cursor()(vc.nc.commit_list([q]))
        if not pairs:
            pytest.skip("No parent/child pair among result rows for this class")

        parent_id = pairs[0]["parent"]
        child_id = pairs[0]["child"]
        parent_row = next(r for r in rows if r["id"] == parent_id)

        # Sum connected_n across all descendant rows (not just the one returned).
        desc_q = (
            "MATCH (p:Class {short_form: '%s'})<-[:SUBCLASSOF*1..]-(c:Class) "
            "WHERE c.short_form IN %s "
            "RETURN collect(DISTINCT c.short_form) AS descs"
            % (parent_id, ids)
        )
        desc_rows = get_dict_cursor()(vc.nc.commit_list([desc_q]))

        # Fall back to the known child when the lookup yields nothing. This
        # covers both "no result row" and "a row whose descs list is empty";
        # the latter would otherwise leave nothing to aggregate below.
        descendant_ids = set(desc_rows[0]["descs"] or []) if desc_rows else set()
        if not descendant_ids:
            descendant_ids = {child_id}

        descendant_rows = [r for r in rows if r["id"] in descendant_ids]
        # Fail with a readable message instead of max() raising ValueError
        # on an empty sequence.
        assert descendant_rows, (
            f"No result rows matched the descendants of parent {parent_id}: "
            f"{sorted(descendant_ids)[:5]}"
        )

        max_child = max(r["connected_n"] for r in descendant_rows)
        sum_child = sum(r["connected_n"] for r in descendant_rows)
        assert parent_row["connected_n"] >= max_child, (
            f"Parent {parent_id} connected_n={parent_row['connected_n']} should "
            f"be >= max descendant connected_n={max_child}"
        )
        assert parent_row["connected_n"] <= sum_child, (
            f"Parent {parent_id} connected_n={parent_row['connected_n']} should "
            f"be <= sum of descendant connected_n={sum_child}"
        )

    @pytest.mark.integration
    def test_total_n_is_constant_across_rows(self, result):
        """`total_n` is the queried-side instance count and must be the same
        for every output row (regression for the previous summed-across-
        subclasses value).
        """
        rows = result["rows"]
        assert rows, "Expected at least one row"
        total_ns = {r["total_n"] for r in rows}
        assert len(total_ns) == 1, (
            f"Expected total_n to be constant across rows, got: {total_ns}"
        )
        assert next(iter(total_ns)) > 0

    @pytest.mark.integration
    def test_no_rows_above_neuron_root(self, result):
        """The partner-side ancestor walk should stop at the Neuron class
        (FBbt_00005106). No row id should be a class outside the Neuron
        subtree.
        """
        from vfbquery.vfb_queries import vc, get_dict_cursor, NEURON_ROOT_SHORT_FORM

        ids = [r["id"] for r in result["rows"]]
        assert ids, "Expected at least one row"

        # `*0..` includes the root itself, so a row keyed on the Neuron root
        # class is not reported as an offender.
        q = (
            "MATCH (root:Class {short_form: '%s'})<-[:SUBCLASSOF*0..]-(c:Class) "
            "WHERE c.short_form IN %s "
            "RETURN collect(DISTINCT c.short_form) AS in_neuron"
            % (NEURON_ROOT_SHORT_FORM, ids)
        )
        result_rows = get_dict_cursor()(vc.nc.commit_list([q]))
        in_neuron = set(result_rows[0]["in_neuron"]) if result_rows else set()
        offenders = [i for i in ids if i not in in_neuron]
        assert not offenders, (
            f"Found {len(offenders)} row(s) outside the Neuron subtree: "
            f"{offenders[:5]}"
        )


class TestDownstreamClassConnectivitySchema:
def test_schema_generation(self):
schema = DownstreamClassConnectivity_to_schema(
Expand Down
70 changes: 66 additions & 4 deletions src/test/test_query_performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,15 @@
get_neuron_neuron_connectivity,
get_neuron_region_connectivity,
get_individual_neuron_inputs,
get_downstream_class_connectivity,
get_upstream_class_connectivity,
get_expression_overlaps_here,
get_anatomy_scrnaseq,
get_cluster_expression,
get_expression_cluster,
get_scrnaseq_dataset_data,
)
from vfbquery.vfb_connectivity import query_connectivity


class QueryPerformanceTest(unittest.TestCase):
Expand Down Expand Up @@ -348,7 +351,65 @@ def test_07_connectivity_queries(self):
)
print(f"NeuronRegionConnectivityQuery: {duration:.4f}s {'✅' if success else '❌'}")
self.assertLess(duration, self.THRESHOLD_SLOW, "NeuronRegionConnectivityQuery exceeded threshold")


# FBbt_00100234 = MBON01 — a specific mushroom body output neuron type
# with a small instance count (preferred over broad lineage classes for
# bounded test runtime). The class-level connectivity queries are a
# multi-step aggregation (Neo4j + batched Solr + ancestor walk), not a
# single Solr lookup, so cold-cache calls can take tens of seconds even
# on a small class.
CLASS_CONNECTIVITY_TEST_CLASS = "FBbt_00100234"

def test_07b_downstream_class_connectivity(self):
    """Time the DownstreamClassConnectivity query (multi-step aggregation).

    Runs ``get_downstream_class_connectivity`` through ``self._time_query``
    for ``CLASS_CONNECTIVITY_TEST_CLASS``, prints a one-line timing summary,
    and fails if the measured duration is not below ``THRESHOLD_VERY_SLOW``.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("DOWNSTREAM CLASS CONNECTIVITY (multi-step aggregation)")
    print(rule)

    # Only the timing and the success flag are used; the payload is ignored.
    _payload, elapsed, ok = self._time_query(
        "DownstreamClassConnectivity",
        get_downstream_class_connectivity,
        self.CLASS_CONNECTIVITY_TEST_CLASS,
        return_dataframe=False,
    )

    marker = '✅' if ok else '❌'
    print(f"DownstreamClassConnectivity: {elapsed:.4f}s {marker}")
    self.assertLess(
        elapsed,
        self.THRESHOLD_VERY_SLOW,
        "DownstreamClassConnectivity exceeded threshold",
    )

def test_07b_upstream_class_connectivity(self):
    """Time the UpstreamClassConnectivity query (multi-step aggregation).

    Runs ``get_upstream_class_connectivity`` through ``self._time_query``
    for ``CLASS_CONNECTIVITY_TEST_CLASS``, prints a one-line timing summary,
    and fails if the measured duration is not below ``THRESHOLD_VERY_SLOW``.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("UPSTREAM CLASS CONNECTIVITY (multi-step aggregation)")
    print(rule)

    # Only the timing and the success flag are used; the payload is ignored.
    _payload, elapsed, ok = self._time_query(
        "UpstreamClassConnectivity",
        get_upstream_class_connectivity,
        self.CLASS_CONNECTIVITY_TEST_CLASS,
        return_dataframe=False,
    )

    marker = '✅' if ok else '❌'
    print(f"UpstreamClassConnectivity: {elapsed:.4f}s {marker}")
    self.assertLess(
        elapsed,
        self.THRESHOLD_VERY_SLOW,
        "UpstreamClassConnectivity exceeded threshold",
    )

def test_07c_cross_dataset_connectivity(self):
    """Time the cross-dataset query_connectivity call (live, both-end filtered).

    Runs ``query_connectivity`` through ``self._time_query`` with both an
    upstream and a downstream type plus ``group_by_class=True``, prints a
    one-line timing summary, and fails if the measured duration is not
    below 300 seconds.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("CROSS-DATASET CONNECTIVITY (live, slow)")
    print(rule)

    # Both-end + group_by_class is the fastest variant per LLM guidance.
    # giant fiber neuron → peripherally synapsing interneuron is a
    # known-good pair with non-zero results.
    query_kwargs = dict(
        upstream_type="giant fiber neuron",
        downstream_type="peripherally synapsing interneuron",
        group_by_class=True,
    )
    _payload, elapsed, ok = self._time_query(
        "QueryConnectivity",
        query_connectivity,
        **query_kwargs,
    )

    marker = '✅' if ok else '❌'
    print(f"QueryConnectivity: {elapsed:.4f}s {marker}")
    # Live cross-dataset query — allow up to 5 min per the MCP timeout.
    self.assertLess(elapsed, 300.0, "QueryConnectivity exceeded threshold")

def test_08_similarity_queries(self):
"""Test NBLAST similarity queries"""
print("\n" + "="*80)
Expand All @@ -365,8 +426,8 @@ def test_08_similarity_queries(self):
limit=5
)
print(f"SimilarMorphologyTo: {duration:.4f}s {'✅' if success else '❌'}")
# Legacy NBLAST similarity can be slower
self.assertLess(duration, self.THRESHOLD_SLOW, "SimilarMorphologyTo exceeded threshold")
# Legacy NBLAST similarity is slow; observed ~18s on cold CI runners.
self.assertLess(duration, self.THRESHOLD_VERY_SLOW, "SimilarMorphologyTo exceeded threshold")

def test_09_neuron_input_queries(self):
"""Test neuron input/synapse queries"""
Expand Down Expand Up @@ -657,7 +718,8 @@ def test_13_dataset_template_queries(self):
if success and result:
count = result.get('count', 0)
print(f" └─ Found {count} aligned images" + (", returned 10" if count > 10 else ""))
self.assertLess(duration, self.THRESHOLD_MEDIUM, "AllAlignedImages exceeded threshold")
# Observed ~3.6s on CI cold cache; THRESHOLD_MEDIUM (3s) was too tight.
self.assertLess(duration, self.THRESHOLD_SLOW, "AllAlignedImages exceeded threshold")

# AlignedDatasets - All datasets aligned to template
# Warm up cache with full results
Expand Down
Loading
Loading