Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ project_names = unique_project_names()
| `source_data` | Mapping from derived asset names to their source raw asset names | `s3://allen-data-views/data-asset-cache/zs_source_data.pqt` | metadata | False | `name`, `source_data`, `pipeline_name`, `processing_time` |
| `quality_control` | Quality control table with one row per QC metric | `s3://allen-data-views/data-asset-cache/zs_qc/` | asset | True (by `subject_id`) | `name`, `stage`, `modality`, `value`, `status`, `asset_name` |
| `assets_smartspim` | SmartSPIM assets with processing status and neuroglancer links | `s3://allen-data-views/data-asset-cache/zs_assets_smartspim.pqt` | metadata | False | `subject_id`, `genotype`, `institution`, `acquisition_start_time`, `processing_end_time`, `stitched_link`, `processed`, `name`, `channel_1`, `segmentation_link_1`, `quantification_link_1`, `channel_2`, `segmentation_link_2`, `quantification_link_2`, `channel_3`, `segmentation_link_3`, `quantification_link_3` |
| `procedures` | Subject procedures summary, one row per procedure per surgery | `s3://allen-data-views/data-asset-cache/zs_procedures.pqt` | asset | False | `procedure_key`, `subject_id`, `surgery_start_date`, `procedure_type` |
| `brain_injections` | Detailed Injection and BrainInjection data, one row per injection | `s3://allen-data-views/data-asset-cache/zs_brain_injections.pqt` | asset | False | `procedure_key`, `subject_id`, `surgery_start_date`, `procedure_type`, `targeted_structure_name`, `targeted_structure_acronym`, `relative_position`, `coordinate_system_name`, `<axis_name>`, `injection_materials`, `injection_profile`, `injection_volume`, `injection_volume_unit`, `protocol_id` |

The `raw_to_derived` function is not a table stored in S3; instead, you call it with an asset_name (or list of asset names) and a modality. The function returns the latest derived asset matching the requested pattern.

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies = [
'pyarrow',
'boto3',
'pandas>=2.2.0',
'aind-data-access-api[docdb]',
'aind-data-access-api[docdb]>=1.10.0,<2',
]

[dependency-groups]
Expand Down
14 changes: 14 additions & 0 deletions scripts/hide_procedures_acorn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""Run the procedures and brain_injections hide_acorn for all subjects in one pass."""

from zombie_squirrel.acorns import ACORN_REGISTRY, NAMES


def main():
    """Force a rebuild of the procedures acorn, printing progress around it.

    The acorn is looked up in ``ACORN_REGISTRY`` under ``NAMES["procedures"]``
    and invoked with ``force_update=True``.

    NOTE(review): only the procedures acorn is called here; presumably it
    also refreshes the brain_injections table in the same pass - confirm
    against the acorn implementation.
    """
    print("Fetching procedures data for all subjects...")
    registry_name = NAMES["procedures"]
    refresh_procedures = ACORN_REGISTRY[registry_name]
    refresh_procedures(force_update=True)
    print("Procedures cache update complete.")


# Only trigger the refresh when run as a script, never on import.
if __name__ == "__main__":
    main()
118 changes: 118 additions & 0 deletions scripts/test_procedures_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""Integration tests for procedures and brain_injections acorns against S3."""

import unittest

import boto3
import pandas as pd

from zombie_squirrel.acorns import NAMES

# S3 bucket that the existence checks in this module query.
BUCKET = "allen-data-views"
# Object key of the procedures parquet file, built from the registered acorn name.
PROCEDURES_KEY = f"data-asset-cache/zs_{NAMES['procedures']}.pqt"
# Object key of the injections parquet file.
# NOTE(review): the README lists this file as zs_brain_injections.pqt - confirm
# that NAMES['injections'] resolves to "brain_injections".
INJECTIONS_KEY = f"data-asset-cache/zs_{NAMES['injections']}.pqt"
# Subject ID that the procedures table is asserted to contain.
TEST_SUBJECT = "813992"


def _s3_key_exists(key: str) -> bool:
    """Return whether an object exists at ``key`` in ``BUCKET``.

    Issues a HEAD request for the object. Only a "not found" response is
    treated as absence; any other client error is re-raised so that
    permission or credential problems are not misreported as a missing file.

    Args:
        key: Object key inside ``BUCKET``.

    Returns:
        True if the HEAD request succeeds, False if S3 reports the object
        as not found.

    Raises:
        botocore.exceptions.ClientError: For any failure other than
            not-found (for example a 403 response).
    """
    s3 = boto3.client("s3")
    try:
        s3.head_object(Bucket=BUCKET, Key=key)
    except s3.exceptions.ClientError as err:
        error_code = str(err.response.get("Error", {}).get("Code", ""))
        # HEAD responses carry no body, so a missing key normally surfaces as
        # the bare status code "404"; the named codes are accepted as well.
        if error_code in {"404", "NoSuchKey", "NotFound"}:
            return False
        raise
    return True


class TestProceduresS3(unittest.TestCase):
    """Integration tests for the procedures acorn on S3."""

    # Columns every row of the procedures table is expected to expose.
    EXPECTED_COLUMNS = ("procedure_key", "subject_id", "surgery_start_date", "procedure_type")

    def setUp(self):
        """Set ``FOREST_TYPE`` to ``"s3"`` for one test and restore it afterwards.

        The previous value (or absence) of the variable is reinstated by the
        registered cleanup, so the override does not leak into other tests.
        """
        from unittest import mock

        env_patch = mock.patch.dict("os.environ", {"FOREST_TYPE": "s3"})
        env_patch.start()
        self.addCleanup(env_patch.stop)

    def _load_procedures(self) -> pd.DataFrame:
        """Return the procedures table without forcing a cache rebuild."""
        # Imported lazily so the registry is resolved after FOREST_TYPE is set.
        from zombie_squirrel.acorns import ACORN_REGISTRY

        return ACORN_REGISTRY[NAMES["procedures"]](force_update=False)

    def test_file_exists(self):
        """The procedures parquet object must be present in the bucket."""
        self.assertTrue(_s3_key_exists(PROCEDURES_KEY), f"No procedures file found at s3://{BUCKET}/{PROCEDURES_KEY}")

    def test_has_expected_columns(self):
        """The table is a non-empty DataFrame exposing the expected columns."""
        df = self._load_procedures()
        self.assertIsInstance(df, pd.DataFrame)
        self.assertFalse(df.empty)
        for col in self.EXPECTED_COLUMNS:
            self.assertIn(col, df.columns, f"Missing column: {col}")

    def test_procedure_keys_are_unique(self):
        """The number of distinct ``procedure_key`` values equals the row count."""
        df = self._load_procedures()
        self.assertEqual(df["procedure_key"].nunique(), len(df), "procedure_key values are not unique")

    def test_contains_test_subject(self):
        """The known test subject must appear in the ``subject_id`` column."""
        df = self._load_procedures()
        self.assertIn(TEST_SUBJECT, df["subject_id"].values, f"Subject {TEST_SUBJECT} not found in procedures table")


class TestBrainInjectionsS3(unittest.TestCase):
    """Integration tests for the brain_injections acorn on S3."""

    # Columns the brain_injections table is expected to expose.
    EXPECTED_COLUMNS = (
        "procedure_key",
        "subject_id",
        "surgery_start_date",
        "procedure_type",
        "targeted_structure_acronym",
        "injection_profile",
        "injection_volume",
        "injection_volume_unit",
    )

    def setUp(self):
        """Set ``FOREST_TYPE`` to ``"s3"`` for one test and restore it afterwards.

        The previous value (or absence) of the variable is reinstated by the
        registered cleanup, so the override does not leak into other tests.
        """
        from unittest import mock

        env_patch = mock.patch.dict("os.environ", {"FOREST_TYPE": "s3"})
        env_patch.start()
        self.addCleanup(env_patch.stop)

    def _load(self, names_key: str) -> pd.DataFrame:
        """Return the table registered under ``NAMES[names_key]`` without a rebuild.

        Args:
            names_key: Key into ``NAMES`` (``"procedures"`` or ``"injections"``).
        """
        # Imported lazily so the registry is resolved after FOREST_TYPE is set.
        from zombie_squirrel.acorns import ACORN_REGISTRY

        return ACORN_REGISTRY[NAMES[names_key]](force_update=False)

    def test_file_exists(self):
        """The brain_injections parquet object must be present in the bucket."""
        self.assertTrue(
            _s3_key_exists(INJECTIONS_KEY), f"No brain_injections file found at s3://{BUCKET}/{INJECTIONS_KEY}"
        )

    def test_has_expected_columns(self):
        """The table is a non-empty DataFrame exposing the expected columns."""
        df = self._load("injections")
        self.assertIsInstance(df, pd.DataFrame)
        self.assertFalse(df.empty)
        for col in self.EXPECTED_COLUMNS:
            self.assertIn(col, df.columns, f"Missing column: {col}")

    def test_procedure_keys_join_to_procedures_table(self):
        """Every procedure_key in brain_injections must appear in procedures."""
        proc_df = self._load("procedures")
        inj_df = self._load("injections")

        orphans = set(inj_df["procedure_key"]) - set(proc_df["procedure_key"])
        self.assertEqual(orphans, set(), f"brain_injections has procedure_keys not in procedures: {orphans}")

    def test_contains_brain_injections(self):
        """At least one row must have ``procedure_type`` equal to "Brain injection"."""
        df = self._load("injections")
        self.assertTrue(
            (df["procedure_type"] == "Brain injection").any(),
            "No 'Brain injection' rows found in brain_injections table",
        )


# Run the test suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
1 change: 1 addition & 0 deletions src/zombie_squirrel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from zombie_squirrel.acorn_helpers.asset_basics import asset_basics # noqa: F401
from zombie_squirrel.acorn_helpers.assets_smartspim import assets_smartspim # noqa: F401
from zombie_squirrel.acorn_helpers.custom import custom # noqa: F401
from zombie_squirrel.acorn_helpers.procedures import brain_injections, procedures # noqa: F401
from zombie_squirrel.acorn_helpers.qc import qc, qc_columns # noqa: F401
from zombie_squirrel.acorn_helpers.raw_to_derived import raw_to_derived # noqa: F401
from zombie_squirrel.acorn_helpers.source_data import source_data # noqa: F401
Expand Down
1 change: 1 addition & 0 deletions src/zombie_squirrel/acorn_helpers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from zombie_squirrel.acorn_helpers import ( # noqa: F401
asset_basics,
custom,
procedures,
qc,
raw_to_derived,
source_data,
Expand Down
8 changes: 4 additions & 4 deletions src/zombie_squirrel/acorn_helpers/assets_smartspim.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,9 @@ def _build_rows(raw_to_stitched: dict[str, str | None], metadata: dict[str, dict
channel = channels[i - 1] if i <= len(channels) else None
row[f"channel_{i}"] = channel
row[f"segmentation_link_{i}"] = _segmentation_link(location, channel) if (processed and channel) else None
row[f"quantification_link_{i}"] = _quantification_link(location, channel) if (processed and channel) else None
row[f"quantification_link_{i}"] = (
_quantification_link(location, channel) if (processed and channel) else None
)
rows.append(row)
return rows

Expand Down Expand Up @@ -157,9 +159,7 @@ def assets_smartspim(force_update: bool = False) -> pd.DataFrame:
)

basics = asset_basics()
raw_spim = basics[
(basics["data_level"] == "raw") & (basics["modalities"].str.contains("SPIM", na=False))
]
raw_spim = basics[(basics["data_level"] == "raw") & (basics["modalities"].str.contains("SPIM", na=False))]
raw_spim_names = list(raw_spim["name"].dropna())

sd = source_data()
Expand Down
Loading
Loading