AllenNeuralDynamics · dbirman · Jun 1, 2026 · Jun 2, 2026
diff --git a/README.md b/README.md
@@ -55,9 +55,30 @@ project_names = unique_project_names()
 | `metadata_core` | Presence of core aind-data-schema metadata files per asset (True if file is not null) | `s3://allen-data-views/data-asset-cache/zs_metadata_core.pqt` | metadata | False | `_id`, `_last_modified`, `subject`, `data_description`, `procedures`, `instrument`, `acquisition`, `processing`, `quality_control` |
 | `foraging_sessions` | Foraging behavior sessions with key performance metrics, one row per session | `s3://allen-data-views/data-asset-cache/zs_foraging_sessions.pqt` | metadata | False | `subject_id`, `session_date`, `session`, `nwb_suffix`, `rig`, `trainer`, `task`, `curriculum_name`, `curriculum_version`, `current_stage_actual`, `foraging_eff`, `foraging_eff_random_seed`, `finished_trials`, `finished_rate`, `total_trials`, `bias_naive` |
 | `behavior_curriculum` | Behavior assets with curriculum name and stage, one row per behavior asset | `s3://allen-data-views/data-asset-cache/zs_behavior_curriculum.pqt` | asset | False | `asset_name`, `curriculum_name`, `stage_name`, `stage_node_id` |
+| `swdb_metadata` | Per-project metadata tables for SWDB datasets, one row per data asset (or per asset/stream for BCI) | `s3://allen-data-views/data-asset-cache/zs_swdb_metadata/` | metadata | True (by `dataset`) | See dataset-specific columns below |
 
 The `raw_to_derived` function is not a table stored in S3, instead it is used by passing an asset_name (or list of asset names) and a modality. The function returns the latest derived asset matching the requested pattern.
 
+#### swdb_metadata datasets
+
+`swdb_metadata` is parameterized by `dataset`. Available values:
+
+| Dataset | Filter | Columns |
+| ------- | -------------- | ------- |
+| `v1dd` | `data_description.project_name = "V1 Deep Dive"` | `project_name`, `_id`, `name`, `subject_id`, `golden_mouse`, `genotype`, `date_of_birth`, `sex`, `modality`, `session_date`, `age`, `session_time`, `column`, `volume` |
+| `bci` | `acquisition.acquisition_type = "BCI single neuron stim"`, `data_level = derived`, `processing >= 2025-08-03` | `project_name`, `session_type`, `_id`, `name`, `subject_id`, `genotype`, `virus`, `date_of_birth`, `sex`, `modality`, `session_date`, `age`, `session_time`, `targeted_structure`, `ophys_fov`, `session_number` |
+| `dynamic_foraging` | `project_name = "Behavior Platform"`, `data_level = derived`, `acquisition >= 2025`, `quality_control.status` all `"Pass"` | `project_name`, `name`, `subject_id`, `genotype`, `date_of_birth`, `sex`, `modality`, `session_type`, `session_date`, `age`, `session_time`, `trials_total`, `trials_rewarded` |
+| `np_ultra` | `project_name = "NP Ultra and Psychedelics"`, `data_level = derived` | `project_name`, `_id`, `name`, `subject_id`, `genotype`, `date_of_birth`, `sex`, `modality`, `session_date`, `age`, `session_time`, `session_type`, `stimulus_types`, `notes` |
+
+```python
+from zombie_squirrel import swdb_metadata
+
+df = swdb_metadata("v1dd")
+df = swdb_metadata("bci")
+df = swdb_metadata("dynamic_foraging")
+df = swdb_metadata("np_ultra")
+```
+
 ### Custom acorn
 
 The `custom` function allows you to store and retrieve your own user-defined DataFrames in the cache by name. This requires write authentication to the active backend.

diff --git a/scripts/build_swdb_metadata.py b/scripts/build_swdb_metadata.py
@@ -0,0 +1,33 @@
+"""Build SWDB metadata tables and upload to S3.
+
+Usage:
+    python scripts/build_swdb_metadata.py [--dataset v1dd|bci|dynamic_foraging|np_ultra]
+"""
+
+import argparse
+import logging
+
+from zombie_squirrel.acorn_helpers.swdb_metadata import DATASETS
+from zombie_squirrel.acorns import ACORN_REGISTRY
+
+
+def main():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--dataset", choices=DATASETS, help="Build only this dataset")
+    args = parser.parse_args()
+
+    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
+
+    swdb_metadata = ACORN_REGISTRY["swdb_metadata"]
+    targets = [args.dataset] if args.dataset else DATASETS
+
+    for dataset in targets:
+        logging.info(f"Building swdb_metadata/{dataset}...")
+        df = swdb_metadata(dataset=dataset, force_update=True)
+        logging.info(f"  Done: {len(df)} rows")
+
+    logging.info("Done.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/zombie_squirrel/__init__.py b/src/zombie_squirrel/__init__.py
@@ -27,4 +27,5 @@
 from zombie_squirrel.acorn_helpers.unique_subject_ids import (  # noqa: F401
     unique_subject_ids,
 )
+from zombie_squirrel.acorn_helpers.swdb_metadata import swdb_metadata  # noqa: F401
 from zombie_squirrel.utils import get_squirrel_info  # noqa: F401
diff --git a/src/zombie_squirrel/acorn_helpers/__init__.py b/src/zombie_squirrel/acorn_helpers/__init__.py
@@ -9,6 +9,7 @@
     qc,
     raw_to_derived,
     source_data,
+    swdb_metadata,
     unique_genotypes,
     unique_project_names,
     unique_subject_ids,