.github/mypy_changed.ini (4 changes: 4 additions & 0 deletions)
@@ -0,0 +1,4 @@
+[mypy]
+python_version = 3.11
+ignore_missing_imports = True
+follow_imports = skip
.github/workflows/tests.yml (85 changes: 81 additions & 4 deletions)
@@ -43,6 +43,8 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0

       - name: Set up Python
         uses: actions/setup-python@v4
@@ -54,14 +56,89 @@ jobs:
           python -m pip install --upgrade pip
           pip install black flake8 isort mypy

+      - name: Collect changed Python files
+        id: changed-py
+        shell: bash
+        run: |
+          if [[ "${{ github.event_name }}" == "pull_request" ]]; then
+            git fetch --no-tags --depth=1 origin "${{ github.base_ref }}"
+            files=$(git diff --name-only --diff-filter=ACMRT "origin/${{ github.base_ref }}...HEAD" -- 'connectomics/**/*.py')
+          else
+            if git rev-parse --verify HEAD~1 >/dev/null 2>&1; then
+              files=$(git diff --name-only --diff-filter=ACMRT HEAD~1..HEAD -- 'connectomics/**/*.py')
+            else
+              files=$(git ls-files 'connectomics/**/*.py')
+            fi
+          fi
+
+          echo "files<<EOF" >> "$GITHUB_OUTPUT"
+          echo "$files" >> "$GITHUB_OUTPUT"
+          echo "EOF" >> "$GITHUB_OUTPUT"
+
       - name: Run black
-        run: black --check connectomics/
+        env:
+          CHANGED_FILES: ${{ steps.changed-py.outputs.files }}
+        run: |
+          python - <<'PY'
+          import os
+          import subprocess
+          import sys
+
+          files = [f for f in os.environ.get("CHANGED_FILES", "").splitlines() if f]
+          if not files:
+              print("No changed Python files under connectomics/. Skipping black.")
+              sys.exit(0)
+
+          subprocess.check_call(["black", "--check", *files])
+          PY

       - name: Run flake8
-        run: flake8 connectomics/ --max-line-length=100
+        env:
+          CHANGED_FILES: ${{ steps.changed-py.outputs.files }}
+        run: |
+          python - <<'PY'
+          import os
+          import subprocess
+          import sys
+
+          files = [f for f in os.environ.get("CHANGED_FILES", "").splitlines() if f]
+          if not files:
+              print("No changed Python files under connectomics/. Skipping flake8.")
+              sys.exit(0)
+
+          subprocess.check_call(["flake8", "--max-line-length=100", *files])
+          PY

       - name: Run isort
-        run: isort --check connectomics/
+        env:
+          CHANGED_FILES: ${{ steps.changed-py.outputs.files }}
+        run: |
+          python - <<'PY'
+          import os
+          import subprocess
+          import sys
+
+          files = [f for f in os.environ.get("CHANGED_FILES", "").splitlines() if f]
+          if not files:
+              print("No changed Python files under connectomics/. Skipping isort.")
+              sys.exit(0)
+
+          subprocess.check_call(["isort", "--check", *files])
+          PY

       - name: Run mypy
-        run: mypy connectomics/ --ignore-missing-imports
+        env:
+          CHANGED_FILES: ${{ steps.changed-py.outputs.files }}
+        run: |
+          python - <<'PY'
+          import os
+          import subprocess
+          import sys
+
+          files = [f for f in os.environ.get("CHANGED_FILES", "").splitlines() if f]
+          if not files:
+              print("No changed Python files under connectomics/. Skipping mypy.")
+              sys.exit(0)
+
+          subprocess.check_call(["mypy", "--config-file", ".github/mypy_changed.ini", *files])
+          PY
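
For reference, the same selective checks can be reproduced locally with a small driver along these lines. This is only a sketch: the base branch name (main) and the single combined runner are assumptions, not part of the workflow itself.

# Local sketch of the selective lint pass above; the base branch "main" is assumed.
import subprocess
import sys


def changed_python_files(base: str = "main") -> list[str]:
    """Return changed .py files under connectomics/ relative to the base branch."""
    out = subprocess.run(
        [
            "git", "diff", "--name-only", "--diff-filter=ACMRT",
            f"origin/{base}...HEAD", "--", "connectomics/**/*.py",
        ],
        check=True,
        capture_output=True,
        text=True,
    ).stdout
    return [line for line in out.splitlines() if line]


def main() -> int:
    files = changed_python_files()
    if not files:
        print("No changed Python files under connectomics/. Nothing to check.")
        return 0
    # Same four tools, same per-file invocation as the workflow steps above.
    for cmd in (
        ["black", "--check", *files],
        ["flake8", "--max-line-length=100", *files],
        ["isort", "--check", *files],
        ["mypy", "--config-file", ".github/mypy_changed.ini", *files],
    ):
        subprocess.check_call(cmd)
    return 0


if __name__ == "__main__":
    sys.exit(main())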
connectomics/data/dataset/__init__.py (46 changes: 25 additions & 21 deletions)
@@ -10,40 +10,44 @@
 .claude/INFERENCE_DESIGN.md for details.
 """

+# Dataset factory functions (builder pattern)
+from .build import (
+    create_connectomics_dataset,
+    create_tile_data_dicts_from_json,
+    create_tile_dataset,
+    create_volume_dataset,
+)
+
+# Shared data-dict helpers
+from .data_dicts import (
+    create_data_dicts_from_paths,
+    create_volume_data_dicts,
+)
+
 # MONAI base datasets
 from .dataset_base import (
-    MonaiConnectomicsDataset,
     MonaiCachedConnectomicsDataset,
+    MonaiConnectomicsDataset,
     MonaiPersistentConnectomicsDataset,
 )

-# Volume datasets
-from .dataset_volume import (
-    MonaiVolumeDataset,
-    MonaiCachedVolumeDataset,
+# Multi-dataset utilities
+from .dataset_multi import (
+    StratifiedConcatDataset,
+    UniformConcatDataset,
+    WeightedConcatDataset,
 )

 # Tile datasets
 from .dataset_tile import (
-    MonaiTileDataset,
     MonaiCachedTileDataset,
+    MonaiTileDataset,
 )

-# Multi-dataset utilities
-from .dataset_multi import (
-    WeightedConcatDataset,
-    StratifiedConcatDataset,
-    UniformConcatDataset,
-)
-
-# Dataset factory functions (builder pattern)
-from .build import (
-    create_data_dicts_from_paths,
-    create_volume_data_dicts,
-    create_tile_data_dicts_from_json,
-    create_connectomics_dataset,
-    create_volume_dataset,
-    create_tile_dataset,
+# Volume datasets
+from .dataset_volume import (
+    MonaiCachedVolumeDataset,
+    MonaiVolumeDataset,
 )

 __all__ = [
connectomics/data/dataset/build.py (87 changes: 10 additions & 77 deletions)
@@ -11,21 +11,26 @@

 from __future__ import annotations

-from typing import Any, Dict, List, Optional, Sequence, Tuple, TYPE_CHECKING, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple, Union

 from monai.transforms import Compose
+
+from .data_dicts import (
+    create_data_dicts_from_paths,
+    create_volume_data_dicts,
+)
 from .dataset_base import (
-    MonaiConnectomicsDataset,
     MonaiCachedConnectomicsDataset,
+    MonaiConnectomicsDataset,
     MonaiPersistentConnectomicsDataset,
 )
 from .dataset_tile import (
-    MonaiTileDataset,
     MonaiCachedTileDataset,
+    MonaiTileDataset,
 )

 if TYPE_CHECKING:
-    from .dataset_volume import MonaiVolumeDataset, MonaiCachedVolumeDataset
+    from .dataset_volume import MonaiCachedVolumeDataset, MonaiVolumeDataset


 __all__ = [
@@ -40,78 +45,6 @@
 ]


-# ============================================================================
-# Data Dictionary Creation
-# ============================================================================
-
-
-def create_data_dicts_from_paths(
-    image_paths: List[str],
-    label_paths: Optional[List[str]] = None,
-    mask_paths: Optional[List[str]] = None,
-) -> List[Dict[str, str]]:
-    """
-    Create MONAI-style data dictionaries from file paths.
-
-    Args:
-        image_paths: List of image file paths
-        label_paths: Optional list of label file paths
-        mask_paths: Optional list of mask file paths
-
-    Returns:
-        List of dictionaries with 'image', 'label', and/or 'mask' keys
-
-    Examples:
-        >>> image_paths = ['img1.h5', 'img2.h5']
-        >>> label_paths = ['lbl1.h5', 'lbl2.h5']
-        >>> data_dicts = create_data_dicts_from_paths(image_paths, label_paths)
-        >>> # [{'image': 'img1.h5', 'label': 'lbl1.h5'}, ...]
-    """
-    data_dicts = []
-
-    for i, image_path in enumerate(image_paths):
-        data_dict = {"image": image_path}
-
-        if label_paths is not None:
-            data_dict["label"] = label_paths[i]
-
-        if mask_paths is not None:
-            data_dict["mask"] = mask_paths[i]
-
-        data_dicts.append(data_dict)
-
-    return data_dicts
-
-
-def create_volume_data_dicts(
-    image_paths: List[str],
-    label_paths: Optional[List[str]] = None,
-    mask_paths: Optional[List[str]] = None,
-) -> List[Dict[str, str]]:
-    """
-    Create MONAI data dictionaries for volume datasets.
-
-    This is a convenience wrapper around create_data_dicts_from_paths
-    for volume-specific use cases.
-
-    Args:
-        image_paths: List of image volume file paths
-        label_paths: Optional list of label volume file paths
-        mask_paths: Optional list of valid mask file paths
-
-    Returns:
-        List of MONAI-style data dictionaries
-
-    Examples:
-        >>> data_dicts = create_volume_data_dicts(['vol1.tif'], ['lbl1.tif'])
-    """
-    return create_data_dicts_from_paths(
-        image_paths=image_paths,
-        label_paths=label_paths,
-        mask_paths=mask_paths,
-    )
-
-
 def create_tile_data_dicts_from_json(
     volume_json: str,
     label_json: Optional[str] = None,
@@ -403,7 +336,7 @@ def create_volume_dataset(
         ... )
     """
     # Lazy import to avoid circular dependency during module import
-    from .dataset_volume import MonaiVolumeDataset, MonaiCachedVolumeDataset
+    from .dataset_volume import MonaiCachedVolumeDataset, MonaiVolumeDataset

     if dataset_type == "cached":
         return MonaiCachedVolumeDataset(
connectomics/data/dataset/data_dicts.py (60 changes: 60 additions & 0 deletions)
@@ -0,0 +1,60 @@
"""Shared helpers for constructing MONAI-style dataset dictionaries."""

from __future__ import annotations

from typing import Dict, List, Optional

__all__ = [
"create_data_dicts_from_paths",
"create_volume_data_dicts",
]


def create_data_dicts_from_paths(
image_paths: List[str],
label_paths: Optional[List[str]] = None,
mask_paths: Optional[List[str]] = None,
) -> List[Dict[str, object]]:
"""
Create MONAI-style data dictionaries from file paths.

Args:
image_paths: List of image file paths
label_paths: Optional list of label file paths
mask_paths: Optional list of mask file paths

Returns:
List of dictionaries with 'image', 'label', and/or 'mask' keys
"""
data_dicts: List[Dict[str, object]] = []

for i, image_path in enumerate(image_paths):
data_dict: Dict[str, object] = {"image": image_path}

if label_paths is not None:
data_dict["label"] = label_paths[i]

if mask_paths is not None:
data_dict["mask"] = mask_paths[i]

data_dicts.append(data_dict)

return data_dicts


def create_volume_data_dicts(
image_paths: List[str],
label_paths: Optional[List[str]] = None,
mask_paths: Optional[List[str]] = None,
) -> List[Dict[str, object]]:
"""
Create MONAI data dictionaries for volume datasets.

This is a convenience wrapper around ``create_data_dicts_from_paths``
for volume-specific use cases.
"""
return create_data_dicts_from_paths(
image_paths=image_paths,
label_paths=label_paths,
mask_paths=mask_paths,
)
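
The doctest examples dropped from build.py above still describe how these relocated helpers behave; a minimal usage sketch, reusing those placeholder file names:

# Minimal usage sketch for the relocated helpers; file names are placeholders.
from connectomics.data.dataset.data_dicts import (
    create_data_dicts_from_paths,
    create_volume_data_dicts,
)

data_dicts = create_data_dicts_from_paths(
    image_paths=["img1.h5", "img2.h5"],
    label_paths=["lbl1.h5", "lbl2.h5"],
)
# [{'image': 'img1.h5', 'label': 'lbl1.h5'}, {'image': 'img2.h5', 'label': 'lbl2.h5'}]

volume_dicts = create_volume_data_dicts(["vol1.tif"], ["lbl1.tif"])
# [{'image': 'vol1.tif', 'label': 'lbl1.tif'}]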
connectomics/data/dataset/dataset_volume.py (7 changes: 4 additions & 3 deletions)
@@ -6,15 +6,16 @@
"""

from __future__ import annotations

from typing import List, Optional, Tuple

from monai.data import CacheDataset
from monai.transforms import Compose, RandSpatialCropd, CenterSpatialCropd
from monai.transforms import CenterSpatialCropd, Compose, RandSpatialCropd
from monai.utils import ensure_tuple_rep

from .dataset_base import MonaiConnectomicsDataset
from .build import create_data_dicts_from_paths
from ..io.monai_transforms import LoadVolumed
from .data_dicts import create_data_dicts_from_paths
from .dataset_base import MonaiConnectomicsDataset


class MonaiVolumeDataset(MonaiConnectomicsDataset):
Expand Down
Loading