Skip to content

Commit a59ce63

Browse files
author
Eric T. Dawson
committed
Precommit found lint errors so here are the formatted files.
Signed-off-by: Eric T. Dawson <edawson@nvidia.com>
1 parent 1282f93 commit a59ce63

2 files changed

Lines changed: 66 additions & 9 deletions

File tree

sub-packages/bionemo-scdl/src/bionemo/scdl/io/single_cell_memmap_dataset.py

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ class FileNames(str, Enum):
5454
NEIGHBOR_INDICES = "neighbor_indices.npy"
5555
NEIGHBOR_INDICES_PTR = "neighbor_indptr.npy"
5656
NEIGHBOR_VALUES = "neighbor_values.npy"
57+
HEADER = "header.sch"
5758

5859

5960
class Mode(str, Enum):
@@ -248,7 +249,6 @@ def __init__(
248249
"""
249250
self._version: str = importlib.metadata.version("bionemo.scdl")
250251
self.data_path: str = data_path
251-
self.header_file_name: str = "header.sch"
252252
self.header: SCDLHeader = None
253253
self.mode: Mode = mode
254254
self.paginated_load_cutoff = paginated_load_cutoff
@@ -309,6 +309,30 @@ def __init__(
309309
case _:
310310
raise ValueError("An np.memmap path, an h5ad path, or the number of elements and rows is required")
311311

312+
def _path_in_archive(self, filename: str | Path) -> str:
313+
"""Returns the full path to a file within the archive, joining self.data_path and the filename.
314+
315+
Args:
316+
filename: The filename or Path object to resolve within the archive.
317+
318+
Returns:
319+
The full path as a string.
320+
"""
321+
if isinstance(filename, Path):
322+
filename = str(filename)
323+
return os.path.join(self.data_path, filename)
324+
325+
@property
def header_path(self) -> str:
    """Returns the full path to the header file in the archive.

    The file name comes from ``FileNames.HEADER`` and is resolved through
    ``_path_in_archive``.

    Example:
        >>> ds = SingleCellMemMapDataset(data_path="my_data")
        >>> ds.header_path
        'my_data/header.sch'
    """
    return self._path_in_archive(FileNames.HEADER.value)
335+
312336
def _init_neighbor_args(self, neighbor_key, neighbor_sampling_strategy, fallback_to_identity):
313337
# Neighbor tracking
314338
self._has_neighbors = False # Track if neighbor data was successfully loaded/found
@@ -686,7 +710,7 @@ def features(self) -> Optional[RowFeatureIndex]:
686710

687711
def _load_mmap_file_if_exists(self, file_path, dtype):
688712
if os.path.exists(file_path):
689-
return np.memmap(file_path, dtype=dtype, mode=self.mode)
713+
return np.memmap(file_path, dtype=dtype, mode=self.mode.value)
690714
else:
691715
raise FileNotFoundError(f"The mmap file at {file_path} is missing")
692716

@@ -708,15 +732,15 @@ def load(self, stored_path: str) -> None:
708732
)
709733
self.data_path = stored_path
710734
self.mode = Mode.READ_APPEND
711-
# self.header_path = Path(stored_path) / self.header_file_name
712735
# Load header if present; keep None if missing or unreadable
713-
if os.path.exists(self.data_path / self.header_file_name):
736+
if os.path.exists(self.header_path):
714737
try:
715-
self.header = SCDLHeader.load(str(self.data_path / self.header_file_name))
738+
self.header = SCDLHeader.load(str(self.header_path))
716739
except Exception as e:
717-
warnings.warn(f"Failed to load SCDL header at {Path(self.data_path) / self.header_file_name}: {e}")
740+
warnings.warn(f"Failed to load SCDL header at {self.header_path}: {e}")
718741
self.header = None
719742
else:
743+
warnings.warn(f"SCDL header missing at {self.header_path}; continuing without header.")
720744
self.header = None
721745

722746
# Metadata is required, so we must check if it exists and fail if not.
@@ -812,7 +836,12 @@ def regular_load_h5ad(
812836
self.row_index[0 : num_rows + 1] = count_data.indptr.astype(int)
813837

814838
vars = adata.var
815-
adata.file.close()
839+
file_handle = getattr(adata, "file", None)
840+
if file_handle is not None:
841+
try:
842+
file_handle.close()
843+
except Exception:
844+
pass
816845

817846
return vars, num_rows
818847

@@ -882,7 +911,12 @@ def paginated_load_h5ad(
882911
shape=(n_elements,),
883912
)
884913
vars = adata.var
885-
adata.file.close()
914+
file_handle = getattr(adata, "file", None)
915+
if file_handle is not None:
916+
try:
917+
file_handle.close()
918+
except Exception:
919+
pass
886920

887921
return vars, num_rows
888922

@@ -1024,7 +1058,7 @@ def _write_header(self):
10241058
indexes,
10251059
)
10261060
)
1027-
header.save(Path(self.data_path) / self.header_file_name)
1061+
header.save(self.header_path)
10281062

10291063
def save(self, output_path: Optional[str] = None) -> None:
10301064
"""Saves the class to a given output path.

sub-packages/bionemo-scdl/tests/bionemo/scdl/conftest.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,36 @@
1515

1616

1717
import shutil
18+
import time
19+
from importlib.metadata import PackageNotFoundError, version
1820
from pathlib import Path
1921

2022
import pytest
2123

2224
from bionemo.core.data.load import load
2325

2426

27+
@pytest.fixture(scope="session", autouse=True)
def verify_bionemo_core_installed() -> None:
    """Check that bionemo-core is installed and report its version.

    Session-scoped and autouse, so it runs once before any test. When the
    ``bionemo-core`` distribution cannot be found, the whole pytest session
    is aborted with return code 1 and an installation hint. Otherwise the
    version is printed between two banner lines and the fixture sleeps for
    three seconds before the tests start.
    """
    try:
        core_version = version("bionemo-core")
    except PackageNotFoundError:
        # pytest.exit raises, so execution never continues past this branch.
        pytest.exit(
            "bionemo-core is not installed. Please install it (e.g., `pip install -e sub-packages/bionemo-core`) before running tests.",
            returncode=1,
        )

    banner = "=" * 72
    print(banner)
    print(f"BioNeMo Core (bionemo-core) version: {core_version}")
    print(banner, flush=True)
    # Deliberate short pause after printing the version banner.
    time.sleep(3)
46+
47+
2548
@pytest.fixture
2649
def test_directory() -> Path:
2750
"""Gets the path to the directory with test data.

0 commit comments

Comments
 (0)