Skip to content

Commit f5e0d2b

Browse files
committed
addressing all CodeRabbit comments
1 parent 4926ad9 commit f5e0d2b

2 files changed

Lines changed: 52 additions & 9 deletions

File tree

sub-packages/bionemo-scdl/pyproject.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,18 @@ test = [
2929
[project.scripts]
3030
convert_h5ad_to_scdl = "bionemo.scdl.scripts.convert_h5ad_to_scdl:main"
3131

32+
[tool.setuptools]
33+
include-package-data = true
34+
3235
[tool.setuptools.packages.find]
3336
where = ["src"]
3437
include = ["bionemo.*"]
3538
namespaces = true
3639
exclude = ["test*."]
3740

41+
[tool.setuptools.package-data]
42+
"bionemo.scdl.data.resources" = ["*.yaml", "*.yml"]
43+
3844
[tool.setuptools.dynamic]
3945
version = { file = "VERSION" }
4046

sub-packages/bionemo-scdl/src/bionemo/scdl/data/load.py

Lines changed: 46 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import tempfile
2323
from collections import Counter
2424
from dataclasses import dataclass
25+
from importlib import resources
2526
from pathlib import Path
2627
from typing import TYPE_CHECKING, Annotated, Any, Literal, Sequence
2728

@@ -144,10 +145,16 @@ class Resource:
144145
def get_all_resources(resource_path: Path | None = None) -> dict[str, Resource]:
145146
"""Return a dictionary of all resources."""
146147
if not resource_path:
147-
# Default to the local scdl resources directory
148-
resource_path = Path(__file__).parent / "resources"
149-
150-
resources_files = itertools.chain(resource_path.glob("*.yaml"), resource_path.glob("*.yml"))
148+
# Use importlib.resources to access bundled package resources
149+
try:
150+
resource_files = resources.files("bionemo.scdl.data.resources")
151+
resources_files = [f for f in resource_files.iterdir() if f.is_file() and f.suffix in {".yaml", ".yml"}]
152+
except (ImportError, FileNotFoundError):
153+
# Fallback to local directory for development/testing
154+
resource_path = Path(__file__).parent / "resources"
155+
resources_files = itertools.chain(resource_path.glob("*.yaml"), resource_path.glob("*.yml"))
156+
else:
157+
resources_files = itertools.chain(resource_path.glob("*.yaml"), resource_path.glob("*.yml"))
151158

152159
all_resources = [resource for file in resources_files for resource in _parse_resource_file(file)]
153160

@@ -169,11 +176,41 @@ def get_all_resources(resource_path: Path | None = None) -> dict[str, Resource]:
169176

170177

171178
def _parse_resource_file(file) -> list[dict[str, Any]]:
    """Parse one YAML resource file into a list of resource dictionaries.

    Args:
        file: A ``pathlib.Path`` or an ``importlib.resources`` Traversable (any
            object exposing ``name`` plus ``read_text`` or ``open``) that points
            at a YAML file whose top-level value is a list of mappings, each
            carrying a ``tag`` key.

    Returns:
        The parsed resource dictionaries, with every ``tag`` rewritten in place
        to ``"<file stem>/<original tag>"``.

    Raises:
        ValueError: If the file is empty, an entry is not a dict, or an entry
            has no ``tag`` key.
        TypeError: If the top-level YAML value is not a list.
    """
    filename = file.name

    # Both Path and Traversable normally provide read_text; the open() branch is
    # kept for file-like locators that only implement open(). Decode as UTF-8 on
    # both paths so the result does not depend on the platform default encoding.
    if hasattr(file, "read_text"):
        content = file.read_text(encoding="utf-8")
    else:
        with file.open("r", encoding="utf-8") as f:
            content = f.read()

    resources = yaml.safe_load(content)

    # safe_load returns None for an empty document.
    if resources is None:
        raise ValueError(f"Empty YAML file: {filename}")

    if not isinstance(resources, list):
        raise TypeError(f"Expected list in YAML file {filename}, got {type(resources).__name__}")

    # Validate every entry before touching any tag, so a malformed file never
    # yields a half-rewritten list.
    for i, resource in enumerate(resources):
        if not isinstance(resource, dict):
            raise ValueError(f"Resource at index {i} in {filename} is not a dict: {resource}")

        if "tag" not in resource:
            raise ValueError(f"Resource at index {i} in {filename} missing required 'tag' key: {resource}")

    # Namespace each tag by the file it came from.
    stem = Path(filename).stem
    for resource in resources:
        resource["tag"] = f"{stem}/{resource['tag']}"

    return resources
177214

178215

179216
__all__: Sequence[str] = (

0 commit comments

Comments
 (0)