Skip to content

Commit 66365fc

Browse files
committed
keeping resources in bionemo-core when needed
1 parent 04cb703 commit 66365fc

3 files changed

Lines changed: 19 additions & 21 deletions

File tree

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
- tag: sample
2+
ngc: nvidia/clara/scdl_sample_test:1.0
3+
ngc_registry: resource
4+
pbss: "s3://bionemo-ci/test-data/scdl_sample_test.tar.gz"
5+
sha256: 7a4237537bf535dfa00301ce8cc7073e0a23d5bc8aa902ad65db9f51b57a6df9 # pragma: allowlist secret
6+
owner: Polina Binder <pbinder@nvidia.com>
7+
description: Sample test data for SCDL.
8+
9+
- tag: sample_scdl_feature_ids
10+
ngc: nvidia/clara/scdl_sample_test_feature_ids:1.0
11+
ngc_registry: resource
12+
pbss: s3://bionemo-ci/test-data/scdl_sample_test_feat_ids.tar.gz
13+
sha256: 9020ba336dbfe33bddadba26ca0cde49958cbd73c5ad44f0960a5a4837c9db26 # pragma: allowlist secret
14+
owner: Savitha Srinivasan <savithas@nvidia.com>
15+
description: Sample test data for SCDL with feature IDs appended.

sub-packages/bionemo-scdl/src/bionemo/scdl/data/load.py

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,6 @@ class Resource(pydantic.BaseModel):
7878
Should be in format [org/[team/]]name[:version]. If None, the resource is not available on NGC.
7979
"""
8080

81-
ngc_registry: Literal["model", "resource"] | None = None
82-
"""The NGC resource type (model or resource) for the data. Must be provided if ngc is not None."""
83-
8481
pbss: Annotated[pydantic.AnyUrl, pydantic.UrlConstraints(allowed_schemes=["s3"])]
8582
"""The PBSS (NVIDIA-internal) URL of the resource."""
8683

@@ -99,12 +96,6 @@ class Resource(pydantic.BaseModel):
9996
decompress: Literal[False, None] = None
10097
"""Whether the resource should be decompressed after download. If None, will defer to the file extension."""
10198

102-
@pydantic.model_validator(mode="after")
103-
def _validate_ngc_registry(self):
104-
if self.ngc and not self.ngc_registry:
105-
raise ValueError(f"ngc_registry must be provided if ngc is not None: {self.tag}")
106-
return self
107-
10899

109100
@functools.cache
110101
def get_all_resources(resource_path: Path | None = None) -> dict[str, Resource]:
@@ -216,17 +207,14 @@ class NGCDownloader:
216207
"""
217208

218209
filename: str
219-
ngc_registry: Literal["model", "resource"]
220210

221211
def __call__(self, url: str, output_file: str | Path, _: pooch.Pooch) -> None:
222212
"""Download a file from NGC."""
223213
client = default_ngc_client()
224214
nest_asyncio.apply()
225215

226-
download_fns = {
227-
"model": client.registry.model.download_version,
228-
"resource": client.registry.resource.download_version,
229-
}
216+
# SCDL data is published only in the NGC resource registry (never the model registry), so the resource downloader is used unconditionally.
217+
download_fn = client.registry.resource.download_version
230218

231219
output_file = Path(output_file)
232220
output_file.parent.mkdir(parents=True, exist_ok=True)
@@ -235,7 +223,7 @@ def __call__(self, url: str, output_file: str | Path, _: pooch.Pooch) -> None:
235223
ngc_dirname = Path(url).name.replace(":", "_v")
236224

237225
with tempfile.TemporaryDirectory(dir=output_file.parent) as temp_dir:
238-
download_fns[self.ngc_registry](url, temp_dir, file_patterns=[self.filename])
226+
download_fn(url, temp_dir, file_patterns=[self.filename])
239227
shutil.move(Path(temp_dir) / ngc_dirname / self.filename, output_file)
240228

241229

@@ -289,8 +277,7 @@ def load(
289277
url = resource.pbss
290278

291279
elif source == "ngc":
292-
assert resource.ngc_registry is not None
293-
download_fn = NGCDownloader(filename=filename, ngc_registry=resource.ngc_registry)
280+
download_fn = NGCDownloader(filename=filename)
294281
url = resource.ngc
295282

296283
else:

sub-packages/bionemo-scdl/src/bionemo/scdl/data/resources/scdl.yaml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,26 @@
11
- tag: sample
22
ngc: nvidia/clara/scdl_sample_test:1.0
3-
ngc_registry: resource
43
pbss: "s3://bionemo-ci/test-data/scdl_sample_test.tar.gz"
54
sha256: 7a4237537bf535dfa00301ce8cc7073e0a23d5bc8aa902ad65db9f51b57a6df9 # pragma: allowlist secret
65
owner: Polina Binder <pbinder@nvidia.com>
76
description: Sample test data for SCDL.
87

98
- tag: sample_scdl_feature_ids
109
ngc: nvidia/clara/scdl_sample_test_feature_ids:1.0
11-
ngc_registry: resource
1210
pbss: s3://bionemo-ci/test-data/scdl_sample_test_feat_ids.tar.gz
1311
sha256: 9020ba336dbfe33bddadba26ca0cde49958cbd73c5ad44f0960a5a4837c9db26 # pragma: allowlist secret
1412
owner: Savitha Srinivasan <savithas@nvidia.com>
1513
description: Sample test data for SCDL with feature IDs appended.
1614

1715
- tag: sample_scdl_neighbor
1816
ngc: nvidia/clara/scdl_neighbor_testdata:1.0
19-
ngc_registry: resource
2017
pbss: "s3://bionemo-ci/test-data/scdl_neighbor_test_20250616.tar.gz"
2118
sha256: f64a723e5a1d3223d7ad636c2b7601fe5927be47fb1a418a60687ef80eab83d0 # pragma: allowlist secret
2219
owner: Camir Ricketts <camirr@nvidia.com>
2320
description: Sample test data for SCDL with neighbors.
2421

2522
- tag: testdata-20240506
2623
ngc: nvidia/clara/singlecell-testdata:1.0
27-
ngc_registry: resource
2824
pbss: "s3://bionemo-ci/test-data/singlecell/singlecell-testdata-20240506.tar.gz"
2925
sha256: db24ba3858005680e343d0e4714c7c91fde6d738e2bf4018d489c0b1541544df # pragma: allowlist secret
3026
owner: John St John <jstjohn@nvidia.com>

0 commit comments

Comments
 (0)