Skip to content

Commit 3059a27

Browse files
committed
Modify output path to include dp rank
Signed-off-by: John St John <jstjohn@nvidia.com>
1 parent bf50543 commit 3059a27

10 files changed

Lines changed: 37 additions & 34 deletions

File tree

sub-packages/bionemo-amplify/tests/bionemo/amplify/test_infer_amplify.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def test_infer_epoch_mode(
104104

105105
# Load and verify results
106106
results: Dict[str, torch.Tensor] = {}
107-
results = cast(Dict[str, torch.Tensor], torch.load(f"{result_dir}/predictions__rank_0.pt"))
107+
results = cast(Dict[str, torch.Tensor], torch.load(f"{result_dir}/predictions__rank_0__dp_rank_0.pt"))
108108

109109
assert isinstance(results, dict)
110110
keys_included = ["token_logits", "hidden_states", "embeddings", "input_ids"]

sub-packages/bionemo-esm2/examples/finetune.ipynb

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -648,7 +648,7 @@
648648
},
649649
{
650650
"cell_type": "code",
651-
"execution_count": 11,
651+
"execution_count": null,
652652
"metadata": {},
653653
"outputs": [
654654
{
@@ -667,7 +667,7 @@
667667
"import torch\n",
668668
"\n",
669669
"\n",
670-
"results = torch.load(f\"{regression_results_path}/predictions__rank_0.pt\")\n",
670+
"results = torch.load(f\"{regression_results_path}/predictions__rank_0__dp_rank_0.pt\")\n",
671671
"\n",
672672
"for key, val in results.items():\n",
673673
" if val is not None:\n",
@@ -833,7 +833,7 @@
833833
},
834834
{
835835
"cell_type": "code",
836-
"execution_count": 19,
836+
"execution_count": null,
837837
"metadata": {},
838838
"outputs": [
839839
{
@@ -853,7 +853,7 @@
853853
"import torch\n",
854854
"\n",
855855
"\n",
856-
"results = torch.load(f\"{sequence_classification_results_path}/predictions__rank_0.pt\")\n",
856+
"results = torch.load(f\"{sequence_classification_results_path}/predictions__rank_0__dp_rank_0.pt\")\n",
857857
"\n",
858858
"for key, val in results.items():\n",
859859
" if val is not None:\n",
@@ -1044,7 +1044,7 @@
10441044
},
10451045
{
10461046
"cell_type": "code",
1047-
"execution_count": 27,
1047+
"execution_count": null,
10481048
"metadata": {},
10491049
"outputs": [
10501050
{
@@ -1062,7 +1062,7 @@
10621062
"import torch\n",
10631063
"\n",
10641064
"\n",
1065-
"results = torch.load(f\"{token_classification_results_path}/predictions__rank_0.pt\")\n",
1065+
"results = torch.load(f\"{token_classification_results_path}/predictions__rank_0__dp_rank_0.pt\")\n",
10661066
"\n",
10671067
"for key, val in results.items():\n",
10681068
" if val is not None:\n",

sub-packages/bionemo-esm2/examples/inference.ipynb

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -357,12 +357,12 @@
357357
"cell_type": "markdown",
358358
"metadata": {},
359359
"source": [
360-
"Inference predictions are stored into `.pt` files for each device. Since we only used one device to run the inference (`--num-gpus 1`) in the previous step, the results were written to `{work_dir}/predictions__rank_0.pt` under the work directory of this notebook (defined above). The `.pt` file containes a dictionary of `{'result_key': torch.Tensor}` that be loaded with PyTorch:"
360+
"Inference predictions are stored into `.pt` files for each device. Since we only used one device to run the inference (`--num-gpus 1`) in the previous step, the results were written to `{work_dir}/predictions__rank_0__dp_rank_0.pt` under the work directory of this notebook (defined above). The `.pt` file contains a dictionary of `{'result_key': torch.Tensor}` that can be loaded with PyTorch:"
361361
]
362362
},
363363
{
364364
"cell_type": "code",
365-
"execution_count": 54,
365+
"execution_count": null,
366366
"metadata": {},
367367
"outputs": [
368368
{
@@ -377,7 +377,7 @@
377377
}
378378
],
379379
"source": [
380-
"results = torch.load(f\"{work_dir}/predictions__rank_0.pt\")\n",
380+
"results = torch.load(f\"{work_dir}/predictions__rank_0__dp_rank_0.pt\")\n",
381381
"\n",
382382
"for key, val in results.items():\n",
383383
" if val is not None:\n",
@@ -609,7 +609,7 @@
609609
},
610610
{
611611
"cell_type": "code",
612-
"execution_count": 64,
612+
"execution_count": null,
613613
"metadata": {},
614614
"outputs": [
615615
{
@@ -625,7 +625,7 @@
625625
}
626626
],
627627
"source": [
628-
"results = torch.load(f\"{work_dir}/predictions__rank_0.pt\")\n",
628+
"results = torch.load(f\"{work_dir}/predictions__rank_0__dp_rank_0.pt\")\n",
629629
"\n",
630630
"for key, val in results.items():\n",
631631
" if val is not None:\n",

sub-packages/bionemo-esm2/examples/mutant-design.ipynb

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -400,12 +400,12 @@
400400
"id": "67d09581-e784-4ccc-be88-194c8909068c",
401401
"metadata": {},
402402
"source": [
403-
"This will write the output of ESM-2 inference into a python dictionary and save that into `predictions__rank_0.pt` which can be loaded via PyTorch. DDP inference is supported in BioNeMo Framework and can be utilized by setting `--num-gpus n` to use `n` devices. The output predictions are then written to n distinct files `predictions__rank_<0...n-1>.pt`. Please refer to [ESM-2 Inference Tutorial](./inference.ipynb) for more information regarding the DDP support and how to interpret the prediction outputs."
403+
"This will write the output of ESM-2 inference into a Python dictionary and save that into `predictions__rank_0__dp_rank_0.pt` which can be loaded via PyTorch. DDP inference is supported in BioNeMo Framework and can be utilized by setting `--num-gpus n` to use `n` devices. The output predictions are then written to n distinct files, one per device, named `predictions__rank_<r>__dp_rank_<d>.pt` (where `r` is the device's global rank and `d` is its data-parallel rank). Please refer to [ESM-2 Inference Tutorial](./inference.ipynb) for more information regarding the DDP support and how to interpret the prediction outputs."
404404
]
405405
},
406406
{
407407
"cell_type": "code",
408-
"execution_count": 9,
408+
"execution_count": null,
409409
"id": "2b48c5a7",
410410
"metadata": {},
411411
"outputs": [
@@ -421,7 +421,7 @@
421421
}
422422
],
423423
"source": [
424-
"results = torch.load(f\"{example_dir}/predictions__rank_0.pt\")\n",
424+
"results = torch.load(f\"{example_dir}/predictions__rank_0__dp_rank_0.pt\")\n",
425425
"\n",
426426
"for key, val in results.items():\n",
427427
" if val is not None:\n",
@@ -749,7 +749,7 @@
749749
},
750750
{
751751
"cell_type": "code",
752-
"execution_count": 18,
752+
"execution_count": null,
753753
"id": "8ec1e825",
754754
"metadata": {},
755755
"outputs": [
@@ -762,7 +762,7 @@
762762
}
763763
],
764764
"source": [
765-
"results = torch.load(f\"{work_dir}/predictions__rank_0.pt\")\n",
765+
"results = torch.load(f\"{work_dir}/predictions__rank_0__dp_rank_0.pt\")\n",
766766
"\n",
767767
"# cast to FP32 since BFloat16 is an unsupported ScalarType in numpy\n",
768768
"logits = results[\"token_logits\"].transpose(0, 1).to(dtype=torch.float32) # s, b, h -> b, s, h\n",

sub-packages/bionemo-esm2/tests/bionemo/esm2/scripts/test_esm2_lora.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,20 +115,20 @@ def test_different_results_with_peft(
115115
)
116116

117117
if prediction_interval == "epoch":
118-
results_original = torch.load(f"{result_dir_original}/predictions__rank_0.pt")
119-
results_peft = torch.load(f"{result_dir_peft}/predictions__rank_0.pt")
118+
results_original = torch.load(f"{result_dir_original}/predictions__rank_0__dp_rank_0.pt")
119+
results_peft = torch.load(f"{result_dir_peft}/predictions__rank_0__dp_rank_0.pt")
120120

121121
elif prediction_interval == "batch":
122122
results_original = batch_collator(
123123
[
124124
torch.load(f, map_location="cpu")
125-
for f in glob.glob(f"{result_dir_original}/predictions__rank_0__batch_*.pt")
125+
for f in glob.glob(f"{result_dir_original}/predictions__rank_0__dp_rank_0__batch_*.pt")
126126
]
127127
)
128128
results_peft = batch_collator(
129129
[
130130
torch.load(f, map_location="cpu")
131-
for f in glob.glob(f"{result_dir_peft}/predictions__rank_0__batch_*.pt")
131+
for f in glob.glob(f"{result_dir_peft}/predictions__rank_0__dp_rank_0__batch_*.pt")
132132
]
133133
)
134134
assert (results_original["embeddings"] != results_peft["embeddings"]).any()

sub-packages/bionemo-esm2/tests/bionemo/esm2/scripts/test_infer_esm2.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,10 +99,13 @@ def test_infer_without_finetune_head(
9999
assert result_dir.exists(), "Could not find test results directory."
100100

101101
if prediction_interval == "epoch":
102-
results = torch.load(f"{result_dir}/predictions__rank_0.pt")
102+
results = torch.load(f"{result_dir}/predictions__rank_0__dp_rank_0.pt")
103103
elif prediction_interval == "batch":
104104
results = batch_collator(
105-
[torch.load(f, map_location="cpu") for f in glob.glob(f"{result_dir}/predictions__rank_0__batch_*.pt")]
105+
[
106+
torch.load(f, map_location="cpu")
107+
for f in glob.glob(f"{result_dir}/predictions__rank_0__dp_rank_0__batch_*.pt")
108+
]
106109
)
107110
assert isinstance(results, dict)
108111
keys_included = ["token_logits", "hidden_states", "embeddings", "binary_logits", "input_ids"]

sub-packages/bionemo-geneformer/examples/geneformer-celltype-classification.ipynb

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -926,35 +926,35 @@
926926
},
927927
{
928928
"cell_type": "code",
929-
"execution_count": 12,
929+
"execution_count": null,
930930
"metadata": {},
931931
"outputs": [],
932932
"source": [
933933
"import torch\n",
934934
"\n",
935935
"\n",
936-
"infer_Xs_10m = torch.load(result_path_10m / \"predictions__rank_0.pt\")[\"embeddings\"].float().cpu().numpy()\n",
936+
"infer_Xs_10m = torch.load(result_path_10m / \"predictions__rank_0__dp_rank_0.pt\")[\"embeddings\"].float().cpu().numpy()\n",
937937
"assert len(adata) == len(infer_Xs_10m), (len(adata), len(infer_Xs_10m))"
938938
]
939939
},
940940
{
941941
"cell_type": "code",
942-
"execution_count": 13,
942+
"execution_count": null,
943943
"metadata": {},
944944
"outputs": [],
945945
"source": [
946-
"infer_Xs_106m = torch.load(result_path_106m / \"predictions__rank_0.pt\")[\"embeddings\"].float().cpu().numpy()\n",
946+
"infer_Xs_106m = torch.load(result_path_106m / \"predictions__rank_0__dp_rank_0.pt\")[\"embeddings\"].float().cpu().numpy()\n",
947947
"assert len(adata) == len(infer_Xs_106m), (len(adata), len(infer_Xs_106m))"
948948
]
949949
},
950950
{
951951
"cell_type": "code",
952-
"execution_count": 14,
952+
"execution_count": null,
953953
"metadata": {},
954954
"outputs": [],
955955
"source": [
956956
"infer_Xs_10m_random = (\n",
957-
" torch.load(results_path_10m_random / \"predictions__rank_0.pt\")[\"embeddings\"].float().cpu().numpy()\n",
957+
" torch.load(results_path_10m_random / \"predictions__rank_0__dp_rank_0.pt\")[\"embeddings\"].float().cpu().numpy()\n",
958958
")\n",
959959
"assert len(adata) == len(infer_Xs_10m_random), (len(adata), len(infer_Xs_10m_random))"
960960
]

sub-packages/bionemo-geneformer/examples/geneformer-gene-embedding-GRN.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@
441441
},
442442
{
443443
"cell_type": "code",
444-
"execution_count": 29,
444+
"execution_count": null,
445445
"metadata": {},
446446
"outputs": [
447447
{
@@ -456,7 +456,7 @@
456456
"import torch\n",
457457
"\n",
458458
"\n",
459-
"predictions = torch.load(result_path_10m / \"predictions__rank_0.pt\", weights_only=False)\n",
459+
"predictions = torch.load(result_path_10m / \"predictions__rank_0__dp_rank_0.pt\", weights_only=False)\n",
460460
"\n",
461461
"print(predictions.keys())"
462462
]

sub-packages/bionemo-geneformer/examples/geneformer_cellxgene_tutorial.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1345,7 +1345,7 @@
13451345
},
13461346
{
13471347
"cell_type": "code",
1348-
"execution_count": 14,
1348+
"execution_count": null,
13491349
"id": "c64ebc1a",
13501350
"metadata": {},
13511351
"outputs": [
@@ -1363,7 +1363,7 @@
13631363
],
13641364
"source": [
13651365
"!ls -altrh {tutorial_output_dir}/\n",
1366-
"tutorial_output_inference_pickle = f\"{tutorial_output_dir}/predictions__rank_0.pt\"\n",
1366+
"tutorial_output_inference_pickle = f\"{tutorial_output_dir}/predictions__rank_0__dp_rank_0.pt\"\n",
13671367
"!ls -altrh {tutorial_output_inference_pickle}"
13681368
]
13691369
},

sub-packages/bionemo-geneformer/src/bionemo/geneformer/scripts/celltype_classification_bench/bench.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ def load_data_run_benchmark(result_path, adata_path, write_results=True):
154154

155155
adata = read_h5ad(adata_path)
156156

157-
infer_Xs = torch.load(result_path / "predictions__rank_0.pt")["embeddings"].float().cpu().numpy()
157+
infer_Xs = torch.load(result_path / "predictions__rank_0__dp_rank_0.pt")["embeddings"].float().cpu().numpy()
158158
assert len(adata) == len(infer_Xs), (len(adata), len(infer_Xs))
159159

160160
infer_metadata = adata.obs

0 commit comments

Comments
 (0)