diff --git a/bionemo-recipes/recipes/opengenome2_llama_native_te/DATASET.md b/bionemo-recipes/recipes/opengenome2_llama_native_te/DATASET.md index 18a49838d4..30bf5b813a 100644 --- a/bionemo-recipes/recipes/opengenome2_llama_native_te/DATASET.md +++ b/bionemo-recipes/recipes/opengenome2_llama_native_te/DATASET.md @@ -162,6 +162,7 @@ TO 'output' (FORMAT PARQUET, PER_THREAD_OUTPUT true, FILE_SIZE_BYTES '200MB'); ```yaml dataset: load_dataset_kwargs: + data_files: null path: "/path/to/your/resharded_parquet_dir" split: "train" streaming: true