From 3ac8f4e0feddba93692e08d3d214b0382d2a9c02 Mon Sep 17 00:00:00 2001
From: SuryanshT211
Date: Sun, 22 Feb 2026 14:22:51 -0500
Subject: [PATCH] Improve error message for deprecated dataset scripts with
 migration guidance

---
 src/datasets/load.py | 60 +++++++++++++++++++++++++++++++++++++++++---
 tests/test_load.py   | 16 ++++++++++++
 2 files changed, 73 insertions(+), 3 deletions(-)

diff --git a/src/datasets/load.py b/src/datasets/load.py
index 12aeae8b6af..9c769a8f071 100644
--- a/src/datasets/load.py
+++ b/src/datasets/load.py
@@ -912,9 +912,45 @@ def dataset_module_factory(
             ).get_module()
         # Try locally
         elif path.endswith(filename):
-            raise RuntimeError(f"Dataset scripts are no longer supported, but found (unknown)")
+            raise RuntimeError(f"""
+Dataset scripts are no longer supported in the Hugging Face `datasets` library.
+
+Detected script file: (unknown)
+
+As of the new architecture, dataset loading scripts must be hosted directly
+in the dataset repository on the Hugging Face Hub.
+
+To migrate your dataset:
+
+1. Move your loading script into the dataset repository on the Hub.
+2. Ensure the script is named `.py`.
+3. Push it to the Hub repository.
+4. Load it using: load_dataset("username/dataset_name")
+
+See migration guide:
+https://huggingface.co/docs/datasets/main/en/share_dataset
+
+""")
         elif os.path.isfile(combined_path):
-            raise RuntimeError(f"Dataset scripts are no longer supported, but found (unknown)")
+            raise RuntimeError(f"""
+Dataset scripts are no longer supported in the Hugging Face `datasets` library.
+
+Detected script file: (unknown)
+
+As of the new architecture, dataset loading scripts must be hosted directly
+in the dataset repository on the Hugging Face Hub.
+
+To migrate your dataset:
+
+1. Move your loading script into the dataset repository on the Hub.
+2. Ensure the script is named `.py`.
+3. Push it to the Hub repository.
+4. Load it using: load_dataset("username/dataset_name")
+
+See migration guide:
+https://huggingface.co/docs/datasets/main/en/share_dataset
+
+""")
         elif os.path.isdir(path):
             return LocalDatasetModuleFactory(
                 path, data_dir=data_dir, data_files=data_files, download_mode=download_mode
@@ -986,7 +1022,25 @@ def dataset_module_factory(
                     revision=commit_hash,
                     proxies=download_config.proxies,
                 )
-            raise RuntimeError(f"Dataset scripts are no longer supported, but found (unknown)")
+            raise RuntimeError(f"""
+Dataset scripts are no longer supported in the Hugging Face `datasets` library.
+
+Detected script file: (unknown)
+
+As of the new architecture, dataset loading scripts must be hosted directly
+in the dataset repository on the Hugging Face Hub.
+
+To migrate your dataset:
+
+1. Move your loading script into the dataset repository on the Hub.
+2. Ensure the script is named `.py`.
+3. Push it to the Hub repository.
+4. Load it using: load_dataset("username/dataset_name")
+
+See migration guide:
+https://huggingface.co/docs/datasets/main/en/share_dataset
+
+""")
         except EntryNotFoundError:
             # Use the infos from the parquet export except in some cases:
             if data_dir or data_files or (revision and revision != "main"):
diff --git a/tests/test_load.py b/tests/test_load.py
index 06d4f4d2b8d..4bac9281649 100644
--- a/tests/test_load.py
+++ b/tests/test_load.py
@@ -76,6 +76,22 @@
 SAMPLE_NOT_EXISTING_DATASET_IDENTIFIER = "hf-internal-testing/_dummy"
 SAMPLE_DATASET_NAME_THAT_DOESNT_EXIST = "_dummy"
 
+from datasets.load import dataset_module_factory
+
+
+def test_dataset_script_error_message(tmp_path):
+    # create fake dataset script
+    fake_script = tmp_path / "dummy.py"
+    fake_script.write_text("print('hello')")
+
+    # pass the actual file path (not directory)
+    with pytest.raises(RuntimeError) as exc:
+        dataset_module_factory(str(fake_script))
+
+    message = str(exc.value)
+
+    assert "no longer supported" in message
+    assert "dataset repository on the Hugging Face Hub" in message
 
 @pytest.fixture
 def data_dir(tmp_path):