Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 57 additions & 3 deletions src/datasets/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -912,9 +912,45 @@ def dataset_module_factory(
).get_module()
# Try locally
elif path.endswith(filename):
raise RuntimeError(f"Dataset scripts are no longer supported, but found {filename}")
raise RuntimeError(f"""
Dataset scripts are no longer supported in the Hugging Face `datasets` library.

Detected script file: {filename}

As of the new architecture, dataset loading scripts must be hosted directly
in the dataset repository on the Hugging Face Hub.

To migrate your dataset:

1. Move your loading script into the dataset repository on the Hub.
2. Ensure the script is named `<dataset_name>.py`.
3. Push it to the Hub repository.
4. Load it using: load_dataset("username/dataset_name")

See migration guide:
https://huggingface.co/docs/datasets/main/en/share_dataset

""")
elif os.path.isfile(combined_path):
raise RuntimeError(f"Dataset scripts are no longer supported, but found {filename}")
raise RuntimeError(f"""
Dataset scripts are no longer supported in the Hugging Face `datasets` library.

Detected script file: {filename}

As of the new architecture, dataset loading scripts must be hosted directly
in the dataset repository on the Hugging Face Hub.

To migrate your dataset:

1. Move your loading script into the dataset repository on the Hub.
2. Ensure the script is named `<dataset_name>.py`.
3. Push it to the Hub repository.
4. Load it using: load_dataset("username/dataset_name")

See migration guide:
https://huggingface.co/docs/datasets/main/en/share_dataset

""")
elif os.path.isdir(path):
return LocalDatasetModuleFactory(
path, data_dir=data_dir, data_files=data_files, download_mode=download_mode
Expand Down Expand Up @@ -986,7 +1022,25 @@ def dataset_module_factory(
revision=commit_hash,
proxies=download_config.proxies,
)
raise RuntimeError(f"Dataset scripts are no longer supported, but found {filename}")
raise RuntimeError(f"""
Dataset scripts are no longer supported in the Hugging Face `datasets` library.

Detected script file: {filename}

As of the new architecture, dataset loading scripts must be hosted directly
in the dataset repository on the Hugging Face Hub.

To migrate your dataset:

1. Move your loading script into the dataset repository on the Hub.
2. Ensure the script is named `<dataset_name>.py`.
3. Push it to the Hub repository.
4. Load it using: load_dataset("username/dataset_name")

See migration guide:
https://huggingface.co/docs/datasets/main/en/share_dataset

""")
except EntryNotFoundError:
# Use the infos from the parquet export except in some cases:
if data_dir or data_files or (revision and revision != "main"):
Expand Down
16 changes: 16 additions & 0 deletions tests/test_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,22 @@
SAMPLE_NOT_EXISTING_DATASET_IDENTIFIER = "hf-internal-testing/_dummy"
SAMPLE_DATASET_NAME_THAT_DOESNT_EXIST = "_dummy"

from datasets.load import dataset_module_factory


def test_dataset_script_error_message(tmp_path):
    """Check the error raised when the factory is given a local ``.py`` script path.

    The call must fail with ``RuntimeError``, and the message must both state
    that dataset scripts are no longer supported and mention the dataset
    repository on the Hugging Face Hub.
    """
    # Write a small placeholder script into the per-test temporary directory.
    script_path = tmp_path / "dummy.py"
    script_path.write_text("print('hello')")

    # Hand the factory the script file itself rather than its parent directory.
    with pytest.raises(RuntimeError) as exc_info:
        dataset_module_factory(str(script_path))

    error_text = str(exc_info.value)

    expected_fragments = (
        "no longer supported",
        "dataset repository on the Hugging Face Hub",
    )
    for fragment in expected_fragments:
        assert fragment in error_text

@pytest.fixture
def data_dir(tmp_path):
Expand Down