diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py index 59451a640e6..3430cc08d8c 100644 --- a/src/datasets/arrow_dataset.py +++ b/src/datasets/arrow_dataset.py @@ -43,6 +43,7 @@ from pathlib import Path from random import sample from typing import ( + Sequence, TYPE_CHECKING, Any, BinaryIO, @@ -1290,7 +1291,7 @@ def from_list( @staticmethod def from_csv( - path_or_paths: Union[PathLike, list[PathLike]], + path_or_paths: Union[PathLike, Sequence[PathLike]], split: Optional[NamedSplit] = None, features: Optional[Features] = None, cache_dir: str = None, @@ -1430,7 +1431,7 @@ def from_generator( @staticmethod def from_json( - path_or_paths: Union[PathLike, list[PathLike]], + path_or_paths: Union[PathLike, Sequence[PathLike]], split: Optional[NamedSplit] = None, features: Optional[Features] = None, cache_dir: str = None, @@ -1489,12 +1490,12 @@ def from_json( @staticmethod def from_parquet( - path_or_paths: Union[PathLike, list[PathLike]], + path_or_paths: Union[PathLike, Sequence[PathLike]], split: Optional[NamedSplit] = None, features: Optional[Features] = None, cache_dir: str = None, keep_in_memory: bool = False, - columns: Optional[list[str]] = None, + columns: Optional[Sequence[str]] = None, num_proc: Optional[int] = None, filters: Optional[Union[pds.Expression, list[tuple], list[list[tuple]]]] = None, fragment_scan_options: Optional[pds.ParquetFragmentScanOptions] = None, @@ -1586,7 +1587,7 @@ def from_parquet( @staticmethod def from_text( - path_or_paths: Union[PathLike, list[PathLike]], + path_or_paths: Union[PathLike, Sequence[PathLike]], split: Optional[NamedSplit] = None, features: Optional[Features] = None, cache_dir: str = None, @@ -3671,11 +3672,11 @@ def _map_single( function: Optional[Callable] = None, with_indices: bool = False, with_rank: bool = False, - input_columns: Optional[list[str]] = None, + input_columns: Optional[Sequence[str]] = None, batched: bool = False, batch_size: Optional[int] = 1000, drop_last_batch: bool = False, - remove_columns: Optional[list[str]] = None, + remove_columns: Optional[Sequence[str]] = None, keep_in_memory: bool = False, cache_file_name: Optional[str] = None, writer_batch_size: Optional[int] = 1000, diff --git a/src/datasets/dataset_dict.py b/src/datasets/dataset_dict.py index 4abea0a381a..4edff938ecd 100644 --- a/src/datasets/dataset_dict.py +++ b/src/datasets/dataset_dict.py @@ -1534,7 +1534,7 @@ def from_parquet( features: Optional[Features] = None, cache_dir: str = None, keep_in_memory: bool = False, - columns: Optional[list[str]] = None, + columns: Optional[Sequence[str]] = None, **kwargs, ) -> "DatasetDict": """Create [`DatasetDict`] from Parquet file(s). diff --git a/src/datasets/iterable_dataset.py b/src/datasets/iterable_dataset.py index 92fdea2ad4d..dd364ff7ec1 100644 --- a/src/datasets/iterable_dataset.py +++ b/src/datasets/iterable_dataset.py @@ -15,7 +15,7 @@ from functools import partial from itertools import cycle, islice from pathlib import Path -from typing import TYPE_CHECKING, Any, BinaryIO, Callable, Optional, Union +from typing import TYPE_CHECKING, Any, BinaryIO, Callable, Optional, Union, Sequence import fsspec.asyn import multiprocess as mp @@ -1347,11 +1347,11 @@ def __init__( ex_iterable: _BaseExamplesIterable, function: Callable, with_indices: bool = False, - input_columns: Optional[list[str]] = None, + input_columns: Optional[Sequence[str]] = None, batched: bool = False, batch_size: Optional[int] = 1000, drop_last_batch: bool = False, - remove_columns: Optional[list[str]] = None, + remove_columns: Optional[Sequence[str]] = None, fn_kwargs: Optional[dict] = None, formatting: Optional["FormattingConfig"] = None, features: Optional[Features] = None, @@ -1799,7 +1799,7 @@ def __init__( ex_iterable: _BaseExamplesIterable, function: Callable, with_indices: bool = False, - input_columns: Optional[list[str]] = None, + input_columns: Optional[Sequence[str]] = None, batched: bool = False, batch_size: Optional[int] = 1000, fn_kwargs: Optional[dict] = None, @@ -3157,7 +3157,7 @@ def from_list( @staticmethod def from_csv( - path_or_paths: Union[PathLike, list[PathLike]], + path_or_paths: Union[PathLike, Sequence[PathLike]], split: Optional[NamedSplit] = None, features: Optional[Features] = None, keep_in_memory: bool = False, @@ -3200,7 +3200,7 @@ def from_csv( @staticmethod def from_json( - path_or_paths: Union[PathLike, list[PathLike]], + path_or_paths: Union[PathLike, Sequence[PathLike]], split: Optional[NamedSplit] = None, features: Optional[Features] = None, keep_in_memory: bool = False, @@ -3247,11 +3247,11 @@ def from_json( @staticmethod def from_parquet( - path_or_paths: Union[PathLike, list[PathLike]], + path_or_paths: Union[PathLike, Sequence[PathLike]], split: Optional[NamedSplit] = None, features: Optional[Features] = None, keep_in_memory: bool = False, - columns: Optional[list[str]] = None, + columns: Optional[Sequence[str]] = None, filters: Optional[Union[pds.Expression, list[tuple], list[list[tuple]]]] = None, fragment_scan_options: Optional[pds.ParquetFragmentScanOptions] = None, on_bad_files: Literal["error", "warn", "skip"] = "error", @@ -3332,7 +3332,7 @@ def from_parquet( @staticmethod def from_text( - path_or_paths: Union[PathLike, list[PathLike]], + path_or_paths: Union[PathLike, Sequence[PathLike]], split: Optional[NamedSplit] = None, features: Optional[Features] = None, keep_in_memory: bool = False,