11"""Defines the Dataset class and related utilities for handling datasets."""
2+ from __future__ import annotations
23
34import json
45import logging
56from collections import defaultdict
67from functools import cached_property
78from pathlib import Path
8- from typing import Any , TypedDict
9+ from typing import TYPE_CHECKING , Any , TypedDict
910
1011from datasets import Dataset as HFDataset
1112from datasets import Sequence , get_dataset_config_names , load_dataset
1213
13- from autointent .custom_types import LabelWithOOS , Split
14- from autointent .schemas import Intent , Tag
14+ from autointent .custom_types import Split
15+ from autointent .schemas import Tag
16+
17+ if TYPE_CHECKING :
18+ from autointent .custom_types import LabelWithOOS
19+ from autointent .schemas import Intent
1520
1621logger = logging .getLogger (__name__ )
1722
@@ -72,7 +77,7 @@ def n_classes(self) -> int:
7277 return len (self .intents )
7378
7479 @classmethod
75- def from_dict (cls , mapping : dict [str , Any ]) -> " Dataset" :
80+ def from_dict (cls , mapping : dict [str , Any ]) -> Dataset :
7681 """Creates a dataset from a dictionary mapping.
7782
7883 Args:
@@ -83,7 +88,7 @@ def from_dict(cls, mapping: dict[str, Any]) -> "Dataset":
8388 return DictReader ().read (mapping )
8489
8590 @classmethod
86- def from_json (cls , filepath : str | Path ) -> " Dataset" :
91+ def from_json (cls , filepath : str | Path ) -> Dataset :
8792 """Loads a dataset from a JSON file.
8893
8994 Args:
@@ -96,7 +101,7 @@ def from_json(cls, filepath: str | Path) -> "Dataset":
96101 @classmethod
97102 def from_hub (
98103 cls , repo_name : str , data_split : str = "default" , intent_subset_name : str = Split .INTENTS
99- ) -> " Dataset" :
104+ ) -> Dataset :
100105 """Loads a dataset from the Hugging Face Hub.
101106
102107 Args:
@@ -113,7 +118,7 @@ def from_hub(
113118
114119 return DictReader ().read (mapping )
115120
116- def to_multilabel (self ) -> " Dataset" :
121+ def to_multilabel (self ) -> Dataset :
117122 """Converts dataset labels to multilabel format."""
118123 for split_name , split in self .items ():
119124 self [split_name ] = split .map (self ._to_multilabel )
0 commit comments