-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path__init__.py
More file actions
63 lines (45 loc) · 2.02 KB
/
__init__.py
File metadata and controls
63 lines (45 loc) · 2.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
"""Dynamically discovers and registers all available datasets.
This module iterates through the subdirectories of the `codesectools/datasets`
directory. For each subdirectory that represents a dataset (i.e., contains a
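`dataset.py` file and is not the `core` directory), it registers a
`LazyDatasetLoader` for that dataset in the `DATASETS_ALL` dictionary. The
dataset module itself is only imported the first time the loader is called or
one of its attributes is accessed.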
Attributes:
DATASETS_ALL (dict): A dictionary, sorted by dataset name, mapping each
dataset name to the `LazyDatasetLoader` that loads its dataset class.
"""
import importlib
from typing import Any, Type
from codesectools.datasets.core.dataset import Dataset
from codesectools.utils import DATASETS_DIR
class LazyDatasetLoader:
    """Lazily load a dataset class to avoid premature imports.

    The loader stands in for the dataset class called ``name``: calling the
    loader instantiates that class, and any attribute not found on the loader
    itself is looked up on the class. The module
    ``codesectools.datasets.<name>.dataset`` is imported on first use only.

    Attributes:
        name: The dataset name. It selects the module to import and is also
            the name of the class fetched from that module.
        loaded: Whether the dataset module has been imported yet.
        dataset_module: The imported dataset module (set on first load).
        dataset: The dataset class (set on first load).
    """

    def __init__(self, name: str) -> None:
        """Initialize the lazy loader.

        Args:
            name: The name of the dataset to load.
        """
        self.name = name
        self.loaded = False

    def _load(self) -> None:
        """Import the dataset module and class on first access.

        Raises:
            ImportError: If the dataset module cannot be imported.
            AttributeError: If the module has no attribute named ``name``.
        """
        if self.loaded:
            return
        self.dataset_module = importlib.import_module(
            f"codesectools.datasets.{self.name}.dataset"
        )
        self.dataset: Type[Dataset] = getattr(self.dataset_module, self.name)
        # Only flag success once both the module and the class are resolved,
        # so a failed attempt is retried on the next access.
        self.loaded = True

    def __call__(self, *args: Any, **kwargs: Any) -> "Dataset":
        """Create an instance of the loaded dataset class.

        Args:
            *args: Positional arguments forwarded to the dataset class.
            **kwargs: Keyword arguments forwarded to the dataset class.

        Returns:
            A new instance of the dataset class.
        """
        self._load()
        return self.dataset(*args, **kwargs)

    def __getattr__(self, name: str) -> Any:  # noqa: ANN401
        """Proxy attribute access to the loaded dataset class.

        Python only calls this hook after normal attribute lookup has failed.

        Args:
            name: The attribute to fetch from the dataset class.

        Returns:
            The attribute value found on the dataset class.

        Raises:
            AttributeError: If the loader has not been initialized, or if the
                dataset class has no such attribute.
        """
        # `copy` and `pickle` build the instance without running `__init__`
        # and probe it (e.g. `hasattr(obj, "__setstate__")`) before restoring
        # its state. Without this guard `_load()` would read `self.loaded`,
        # re-enter this method and recurse until RecursionError.
        state = self.__dict__
        if "name" not in state or "loaded" not in state:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        self._load()
        return getattr(self.dataset, name)
# Registry of every dataset package found under DATASETS_DIR, keyed by the
# package directory name. Values are lazy loaders, so no dataset module is
# imported while the registry is being built.
DATASETS_ALL = {}
for child in DATASETS_DIR.iterdir():
    # `core` is excluded from the registry; plain files are ignored.
    if child.name == "core" or not child.is_dir():
        continue
    # A directory only counts as a dataset when it contains `dataset.py`.
    if not any(child.glob("dataset.py")):
        continue
    dataset_name = child.name
    DATASETS_ALL[dataset_name] = LazyDatasetLoader(dataset_name)

# Rebuild the mapping in alphabetical key order so iteration is deterministic.
DATASETS_ALL = {key: DATASETS_ALL[key] for key in sorted(DATASETS_ALL)}