|
46 | 46 |
|
47 | 47 | @define |
48 | 48 | class DBMetadata: |
| 49 | + """A dataclass to represent dataset metadata. |
| 50 | +
|
| 51 | + Attributes: |
| 52 | + data_root (str): Root directory path. |
| 53 | + dataset_id (str): Unique dataset ID. |
| 54 | + version (str | None): Dataset version. |
| 55 | + """ |
| 56 | + |
49 | 57 | data_root: str |
50 | 58 | dataset_id: str |
51 | 59 | version: str | None |
52 | 60 |
|
53 | 61 |
|
54 | | -def load_metadata(db_root: str) -> DBMetadata: |
| 62 | +def load_metadata(db_root: str, revision: str | None = None) -> DBMetadata: |
55 | 63 | """Load metadata of T4 dataset including root directory path, dataset ID, and version. |
56 | 64 |
|
57 | 65 | Args: |
58 | 66 | db_root (str): Path to root directory of database. |
| 67 | + revision (str | None, optional): Version of the dataset to load. |
| 68 | + If None, search for the latest one. |
59 | 69 |
|
60 | 70 | Returns: |
61 | 71 | Metadata of T4 dataset. |
62 | 72 | """ |
63 | 73 | db_root_path = Path(db_root) |
| 74 | + dataset_id = db_root_path.name |
64 | 75 |
|
65 | 76 | version_pattern = re.compile(r".*/\d+$") |
66 | 77 | versions = [d.name for d in db_root_path.iterdir() if version_pattern.match(d.as_posix())] |
67 | 78 |
|
68 | | - if versions: |
69 | | - version = sorted(versions)[-1] |
70 | | - data_root = db_root_path.joinpath(version).as_posix() |
| 79 | + if revision is None: |
| 80 | + if versions: |
| 81 | + version = sorted(versions)[-1] |
| 82 | + data_root = db_root_path.joinpath(version).as_posix() |
| 83 | + else: |
| 84 | + version = None |
| 85 | + data_root = db_root_path.as_posix() |
71 | 86 | else: |
72 | | - version = None |
73 | | - data_root = db_root_path.as_posix() |
| 87 | + if revision not in versions: |
| 88 | + raise ValueError(f"The version: {revision} is not included in {dataset_id}") |
| 89 | + version = revision |
| 90 | + data_root = db_root_path.joinpath(version).as_posix() |
| 91 | + |
| 92 | + if version is None: |
| 93 | + warnings.warn(f"{dataset_id} doesn't contain any versions.", DeprecationWarning) |
74 | 94 |
|
75 | | - return DBMetadata(data_root=data_root, dataset_id=db_root_path.name, version=version) |
| 95 | + return DBMetadata(data_root=data_root, dataset_id=dataset_id, version=version) |
76 | 96 |
|
77 | 97 |
|
78 | 98 | class Tier4: |
79 | 99 | """Database class for T4 dataset to help query and retrieve information from the database.""" |
80 | 100 |
|
81 | 101 | schema_dir: str = "annotation" |
82 | 102 |
|
83 | | - def __init__(self, data_root: str, verbose: bool = True) -> None: |
| 103 | + def __init__( |
| 104 | + self, |
| 105 | + data_root: str, |
| 106 | + revision: str | None = None, |
| 107 | + verbose: bool = True, |
| 108 | + ) -> None: |
84 | 109 | """Load database and creates reverse indexes and shortcuts. |
85 | 110 |
|
86 | 111 | Args: |
87 | 112 | data_root (str): Path to the root directory of dataset. |
| 113 | + revision (str | None, optional): Specific version of the dataset to load. |
| 114 | + If None, search for the latest one. |
88 | 115 | verbose (bool, optional): Whether to display status during load. |
89 | 116 |
|
90 | 117 | Examples: |
@@ -115,7 +142,7 @@ def __init__(self, data_root: str, verbose: bool = True) -> None: |
115 | 142 | ====== |
116 | 143 |
|
117 | 144 | """ |
118 | | - self._metadata = load_metadata(data_root) |
| 145 | + self._metadata = load_metadata(data_root, revision) |
119 | 146 |
|
120 | 147 | if not osp.exists(self.data_root): |
121 | 148 | raise FileNotFoundError(f"Database directory is not found: {self.data_root}") |
|
0 commit comments