diff --git a/docs/tutorials/cli/t4sanity.md b/docs/tutorials/cli/t4sanity.md index f0bf2ce..65e8cb8 100644 --- a/docs/tutorials/cli/t4sanity.md +++ b/docs/tutorials/cli/t4sanity.md @@ -45,6 +45,13 @@ As an example, we have the following the dataset structure: ... ``` +Then, you can run sanity checks with `t4sanity `: + +```shell +>>>Sanity checking...: 1it [00:00, 9.70it/s] +✅ No exceptions occurred!! +``` + ### Exclude Warnings To run sanity check ignoring warnings, providing the path to the parent directory of the datasets: @@ -52,13 +59,14 @@ To run sanity check ignoring warnings, providing the path to the parent director ```shell $ t4sanity ->>> Sanity checking...: 97it [00:03, 26.60it/s] -+--------------------------------------+---------+------------------------------------------------------------------------------------------------+ -| DatasetID | Version | Message | -+--------------------------------------+---------+------------------------------------------------------------------------------------------------+ -| 96200480-ae59-44cb-9e4e-dd9021e250e8 | 2 | bbox must be (xmin, ymin, xmax, ymax) and xmin <= xmax && ymin <= ymax: (1671, 198, 1440, 229) | -| ca346afb-ea1a-4c5c-8117-544bd9ff6aca | 2 | bbox must be (xmin, ymin, xmax, ymax) and xmin <= xmax && ymin <= ymax: (1793, 99, 1440, 222) | -... +>>>Sanity checking...: 2it [00:00, 18.69it/s] +⚠️ Encountered some exceptions!! 
++-----------+---------+--------+------------------------------------------------------------------------------------------------+ +| DatasetID | Version | status | Message | ++-----------+---------+--------+------------------------------------------------------------------------------------------------+ +| dataset1 | 2 | ERROR | bbox must be (xmin, ymin, xmax, ymax) and xmin <= xmax && ymin <= ymax: (1532, 198, 1440, 265) | +| dataset2 | 1 | OK | | ++-----------+---------+--------+------------------------------------------------------------------------------------------------+ ``` ### Include Warnings @@ -68,12 +76,12 @@ To run sanity check and report any warnings, use the `-iw; --include-warning` op ```shell $ t4sanity -iw ->>> Sanity checking...: 97it [00:03, 29.31it/s] -+--------------------------------------+---------+------------------------------------------------------------------------------------------------+ -| DatasetID | Version | Message | -+--------------------------------------+---------+------------------------------------------------------------------------------------------------+ -| 96200480-ae59-44cb-9e4e-dd9021e250e8 | 2 | bbox must be (xmin, ymin, xmax, ymax) and xmin <= xmax && ymin <= ymax: (1671, 198, 1440, 229) | -| ca346afb-ea1a-4c5c-8117-544bd9ff6aca | 2 | bbox must be (xmin, ymin, xmax, ymax) and xmin <= xmax && ymin <= ymax: (1793, 99, 1440, 222) | -| ed96b707-e7f4-4a71-9e6b-571ffd56c4c4 | 2 | level: Not available is not supported, Visibility.UNAVAILABLE will be assigned. | -... +>>>Sanity checking...: 2it [00:00, 21.54it/s] +⚠️ Encountered some exceptions!! 
++-----------+---------+---------+------------------------------------------------------------------------------------------------+ +| DatasetID | Version | status | Message | ++-----------+---------+---------+------------------------------------------------------------------------------------------------+ +| dataset1 | 2 | ERROR | bbox must be (xmin, ymin, xmax, ymax) and xmin <= xmax && ymin <= ymax: (1532, 198, 1440, 265) | +| dataset2 | 1 | WARNING | Category token is empty for surface ann: 0c15d9c143fb2723c16ac7e0c735b0a8 | ++-----------+---------+---------+------------------------------------------------------------------------------------------------+ ``` diff --git a/t4_devkit/cli/sanity.py b/t4_devkit/cli/sanity.py index b83e064..a54ee78 100644 --- a/t4_devkit/cli/sanity.py +++ b/t4_devkit/cli/sanity.py @@ -24,13 +24,10 @@ def _run_sanity_check( revision: str | None = None, include_warning: bool = False, ) -> list[DBException]: - exceptions: list[DBException] = [] - - for db_root in tqdm(Path(db_parent).glob("*"), desc=">>>Sanity checking..."): - result = sanity_check(db_root, revision=revision, include_warning=include_warning) - if result: - exceptions.append(result) - return exceptions + return [ + sanity_check(db_root, revision=revision, include_warning=include_warning) + for db_root in tqdm(Path(db_parent).glob("*"), desc=">>>Sanity checking...") + ] @cli.command() @@ -53,10 +50,10 @@ def main( ) -> None: exceptions = _run_sanity_check(db_parent, revision=revision, include_warning=include_warning) - if not exceptions: + if all(e.is_ok() for e in exceptions): print("✅ No exceptions occurred!!") else: print("⚠️ Encountered some exceptions!!") - headers = ["DatasetID", "Version", "Message"] - table = [[e.dataset_id, e.version, e.message] for e in exceptions] + headers = ["DatasetID", "Version", "status", "Message"] + table = [[e.dataset_id, e.version, e.status.value, e.message] for e in exceptions] print(tabulate(table, headers=headers, tablefmt="pretty")) diff
--git a/t4_devkit/common/sanity.py b/t4_devkit/common/sanity.py index 8460a0b..7dcf70f 100644 --- a/t4_devkit/common/sanity.py +++ b/t4_devkit/common/sanity.py @@ -1,6 +1,7 @@ from __future__ import annotations import warnings +from enum import Enum, unique from pathlib import Path from attrs import define @@ -12,11 +13,30 @@ @define class DBException: - """A dataclass to store error message of the corresponding dataset.""" + """A dataclass to store error message of the corresponding dataset. + + Attributes: + dataset_id (str): Dataset ID. + version (str | None): Dataset version. + status (DBStatus): Status of the dataset. + message (str | None): Error or warning message; defaults to None. + """ dataset_id: str version: str | None - message: str + status: DBStatus + message: str | None = None + + def is_ok(self) -> bool: + """Return True if the status is OK.""" + return self.status == DBStatus.OK + + +@unique +class DBStatus(str, Enum): + OK = "OK" + WARNING = "WARNING" + ERROR = "ERROR" def sanity_check( @@ -24,7 +44,7 @@ def sanity_check( *, revision: str | None = None, include_warning: bool = False, -) -> DBException | None: +) -> DBException: """Perform sanity check and report exception or warning encountered while loading the dataset.
Args: @@ -44,14 +64,28 @@ def sanity_check( warnings.filterwarnings("ignore") try: - _ = Tier4(data_root=db_root, revision=revision, verbose=False) - exception = None + t4 = Tier4(data_root=db_root, revision=revision, verbose=False) + exception = DBException( + dataset_id=t4.dataset_id, + version=t4.version, + status=DBStatus.OK, + ) + except Warning as w: + metadata = load_metadata(db_root, revision=revision) + + exception = DBException( + dataset_id=metadata.dataset_id, + version=metadata.version, + status=DBStatus.WARNING, + message=str(w), + ) except Exception as e: - metadata = load_metadata(db_root) + metadata = load_metadata(db_root, revision=revision) exception = DBException( dataset_id=metadata.dataset_id, version=metadata.version, + status=DBStatus.ERROR, message=str(e), ) return exception diff --git a/t4_devkit/tier4.py b/t4_devkit/tier4.py index fee1eda..0888adb 100644 --- a/t4_devkit/tier4.py +++ b/t4_devkit/tier4.py @@ -86,14 +86,9 @@ def load_metadata(db_root: str, revision: str | None = None) -> DBMetadata: version = None data_root = db_root_path.as_posix() else: - if int(revision) not in version_candidates: - raise ValueError(f"The version: {revision} is not included in {dataset_id}") version = revision data_root = db_root_path.joinpath(version).as_posix() - if version is None: - warnings.warn(f"{dataset_id} does't contain any versions.", DeprecationWarning) - return DBMetadata(data_root=data_root, dataset_id=dataset_id, version=version) @@ -149,6 +144,11 @@ def __init__( if not osp.exists(self.data_root): raise FileNotFoundError(f"Database directory is not found: {self.data_root}") + if self.version is None: + warnings.warn( + f"DatasetID: {self.dataset_id} doesn't contain any versions.", DeprecationWarning + ) + start_time = time.time() if verbose: print(f"======\nLoading T4 tables in `{self.schema_dir}`...")