Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 23 additions & 15 deletions docs/tutorials/cli/t4sanity.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,20 +45,28 @@ As an example, we have the following the dataset structure:
...
```

Then, you can run sanity checks with `t4sanity <DATA_ROOT>`:

```shell
>>>Sanity checking...: 1it [00:00, 9.70it/s]
✅ No exceptions occurred!!
```

### Exclude Warnings

To run the sanity check while ignoring warnings, provide the path to the parent directory of the datasets:

```shell
$ t4sanity <DATA_ROOT>

>>> Sanity checking...: 97it [00:03, 26.60it/s]
+--------------------------------------+---------+------------------------------------------------------------------------------------------------+
| DatasetID | Version | Message |
+--------------------------------------+---------+------------------------------------------------------------------------------------------------+
| 96200480-ae59-44cb-9e4e-dd9021e250e8 | 2 | bbox must be (xmin, ymin, xmax, ymax) and xmin <= xmax && ymin <= ymax: (1671, 198, 1440, 229) |
| ca346afb-ea1a-4c5c-8117-544bd9ff6aca | 2 | bbox must be (xmin, ymin, xmax, ymax) and xmin <= xmax && ymin <= ymax: (1793, 99, 1440, 222) |
...
>>>Sanity checking...: 2it [00:00, 18.69it/s]
⚠️ Encountered some exceptions!!
+-----------+---------+--------+------------------------------------------------------------------------------------------------+
| DatasetID | Version | status | Message |
+-----------+---------+--------+------------------------------------------------------------------------------------------------+
| dataset1 | 2 | ERROR | bbox must be (xmin, ymin, xmax, ymax) and xmin <= xmax && ymin <= ymax: (1532, 198, 1440, 265) |
| dataset2 | 1 | OK | |
+-----------+---------+--------+------------------------------------------------------------------------------------------------+
```

### Include Warnings
Expand All @@ -68,12 +76,12 @@ To run sanity check and report any warnings, use the `-iw; --include-warning` op
```shell
$ t4sanity <DATA_ROOT> -iw

>>> Sanity checking...: 97it [00:03, 29.31it/s]
+--------------------------------------+---------+------------------------------------------------------------------------------------------------+
| DatasetID | Version | Message |
+--------------------------------------+---------+------------------------------------------------------------------------------------------------+
| 96200480-ae59-44cb-9e4e-dd9021e250e8 | 2 | bbox must be (xmin, ymin, xmax, ymax) and xmin <= xmax && ymin <= ymax: (1671, 198, 1440, 229) |
| ca346afb-ea1a-4c5c-8117-544bd9ff6aca | 2 | bbox must be (xmin, ymin, xmax, ymax) and xmin <= xmax && ymin <= ymax: (1793, 99, 1440, 222) |
| ed96b707-e7f4-4a71-9e6b-571ffd56c4c4 | 2 | level: Not available is not supported, Visibility.UNAVAILABLE will be assigned. |
...
>>>Sanity checking...: 2it [00:00, 21.54it/s]
⚠️ Encountered some exceptions!!
+-----------+---------+---------+------------------------------------------------------------------------------------------------+
| DatasetID | Version | status | Message |
+-----------+---------+---------+------------------------------------------------------------------------------------------------+
| dataset1 | 2 | ERROR | bbox must be (xmin, ymin, xmax, ymax) and xmin <= xmax && ymin <= ymax: (1532, 198, 1440, 265) |
| dataset2 | 1 | WARNING | Category token is empty for surface ann: 0c15d9c143fb2723c16ac7e0c735b0a8 |
+-----------+---------+---------+------------------------------------------------------------------------------------------------+
```
17 changes: 7 additions & 10 deletions t4_devkit/cli/sanity.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,10 @@ def _run_sanity_check(
revision: str | None = None,
include_warning: bool = False,
) -> list[DBException]:
exceptions: list[DBException] = []

for db_root in tqdm(Path(db_parent).glob("*"), desc=">>>Sanity checking..."):
result = sanity_check(db_root, revision=revision, include_warning=include_warning)
if result:
exceptions.append(result)
return exceptions
return [
sanity_check(db_root, revision=revision, include_warning=include_warning)
for db_root in tqdm(Path(db_parent).glob("*"), desc=">>>Sanity checking...")
]


@cli.command()
Expand All @@ -53,10 +50,10 @@ def main(
) -> None:
exceptions = _run_sanity_check(db_parent, revision=revision, include_warning=include_warning)

if not exceptions:
if all(e.is_ok() for e in exceptions):
print("✅ No exceptions occurred!!")
else:
print("⚠️ Encountered some exceptions!!")
headers = ["DatasetID", "Version", "Message"]
table = [[e.dataset_id, e.version, e.message] for e in exceptions]
headers = ["DatasetID", "Version", "status", "Message"]
table = [[e.dataset_id, e.version, e.status, e.message] for e in exceptions]
print(tabulate(table, headers=headers, tablefmt="pretty"))
44 changes: 39 additions & 5 deletions t4_devkit/common/sanity.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import warnings
from enum import Enum, unique
from pathlib import Path

from attrs import define
Expand All @@ -12,19 +13,38 @@

@define
class DBException:
    """A record storing the sanity check status and message of the corresponding dataset.

    Attributes:
        dataset_id (str): Dataset ID.
        version (str | None): Dataset version.
        status (DBStatus): Status of the dataset.
        message (str | None): Error or warning message. Defaults to ``None``.
    """

    dataset_id: str
    version: str | None
    # `status` has no default, so it must precede the defaulted `message` field.
    status: DBStatus
    message: str | None = None

    def is_ok(self) -> bool:
        """Return True if the status is OK."""
        return self.status == DBStatus.OK


@unique
class DBStatus(str, Enum):
    """Status of a dataset after running the sanity check.

    Members mix in ``str``, so each one compares equal to its plain string value.
    """

    OK = "OK"
    WARNING = "WARNING"
    ERROR = "ERROR"

    def __str__(self) -> str:
        """Return the bare member value, e.g. ``"OK"``.

        Without this override, ``str()`` yields ``"DBStatus.OK"``, and on
        Python 3.12+ ``format()`` and f-strings follow ``__str__`` too, which
        would put the qualified name into any rendered status.
        """
        return self.value


def sanity_check(
db_root: str | Path,
*,
revision: str | None = None,
include_warning: bool = False,
) -> DBException | None:
) -> DBException:
"""Perform sanity check and report exception or warning encountered while loading the dataset.

Args:
Expand All @@ -44,14 +64,28 @@ def sanity_check(
warnings.filterwarnings("ignore")

try:
_ = Tier4(data_root=db_root, revision=revision, verbose=False)
exception = None
t4 = Tier4(data_root=db_root, revision=revision, verbose=False)
exception = DBException(
dataset_id=t4.dataset_id,
version=t4.version,
status=DBStatus.OK,
)
except Warning as w:
metadata = load_metadata(db_root)

exception = DBException(
dataset_id=metadata.dataset_id,
version=metadata.version,
status=DBStatus.WARNING,
message=str(w),
)
except Exception as e:
metadata = load_metadata(db_root)

exception = DBException(
dataset_id=metadata.dataset_id,
version=metadata.version,
status=DBStatus.ERROR,
message=str(e),
)
return exception
10 changes: 5 additions & 5 deletions t4_devkit/tier4.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,9 @@ def load_metadata(db_root: str, revision: str | None = None) -> DBMetadata:
version = None
data_root = db_root_path.as_posix()
else:
if int(revision) not in version_candidates:
raise ValueError(f"The version: {revision} is not included in {dataset_id}")
version = revision
data_root = db_root_path.joinpath(version).as_posix()

if version is None:
warnings.warn(f"{dataset_id} does't contain any versions.", DeprecationWarning)

return DBMetadata(data_root=data_root, dataset_id=dataset_id, version=version)


Expand Down Expand Up @@ -149,6 +144,11 @@ def __init__(
if not osp.exists(self.data_root):
raise FileNotFoundError(f"Database directory is not found: {self.data_root}")

if self.version is None:
warnings.warn(
f"DatasetID: {self.dataset_id} does't contain any versions.", DeprecationWarning
)

start_time = time.time()
if verbose:
print(f"======\nLoading T4 tables in `{self.schema_dir}`...")
Expand Down
Loading