From d8714900fcebe00b9c022c027df34fd4434575fc Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Fri, 10 Oct 2025 14:58:54 +0900 Subject: [PATCH 01/21] feat: add detailed sanity checking Signed-off-by: ktro2828 --- docs/schema/requirement.md | 71 +++++++++++++++++++ pyproject.toml | 1 + t4_devkit/cli/sanity.py | 50 +++++++++++--- t4_devkit/sanity/__init__.py | 9 +++ t4_devkit/sanity/checker.py | 45 ++++++++++++ t4_devkit/sanity/context.py | 75 ++++++++++++++++++++ t4_devkit/sanity/format/__init__.py | 20 ++++++ t4_devkit/sanity/format/fmt001.py | 38 +++++++++++ t4_devkit/sanity/format/fmt002.py | 38 +++++++++++ t4_devkit/sanity/format/fmt003.py | 38 +++++++++++ t4_devkit/sanity/format/fmt004.py | 38 +++++++++++ t4_devkit/sanity/format/fmt005.py | 38 +++++++++++ t4_devkit/sanity/format/fmt006.py | 38 +++++++++++ t4_devkit/sanity/format/fmt007.py | 38 +++++++++++ t4_devkit/sanity/format/fmt008.py | 38 +++++++++++ t4_devkit/sanity/format/fmt009.py | 38 +++++++++++ t4_devkit/sanity/format/fmt010.py | 38 +++++++++++ t4_devkit/sanity/format/fmt011.py | 38 +++++++++++ t4_devkit/sanity/format/fmt012.py | 38 +++++++++++ t4_devkit/sanity/format/fmt013.py | 38 +++++++++++ t4_devkit/sanity/format/fmt014.py | 38 +++++++++++ t4_devkit/sanity/format/fmt015.py | 38 +++++++++++ t4_devkit/sanity/format/fmt016.py | 38 +++++++++++ t4_devkit/sanity/format/fmt017.py | 38 +++++++++++ t4_devkit/sanity/format/fmt018.py | 38 +++++++++++ t4_devkit/sanity/format/utility.py | 23 +++++++ t4_devkit/sanity/reference/__init__.py | 13 ++++ t4_devkit/sanity/reference/ref001.py | 40 +++++++++++ t4_devkit/sanity/reference/ref002.py | 40 +++++++++++ t4_devkit/sanity/reference/ref003.py | 40 +++++++++++ t4_devkit/sanity/reference/ref004.py | 40 +++++++++++ t4_devkit/sanity/reference/ref005.py | 40 +++++++++++ t4_devkit/sanity/reference/ref006.py | 40 +++++++++++ t4_devkit/sanity/reference/ref007.py | 42 ++++++++++++ t4_devkit/sanity/reference/ref008.py | 40 +++++++++++ t4_devkit/sanity/reference/ref009.py | 
40 +++++++++++ t4_devkit/sanity/reference/ref010.py | 42 ++++++++++++ t4_devkit/sanity/reference/ref011.py | 42 ++++++++++++ t4_devkit/sanity/registry.py | 63 +++++++++++++++++ t4_devkit/sanity/result.py | 94 ++++++++++++++++++++++++++ t4_devkit/sanity/run.py | 31 +++++++++ t4_devkit/sanity/safety.py | 18 +++++ t4_devkit/sanity/schema/__init__.py | 8 +++ t4_devkit/sanity/schema/sch001.py | 42 ++++++++++++ t4_devkit/sanity/schema/sch002.py | 39 +++++++++++ t4_devkit/sanity/schema/sch003.py | 38 +++++++++++ t4_devkit/sanity/schema/sch004.py | 38 +++++++++++ t4_devkit/sanity/schema/sch005.py | 40 +++++++++++ t4_devkit/sanity/schema/sch006.py | 38 +++++++++++ t4_devkit/sanity/structure/__init__.py | 11 +++ t4_devkit/sanity/structure/str001.py | 30 ++++++++ t4_devkit/sanity/structure/str002.py | 34 ++++++++++ t4_devkit/sanity/structure/str003.py | 34 ++++++++++ t4_devkit/sanity/structure/str004.py | 30 ++++++++ t4_devkit/sanity/structure/str005.py | 32 +++++++++ t4_devkit/sanity/structure/str006.py | 34 ++++++++++ t4_devkit/sanity/structure/str007.py | 36 ++++++++++ t4_devkit/sanity/structure/str008.py | 36 ++++++++++ t4_devkit/sanity/structure/str009.py | 36 ++++++++++ t4_devkit/schema/builder.py | 10 +++ 60 files changed, 2198 insertions(+), 11 deletions(-) create mode 100644 docs/schema/requirement.md create mode 100644 t4_devkit/sanity/__init__.py create mode 100644 t4_devkit/sanity/checker.py create mode 100644 t4_devkit/sanity/context.py create mode 100644 t4_devkit/sanity/format/__init__.py create mode 100644 t4_devkit/sanity/format/fmt001.py create mode 100644 t4_devkit/sanity/format/fmt002.py create mode 100644 t4_devkit/sanity/format/fmt003.py create mode 100644 t4_devkit/sanity/format/fmt004.py create mode 100644 t4_devkit/sanity/format/fmt005.py create mode 100644 t4_devkit/sanity/format/fmt006.py create mode 100644 t4_devkit/sanity/format/fmt007.py create mode 100644 t4_devkit/sanity/format/fmt008.py create mode 100644 t4_devkit/sanity/format/fmt009.py 
create mode 100644 t4_devkit/sanity/format/fmt010.py create mode 100644 t4_devkit/sanity/format/fmt011.py create mode 100644 t4_devkit/sanity/format/fmt012.py create mode 100644 t4_devkit/sanity/format/fmt013.py create mode 100644 t4_devkit/sanity/format/fmt014.py create mode 100644 t4_devkit/sanity/format/fmt015.py create mode 100644 t4_devkit/sanity/format/fmt016.py create mode 100644 t4_devkit/sanity/format/fmt017.py create mode 100644 t4_devkit/sanity/format/fmt018.py create mode 100644 t4_devkit/sanity/format/utility.py create mode 100644 t4_devkit/sanity/reference/__init__.py create mode 100644 t4_devkit/sanity/reference/ref001.py create mode 100644 t4_devkit/sanity/reference/ref002.py create mode 100644 t4_devkit/sanity/reference/ref003.py create mode 100644 t4_devkit/sanity/reference/ref004.py create mode 100644 t4_devkit/sanity/reference/ref005.py create mode 100644 t4_devkit/sanity/reference/ref006.py create mode 100644 t4_devkit/sanity/reference/ref007.py create mode 100644 t4_devkit/sanity/reference/ref008.py create mode 100644 t4_devkit/sanity/reference/ref009.py create mode 100644 t4_devkit/sanity/reference/ref010.py create mode 100644 t4_devkit/sanity/reference/ref011.py create mode 100644 t4_devkit/sanity/registry.py create mode 100644 t4_devkit/sanity/result.py create mode 100644 t4_devkit/sanity/run.py create mode 100644 t4_devkit/sanity/safety.py create mode 100644 t4_devkit/sanity/schema/__init__.py create mode 100644 t4_devkit/sanity/schema/sch001.py create mode 100644 t4_devkit/sanity/schema/sch002.py create mode 100644 t4_devkit/sanity/schema/sch003.py create mode 100644 t4_devkit/sanity/schema/sch004.py create mode 100644 t4_devkit/sanity/schema/sch005.py create mode 100644 t4_devkit/sanity/schema/sch006.py create mode 100644 t4_devkit/sanity/structure/__init__.py create mode 100644 t4_devkit/sanity/structure/str001.py create mode 100644 t4_devkit/sanity/structure/str002.py create mode 100644 t4_devkit/sanity/structure/str003.py create mode 
100644 t4_devkit/sanity/structure/str004.py create mode 100644 t4_devkit/sanity/structure/str005.py create mode 100644 t4_devkit/sanity/structure/str006.py create mode 100644 t4_devkit/sanity/structure/str007.py create mode 100644 t4_devkit/sanity/structure/str008.py create mode 100644 t4_devkit/sanity/structure/str009.py diff --git a/docs/schema/requirement.md b/docs/schema/requirement.md new file mode 100644 index 0000000..12d6c5f --- /dev/null +++ b/docs/schema/requirement.md @@ -0,0 +1,71 @@ +# Dataset Requirements + +## Structure (`STR`) + +| ID | Name | Severity | Description | +| -------- | ----------------------------- | -------- | -------------------------------------------------------------------- | +| `STR001` | `version-dir-presence` | `Warn` | `version/` directory exists under the dataset root directory. | +| `STR002` | `annotation-dir-presence` | `Error` | `annotation/` directory exists under the dataset root directory. | +| `STR003` | `data-dir-presence` | `Error` | `data/` directory exists under the dataset root directory. | +| `STR004` | `map-dir-presence` | `Error` | `map/` directory exists under the dataset root directory. | +| `STR005` | `bag-dir-presence` | `Error` | `input_bag/` directory exists under the dataset root directory. | +| `STR006` | `status-file-presence` | `Error` | `status.json` file exists under the dataset root directory. | +| `STR007` | `schema-files-presence` | `Error` | Mandatory schema JSON files exist under the `annotation/` directory. | +| `STR008` | `lanelet-file-presence` | `Warn` | `lanelet2_map.osm` file exists under the `map/` directory. | +| `STR009` | `pointcloud-map-dir-presence` | `Warn` | `pointcloud_map.pcd` directory exists under the `map/` directory. | + +## Schema (`SCH`) + +| ID | Name | Severity | Description | +| -------- | ----------------------------- | -------- | --------------------------------------- | +| `SCH001` | `scene-single` | `Error` | Exactly one `Scene` record exists.
| +| `SCH002` | `sample-not-empty` | `Error` | `Sample` record is not empty. | +| `SCH003` | `sample-data-not-empty` | `Error` | `SampleData` record is not empty. | +| `SCH004` | `ego-pose-not-empty` | `Error` | `EgoPose` record is not empty. | +| `SCH005` | `calibrated-sensor-non-empty` | `Error` | `CalibratedSensor` record is not empty. | +| `SCH006` | `instance-not-empty` | `Error` | `Instance` record is not empty. | + +## Reference (`REF`) + +| ID | Name | Severity | Description | +| -------- | ------------------------------------- | -------- | ------------------------------------------------------------------------- | +| `REF001` | `scene-to-log` | `Error` | `Scene.log_token` refers to `Log` record. | +| `REF002` | `scene-to-first-sample` | `Error` | `Scene.first_sample_token` refers to `Sample` record. | +| `REF003` | `scene-to-last-sample` | `Error` | `Scene.last_sample_token` refers to `Sample` record. | +| `REF004` | `sample-to-scene` | `Error` | `Sample.scene_token` refers to `Scene` record. | +| `REF005` | `sample-data-to-sample` | `Error` | `SampleData.sample_token` refers to `Sample` record. | +| `REF006` | `sample-data-to-ego-pose` | `Error` | `SampleData.ego_pose_token` refers to `EgoPose` record. | +| `REF007` | `sample-data-to-calibrated-sensor` | `Error` | `SampleData.calibrated_sensor_token` refers to `CalibratedSensor` record. | +| `REF008` | `calibrated-sensor-to-sensor` | `Error` | `CalibratedSensor.sensor_token` refers to `Sensor` record. | +| `REF009` | `instance-to-category` | `Error` | `Instance.category_token` refers to `Category` record. | +| `REF010` | `instance-to-first-sample-annotation` | `Error` | `Instance.first_annotation_token` refers to `SampleAnnotation` record. | +| `REF011` | `instance-to-last-sample-annotation` | `Error` | `Instance.last_annotation_token` refers to `SampleAnnotation` record. 
| + +## Format (`FMT`) + +| ID | Name | Severity | Description | +| -------- | ------------------------- | -------- | ------------------------------------------------- | +| `FMT001` | `attribute-field` | `Error` | All types of `Attribute` fields are valid. | +| `FMT002` | `calibrated-sensor-field` | `Error` | All types of `CalibratedSensor` fields are valid. | +| `FMT003` | `category-field` | `Error` | All types of `Category` fields are valid. | +| `FMT004` | `ego-pose-field` | `Error` | All types of `EgoPose` fields are valid. | +| `FMT005` | `instance-field` | `Error` | All types of `Instance` fields are valid. | +| `FMT006` | `log-field` | `Error` | All types of `Log` fields are valid. | +| `FMT007` | `map-field` | `Error` | All types of `Map` fields are valid. | +| `FMT008` | `sample-field` | `Error` | All types of `Sample` fields are valid. | +| `FMT009` | `sample-annotation-field` | `Error` | All types of `SampleAnnotation` fields are valid. | +| `FMT010` | `sample-data-field` | `Error` | All types of `SampleData` fields are valid. | +| `FMT011` | `scene-field` | `Error` | All types of `Scene` fields are valid. | +| `FMT012` | `sensor-field` | `Error` | All types of `Sensor` fields are valid. | +| `FMT013` | `visibility-field` | `Error` | All types of `Visibility` fields are valid. | +| `FMT014` | `lidarseg-field` | `Error` | All types of `Lidarseg` fields are valid. | +| `FMT015` | `object-ann-field` | `Error` | All types of `ObjectAnn` fields are valid. | +| `FMT016` | `surface-ann-field` | `Error` | All types of `SurfaceAnn` fields are valid. | +| `FMT017` | `keypoint-field` | `Error` | All types of `Keypoint` fields are valid. | +| `FMT018` | `vehicle-state-field` | `Error` | All types of `VehicleState` fields are valid. | + +## Tier4 Instance (`TIV`) + +| ID | Name | Severity | Description | +| -------- | ------------ | -------- | --------------------------------------- | +| `TIV001` | `load-tier4` | `Error` | `Tier4` instance is initialized successfully.
| diff --git a/pyproject.toml b/pyproject.toml index 4d3831a..7ff0d0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ dependencies = [ "typer>=0.15.3", "tabulate>=0.9.0", "tqdm>=4.67.1", + "returns>=0.26.0", ] [dependency-groups] diff --git a/t4_devkit/cli/sanity.py b/t4_devkit/cli/sanity.py index 72400b4..2a1b2f8 100644 --- a/t4_devkit/cli/sanity.py +++ b/t4_devkit/cli/sanity.py @@ -7,8 +7,8 @@ from tqdm import tqdm from t4_devkit.common.io import save_json -from t4_devkit.common.sanity import DBException, sanity_check from t4_devkit.common.serialize import serialize_dataclasses +from t4_devkit.sanity import sanity_check, SanityResult from .version import version_callback @@ -25,13 +25,44 @@ def _run_sanity_check( *, revision: str | None = None, include_warning: bool = False, -) -> list[DBException]: +) -> list[SanityResult]: return [ sanity_check(db_root, revision=revision, include_warning=include_warning) for db_root in tqdm(Path(db_parent).glob("*"), desc=">>>Sanity checking...") ] +def _pretty_print(results: list[SanityResult], *, detail: bool = False) -> None: + summary_rows = [] + for result in results: + success = sum(1 for rp in result.reports.values() if rp.is_success()) + failures = sum(1 for rp in result.reports.values() if rp.is_failure()) + skips = sum(1 for rp in result.reports.values() if rp.is_skipped()) + summary_rows.append( + [ + result.dataset_id, + result.version, + "FAILURE" if failures > 0 else "SUCCESS", + len(result.reports), + success, + failures, + skips, + ] + ) + + if detail: + print(result) + + print(f"\n{'=' * 20} Summary {'=' * 20}") + print( + tabulate( + summary_rows, + headers=["DatasetID", "Version", "Overall", "Rules", "Success", "Failures", "Skips"], + tablefmt="pretty", + ), + ) + + @cli.command() def main( version: bool = typer.Option( @@ -50,17 +81,14 @@ def main( include_warning: bool = typer.Option( False, "-iw", "--include-warning", help="Indicates whether to report any warnings." 
), + detail: bool = typer.Option( + False, "-d", "--detail", help="Indicates whether to display detailed reports." + ), ) -> None: - exceptions = _run_sanity_check(db_parent, revision=revision, include_warning=include_warning) + results = _run_sanity_check(db_parent, revision=revision, include_warning=include_warning) - if all(e.is_ok() for e in exceptions): - print("✅ No exceptions occurred!!") - else: - print("⚠️ Encountered some exceptions!!") - headers = ["DatasetID", "Version", "Status", "Message"] - table = [[e.dataset_id, e.version, e.status, e.message] for e in exceptions] - print(tabulate(table, headers=headers, tablefmt="pretty")) + _pretty_print(results, detail=detail) if output: - serialized = serialize_dataclasses(exceptions) + serialized = serialize_dataclasses(results) save_json(serialized, output) diff --git a/t4_devkit/sanity/__init__.py b/t4_devkit/sanity/__init__.py new file mode 100644 index 0000000..2446981 --- /dev/null +++ b/t4_devkit/sanity/__init__.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +from .registry import * # noqa +from .run import * # noqa +from .result import * # noqa +from .structure import * # noqa +from .schema import * # noqa +from .reference import * # noqa +from .format import * # noqa diff --git a/t4_devkit/sanity/checker.py b/t4_devkit/sanity/checker.py new file mode 100644 index 0000000..f434b0c --- /dev/null +++ b/t4_devkit/sanity/checker.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, NewType + +from returns.maybe import Maybe, Nothing, Some + +from .result import make_failure, make_success, make_skipped + +if TYPE_CHECKING: + from .context import SanityContext + from .result import Reason, Report + + +RuleID = NewType("RuleID", str) +RuleName = NewType("RuleName", str) + + +class Checker(ABC): + """Base class for sanity checkers.""" + + name: RuleName + description: str + + def __init__(self, id: RuleID) -> None: + self.id = 
id + + def __call__(self, context: SanityContext) -> Report: + match self.can_skip(context): + case Some(skip): + return make_skipped(self.id, self.name, self.description, skip) + + reasons = self.check(context) + if reasons: + return make_failure(self.id, self.name, self.description, reasons) + else: + return make_success(self.id, self.name, self.description) + + def can_skip(self, _: SanityContext) -> Maybe[Reason]: + """Return a skip reason if the checker should be skipped.""" + return Nothing + + @abstractmethod + def check(self, context: SanityContext) -> list[Reason]: + pass diff --git a/t4_devkit/sanity/context.py b/t4_devkit/sanity/context.py new file mode 100644 index 0000000..2ba355f --- /dev/null +++ b/t4_devkit/sanity/context.py @@ -0,0 +1,75 @@ +from __future__ import annotations + +from pathlib import Path + +from attrs import define +from returns.maybe import Maybe +from returns.pipeline import is_successful +from returns.result import Result, safe +from typing_extensions import Self + +from t4_devkit import DBMetadata, load_metadata +from t4_devkit.schema.name import SchemaName + + +@define +class SanityContext: + metadata: Maybe[DBMetadata] + + @classmethod + def from_path(cls, data_root: str, revision: str | None = None) -> Self: + metadata_result = _load_metadata_safe(data_root, revision=revision) + metadata = metadata_result.unwrap() if is_successful(metadata_result) else None + return cls(Maybe.from_optional(metadata)) + + @property + def data_root(self) -> Maybe[Path]: + """Return the path to dataset root directory.""" + return self.metadata.map(lambda m: Path(m.data_root)) + + @property + def dataset_id(self) -> Maybe[str]: + """Return the dataset ID.""" + return self.metadata.map(lambda m: m.dataset_id) + + @property + def version(self) -> Maybe[str]: + """Return the dataset version.""" + return self.metadata.bind_optional(lambda m: m.version) + + @property + def annotation_dir(self) -> Maybe[Path]: + """Return the path to annotation 
directory, which is 'annotation'.""" + return self.metadata.map(lambda m: Path(m.data_root).joinpath("annotation")) + + @property + def sensor_data_dir(self) -> Maybe[Path]: + """Return the path to sensor data directory, which is 'data'.""" + return self.metadata.map(lambda m: Path(m.data_root).joinpath("data")) + + @property + def map_dir(self) -> Maybe[Path]: + """Return the path to map directory, which is 'map'.""" + return self.metadata.map(lambda m: Path(m.data_root).joinpath("map")) + + @property + def bag_dir(self) -> Maybe[Path]: + """Return the path to bag directory, which is 'input_bag'.""" + return self.metadata.map(lambda m: Path(m.data_root).joinpath("input_bag")) + + @property + def status_json(self) -> Maybe[Path]: + """Return the path to status JSON file, which is 'status.json'.""" + return self.metadata.map(lambda m: Path(m.data_root).joinpath("status.json")) + + def to_schema_file(self, schema: SchemaName) -> Maybe[Path]: + """Convert schema name to its file path under the annotation directory.""" + return self.annotation_dir.map(lambda ann: ann.joinpath(schema.filename)) + + +@safe +def _load_metadata_safe( + data_root: str, + revision: str | None = None, +) -> DBMetadata: + return load_metadata(data_root, revision=revision) diff --git a/t4_devkit/sanity/format/__init__.py b/t4_devkit/sanity/format/__init__.py new file mode 100644 index 0000000..be5cabd --- /dev/null +++ b/t4_devkit/sanity/format/__init__.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from .fmt001 import * # noqa +from .fmt002 import * # noqa +from .fmt003 import * # noqa +from .fmt004 import * # noqa +from .fmt005 import * # noqa +from .fmt006 import * # noqa +from .fmt007 import * # noqa +from .fmt008 import * # noqa +from .fmt009 import * # noqa +from .fmt010 import * # noqa +from .fmt011 import * # noqa +from .fmt012 import * # noqa +from .fmt013 import * # noqa +from .fmt014 import * # noqa +from .fmt015 import * # noqa +from .fmt016 import * # noqa 
+from .fmt017 import * # noqa +from .fmt018 import * # noqa diff --git a/t4_devkit/sanity/format/fmt001.py b/t4_devkit/sanity/format/fmt001.py new file mode 100644 index 0000000..bfce937 --- /dev/null +++ b/t4_devkit/sanity/format/fmt001.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe +from .utility import build_records + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["FMT001"] + + +@CHECKERS.register(RuleID("FMT001")) +class FMT001(Checker): + """A checker of FMT001.""" + + name = RuleName("attribute-field") + description = "All types of 'Attribute' fields are valid." + + def check(self, context: SanityContext) -> list[Reason]: + schema = SchemaName.ATTRIBUTE + match context.to_schema_file(schema): + case Some(x): + records = load_json_safe(x.as_posix()) + if not is_successful(records): + return [Reason("Invalid `Attribute` file")] + return build_records(schema, records.unwrap()) + case _: + return [Reason("No `Attribute` file found")] diff --git a/t4_devkit/sanity/format/fmt002.py b/t4_devkit/sanity/format/fmt002.py new file mode 100644 index 0000000..da08a57 --- /dev/null +++ b/t4_devkit/sanity/format/fmt002.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe +from .utility import build_records + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["FMT002"] + + +@CHECKERS.register(RuleID("FMT002")) +class 
FMT002(Checker): + """A checker of FMT002.""" + + name = RuleName("calibrated-sensor-field") + description = "All types of 'CalibratedSensor' fields are valid." + + def check(self, context: SanityContext) -> list[Reason]: + schema = SchemaName.CALIBRATED_SENSOR + match context.to_schema_file(schema): + case Some(x): + records = load_json_safe(x.as_posix()) + if not is_successful(records): + return [Reason("Invalid `CalibratedSensor` file")] + return build_records(schema, records.unwrap()) + case _: + return [Reason("No `CalibratedSensor` file found")] diff --git a/t4_devkit/sanity/format/fmt003.py b/t4_devkit/sanity/format/fmt003.py new file mode 100644 index 0000000..dfb54b2 --- /dev/null +++ b/t4_devkit/sanity/format/fmt003.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe +from .utility import build_records + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["FMT003"] + + +@CHECKERS.register(RuleID("FMT003")) +class FMT003(Checker): + """A checker of FMT003.""" + + name = RuleName("category-field") + description = "All types of 'Category' fields are valid." 
+ + def check(self, context: SanityContext) -> list[Reason]: + schema = SchemaName.CATEGORY + match context.to_schema_file(schema): + case Some(x): + records = load_json_safe(x.as_posix()) + if not is_successful(records): + return [Reason("Invalid `Category` file")] + return build_records(schema, records.unwrap()) + case _: + return [Reason("No `Category` file found")] diff --git a/t4_devkit/sanity/format/fmt004.py b/t4_devkit/sanity/format/fmt004.py new file mode 100644 index 0000000..129e481 --- /dev/null +++ b/t4_devkit/sanity/format/fmt004.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe +from .utility import build_records + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["FMT004"] + + +@CHECKERS.register(RuleID("FMT004")) +class FMT004(Checker): + """A checker of FMT004.""" + + name = RuleName("ego-pose-field") + description = "All types of 'EgoPose' fields are valid." 
+ + def check(self, context: SanityContext) -> list[Reason]: + schema = SchemaName.EGO_POSE + match context.to_schema_file(schema): + case Some(x): + records = load_json_safe(x.as_posix()) + if not is_successful(records): + return [Reason("Invalid `EgoPose` file")] + return build_records(schema, records.unwrap()) + case _: + return [Reason("No `EgoPose` file found")] diff --git a/t4_devkit/sanity/format/fmt005.py b/t4_devkit/sanity/format/fmt005.py new file mode 100644 index 0000000..c510b5f --- /dev/null +++ b/t4_devkit/sanity/format/fmt005.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe +from .utility import build_records + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["FMT005"] + + +@CHECKERS.register(RuleID("FMT005")) +class FMT005(Checker): + """A checker of FMT005.""" + + name = RuleName("instance-field") + description = "All types of 'Instance' fields are valid." 
+ + def check(self, context: SanityContext) -> list[Reason]: + schema = SchemaName.INSTANCE + match context.to_schema_file(schema): + case Some(x): + records = load_json_safe(x.as_posix()) + if not is_successful(records): + return [Reason("Invalid `Instance` file")] + return build_records(schema, records.unwrap()) + case _: + return [Reason("No `Instance` file found")] diff --git a/t4_devkit/sanity/format/fmt006.py b/t4_devkit/sanity/format/fmt006.py new file mode 100644 index 0000000..c77381b --- /dev/null +++ b/t4_devkit/sanity/format/fmt006.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe +from .utility import build_records + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["FMT006"] + + +@CHECKERS.register(RuleID("FMT006")) +class FMT006(Checker): + """A checker of FMT006.""" + + name = RuleName("log-field") + description = "All types of 'Log' fields are valid." 
+ + def check(self, context: SanityContext) -> list[Reason]: + schema = SchemaName.LOG + match context.to_schema_file(schema): + case Some(x): + records = load_json_safe(x.as_posix()) + if not is_successful(records): + return [Reason("Invalid `Log` file")] + return build_records(schema, records.unwrap()) + case _: + return [Reason("No `Log` file found")] diff --git a/t4_devkit/sanity/format/fmt007.py b/t4_devkit/sanity/format/fmt007.py new file mode 100644 index 0000000..d8eadb0 --- /dev/null +++ b/t4_devkit/sanity/format/fmt007.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe +from .utility import build_records + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["FMT007"] + + +@CHECKERS.register(RuleID("FMT007")) +class FMT007(Checker): + """A checker of FMT007.""" + + name = RuleName("map-field") + description = "All types of 'Map' fields are valid." 
+ + def check(self, context: SanityContext) -> list[Reason]: + schema = SchemaName.MAP + match context.to_schema_file(schema): + case Some(x): + records = load_json_safe(x.as_posix()) + if not is_successful(records): + return [Reason("Invalid `Map` file")] + return build_records(schema, records.unwrap()) + case _: + return [Reason("No `Map` file found")] diff --git a/t4_devkit/sanity/format/fmt008.py b/t4_devkit/sanity/format/fmt008.py new file mode 100644 index 0000000..e6dd797 --- /dev/null +++ b/t4_devkit/sanity/format/fmt008.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe +from .utility import build_records + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["FMT008"] + + +@CHECKERS.register(RuleID("FMT008")) +class FMT008(Checker): + """A checker of FMT008.""" + + name = RuleName("sample-field") + description = "All types of 'Sample' fields are valid." 
+ + def check(self, context: SanityContext) -> list[Reason]: + schema = SchemaName.SAMPLE + match context.to_schema_file(schema): + case Some(x): + records = load_json_safe(x.as_posix()) + if not is_successful(records): + return [Reason("Invalid `Sample` file")] + return build_records(schema, records.unwrap()) + case _: + return [Reason("No `Sample` file found")] diff --git a/t4_devkit/sanity/format/fmt009.py b/t4_devkit/sanity/format/fmt009.py new file mode 100644 index 0000000..abfcc5e --- /dev/null +++ b/t4_devkit/sanity/format/fmt009.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe +from .utility import build_records + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["FMT009"] + + +@CHECKERS.register(RuleID("FMT009")) +class FMT009(Checker): + """A checker of FMT009.""" + + name = RuleName("sample-annotation-field") + description = "All types of 'SampleAnnotation' fields are valid." 
+ + def check(self, context: SanityContext) -> list[Reason]: + schema = SchemaName.SAMPLE_ANNOTATION + match context.to_schema_file(schema): + case Some(x): + records = load_json_safe(x.as_posix()) + if not is_successful(records): + return [Reason("Invalid `SampleAnnotation` file")] + return build_records(schema, records.unwrap()) + case _: + return [Reason("No `SampleAnnotation` file found")] diff --git a/t4_devkit/sanity/format/fmt010.py b/t4_devkit/sanity/format/fmt010.py new file mode 100644 index 0000000..cc74e24 --- /dev/null +++ b/t4_devkit/sanity/format/fmt010.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe +from .utility import build_records + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["FMT010"] + + +@CHECKERS.register(RuleID("FMT010")) +class FMT010(Checker): + """A checker of FMT010.""" + + name = RuleName("sample-data-field") + description = "All types of 'SampleData' fields are valid." 
+ + def check(self, context: SanityContext) -> list[Reason]: + schema = SchemaName.SAMPLE_DATA + match context.to_schema_file(schema): + case Some(x): + records = load_json_safe(x.as_posix()) + if not is_successful(records): + return [Reason("Invalid `SampleData` file")] + return build_records(schema, records.unwrap()) + case _: + return [Reason("No `SampleData` file found")] diff --git a/t4_devkit/sanity/format/fmt011.py b/t4_devkit/sanity/format/fmt011.py new file mode 100644 index 0000000..35a571b --- /dev/null +++ b/t4_devkit/sanity/format/fmt011.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe +from .utility import build_records + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["FMT011"] + + +@CHECKERS.register(RuleID("FMT011")) +class FMT011(Checker): + """A checker of FMT011.""" + + name = RuleName("scene-field") + description = "All types of 'Scene' fields are valid." 
+ + def check(self, context: SanityContext) -> list[Reason]: + schema = SchemaName.SCENE + match context.to_schema_file(schema): + case Some(x): + records = load_json_safe(x.as_posix()) + if not is_successful(records): + return [Reason("Invalid `Scene` file")] + return build_records(schema, records.unwrap()) + case _: + return [Reason("No `Scene` file found")] diff --git a/t4_devkit/sanity/format/fmt012.py b/t4_devkit/sanity/format/fmt012.py new file mode 100644 index 0000000..1ce130e --- /dev/null +++ b/t4_devkit/sanity/format/fmt012.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe +from .utility import build_records + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["FMT012"] + + +@CHECKERS.register(RuleID("FMT012")) +class FMT012(Checker): + """A checker of FMT012.""" + + name = RuleName("sensor-field") + description = "All types of 'Sensor' fields are valid." 
+ + def check(self, context: SanityContext) -> list[Reason]: + schema = SchemaName.SENSOR + match context.to_schema_file(schema): + case Some(x): + records = load_json_safe(x.as_posix()) + if not is_successful(records): + return [Reason("Invalid `Sensor` file")] + return build_records(schema, records.unwrap()) + case _: + return [Reason("No `Sensor` file found")] diff --git a/t4_devkit/sanity/format/fmt013.py b/t4_devkit/sanity/format/fmt013.py new file mode 100644 index 0000000..3c693ce --- /dev/null +++ b/t4_devkit/sanity/format/fmt013.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe +from .utility import build_records + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["FMT013"] + + +@CHECKERS.register(RuleID("FMT013")) +class FMT013(Checker): + """A checker of FMT013.""" + + name = RuleName("visibility-field") + description = "All types of 'Visibility' fields are valid." 
+ + def check(self, context: SanityContext) -> list[Reason]: + schema = SchemaName.VISIBILITY + match context.to_schema_file(schema): + case Some(x): + records = load_json_safe(x.as_posix()) + if not is_successful(records): + return [Reason("Invalid `Visibility` file")] + return build_records(schema, records.unwrap()) + case _: + return [Reason("No `Visibility` file found")] diff --git a/t4_devkit/sanity/format/fmt014.py b/t4_devkit/sanity/format/fmt014.py new file mode 100644 index 0000000..f3bfe66 --- /dev/null +++ b/t4_devkit/sanity/format/fmt014.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe +from .utility import build_records + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["FMT014"] + + +@CHECKERS.register(RuleID("FMT014")) +class FMT014(Checker): + """A checker of FMT014.""" + + name = RuleName("lidarseg-field") + description = "All types of 'LidarSeg' fields are valid." 
+ + def check(self, context: SanityContext) -> list[Reason]: + schema = SchemaName.LIDARSEG + match context.to_schema_file(schema): + case Some(x): + records = load_json_safe(x.as_posix()) + if not is_successful(records): + return [Reason("Invalid `LidarSeg` file")] + return build_records(schema, records.unwrap()) + case _: + return [Reason("No `LidarSeg` file found")] diff --git a/t4_devkit/sanity/format/fmt015.py b/t4_devkit/sanity/format/fmt015.py new file mode 100644 index 0000000..d3980cd --- /dev/null +++ b/t4_devkit/sanity/format/fmt015.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe +from .utility import build_records + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["FMT015"] + + +@CHECKERS.register(RuleID("FMT015")) +class FMT015(Checker): + """A checker of FMT015.""" + + name = RuleName("object-ann-field") + description = "All types of 'ObjectAnn' fields are valid." 
+ + def check(self, context: SanityContext) -> list[Reason]: + schema = SchemaName.OBJECT_ANN + match context.to_schema_file(schema): + case Some(x): + records = load_json_safe(x.as_posix()) + if not is_successful(records): + return [Reason("Invalid `ObjectAnn` file")] + return build_records(schema, records.unwrap()) + case _: + return [Reason("No `ObjectAnn` file found")] diff --git a/t4_devkit/sanity/format/fmt016.py b/t4_devkit/sanity/format/fmt016.py new file mode 100644 index 0000000..98234b0 --- /dev/null +++ b/t4_devkit/sanity/format/fmt016.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe +from .utility import build_records + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["FMT016"] + + +@CHECKERS.register(RuleID("FMT016")) +class FMT016(Checker): + """A checker of FMT016.""" + + name = RuleName("surface-ann-field") + description = "All types of 'SurfaceAnn' fields are valid." 
+ + def check(self, context: SanityContext) -> list[Reason]: + schema = SchemaName.SURFACE_ANN + match context.to_schema_file(schema): + case Some(x): + records = load_json_safe(x.as_posix()) + if not is_successful(records): + return [Reason("Invalid `SurfaceAnn` file")] + return build_records(schema, records.unwrap()) + case _: + return [Reason("No `SurfaceAnn` file found")] diff --git a/t4_devkit/sanity/format/fmt017.py b/t4_devkit/sanity/format/fmt017.py new file mode 100644 index 0000000..d949547 --- /dev/null +++ b/t4_devkit/sanity/format/fmt017.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe +from .utility import build_records + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["FMT017"] + + +@CHECKERS.register(RuleID("FMT017")) +class FMT017(Checker): + """A checker of FMT017.""" + + name = RuleName("keypoint-field") + description = "All types of 'Keypoint' fields are valid." 
+ + def check(self, context: SanityContext) -> list[Reason]: + schema = SchemaName.KEYPOINT + match context.to_schema_file(schema): + case Some(x): + records = load_json_safe(x.as_posix()) + if not is_successful(records): + return [Reason("Invalid `Keypoint` file")] + return build_records(schema, records.unwrap()) + case _: + return [Reason("No `Keypoint` file found")] diff --git a/t4_devkit/sanity/format/fmt018.py b/t4_devkit/sanity/format/fmt018.py new file mode 100644 index 0000000..7667fd6 --- /dev/null +++ b/t4_devkit/sanity/format/fmt018.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe +from .utility import build_records + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["FMT018"] + + +@CHECKERS.register(RuleID("FMT018")) +class FMT018(Checker): + """A checker of FMT018.""" + + name = RuleName("vehicle-state-field") + description = "All types of 'VehicleState' fields are valid." 
+ + def check(self, context: SanityContext) -> list[Reason]: + schema = SchemaName.VEHICLE_STATE + match context.to_schema_file(schema): + case Some(x): + records = load_json_safe(x.as_posix()) + if not is_successful(records): + return [Reason("Invalid `VehicleState` file")] + return build_records(schema, records.unwrap()) + case _: + return [Reason("No `VehicleState` file found")] diff --git a/t4_devkit/sanity/format/utility.py b/t4_devkit/sanity/format/utility.py new file mode 100644 index 0000000..aecdb39 --- /dev/null +++ b/t4_devkit/sanity/format/utility.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from returns.pipeline import is_successful +from returns.result import safe + +from t4_devkit.schema import SCHEMAS, SchemaName, SchemaBase + +from ..result import Reason + + +def build_records(schema: SchemaName, records: list[dict]) -> list[Reason]: + module = SCHEMAS.get(schema) + failures = [] + for record in records: + conversion = _safe_from_dict(module, record) + if not is_successful(conversion): + failures.append(Reason(f"[{schema.name}] {record['token']}: {conversion.failure()}")) + return failures + + +@safe +def _safe_from_dict(module: SchemaBase, record: dict): + return module.from_dict(record) diff --git a/t4_devkit/sanity/reference/__init__.py b/t4_devkit/sanity/reference/__init__.py new file mode 100644 index 0000000..15f583d --- /dev/null +++ b/t4_devkit/sanity/reference/__init__.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from .ref001 import * # noqa +from .ref002 import * # noqa +from .ref003 import * # noqa +from .ref004 import * # noqa +from .ref005 import * # noqa +from .ref006 import * # noqa +from .ref007 import * # noqa +from .ref008 import * # noqa +from .ref009 import * # noqa +from .ref010 import * # noqa +from .ref011 import * # noqa diff --git a/t4_devkit/sanity/reference/ref001.py b/t4_devkit/sanity/reference/ref001.py new file mode 100644 index 0000000..6d76ce2 --- /dev/null +++ 
b/t4_devkit/sanity/reference/ref001.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["REF001"] + + +@CHECKERS.register(RuleID("REF001")) +class REF001(Checker): + """A checker of REF001.""" + + name = RuleName("scene-to-log") + description = "'Scene.log_token' refers to 'Log' record." + + def check(self, context: SanityContext) -> list[Reason]: + scene_file = context.to_schema_file(SchemaName.SCENE) + log_file = context.to_schema_file(SchemaName.LOG) + match (scene_file, log_file): + case Some(x), Some(y): + scene = load_json_safe(x).unwrap() + log_tokens = [item["token"] for item in load_json_safe(y).unwrap()] + return [ + Reason(f"No reference to `Scene.log_token`: {s['log_token']}") + for s in scene + if s["log_token"] not in log_tokens + ] + case _: + return [Reason("Missing `Scene` or `Log` file")] diff --git a/t4_devkit/sanity/reference/ref002.py b/t4_devkit/sanity/reference/ref002.py new file mode 100644 index 0000000..e11f089 --- /dev/null +++ b/t4_devkit/sanity/reference/ref002.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["REF002"] + + +@CHECKERS.register(RuleID("REF002")) +class REF002(Checker): + """A checker of REF002.""" + + name = RuleName("scene-to-first-sample") + description = "'Scene.first_sample_token' refers to 'Sample' record." 
+ + def check(self, context: SanityContext) -> list[Reason]: + scene_file = context.to_schema_file(SchemaName.SCENE) + sample_file = context.to_schema_file(SchemaName.SAMPLE) + match (scene_file, sample_file): + case Some(x), Some(y): + scene = load_json_safe(x).unwrap() + sample_tokens = [item["token"] for item in load_json_safe(y).unwrap()] + return [ + Reason(f"No reference to `Scene.first_sample_token`: {s['first_sample_token']}") + for s in scene + if s["first_sample_token"] not in sample_tokens + ] + case _: + return [Reason("Missing `Scene` or `Sample` file")] diff --git a/t4_devkit/sanity/reference/ref003.py b/t4_devkit/sanity/reference/ref003.py new file mode 100644 index 0000000..4a18b65 --- /dev/null +++ b/t4_devkit/sanity/reference/ref003.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["REF003"] + + +@CHECKERS.register(RuleID("REF003")) +class REF003(Checker): + """A checker of REF003.""" + + name = RuleName("scene-to-last-sample") + description = "'Scene.last_sample_token' refers to 'Sample' record." 
+ + def check(self, context: SanityContext) -> list[Reason]: + scene_file = context.to_schema_file(SchemaName.SCENE) + sample_file = context.to_schema_file(SchemaName.SAMPLE) + match (scene_file, sample_file): + case Some(x), Some(y): + scene = load_json_safe(x).unwrap() + sample_tokens = [item["token"] for item in load_json_safe(y).unwrap()] + return [ + Reason(f"No reference to `Scene.last_sample_token`: {s['last_sample_token']}") + for s in scene + if s["last_sample_token"] not in sample_tokens + ] + case _: + return [Reason("Missing `Scene` or `Sample` file")] diff --git a/t4_devkit/sanity/reference/ref004.py b/t4_devkit/sanity/reference/ref004.py new file mode 100644 index 0000000..90b6e50 --- /dev/null +++ b/t4_devkit/sanity/reference/ref004.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["REF004"] + + +@CHECKERS.register(RuleID("REF004")) +class REF004(Checker): + """A checker of REF004.""" + + name = RuleName("sample-to-scene") + description = "'Sample.scene_token' refers to 'Scene' record." 
+ + def check(self, context: SanityContext) -> list[Reason]: + sample_file = context.to_schema_file(SchemaName.SAMPLE) + scene_file = context.to_schema_file(SchemaName.SCENE) + match (sample_file, scene_file): + case Some(x), Some(y): + sample = load_json_safe(x).unwrap() + scene_tokens = [item["token"] for item in load_json_safe(y).unwrap()] + return [ + Reason(f"No reference to `Sample.scene_token`: {s['scene_token']}") + for s in sample + if s["scene_token"] not in scene_tokens + ] + case _: + return [Reason("Missing `Scene` or `Sample` file")] diff --git a/t4_devkit/sanity/reference/ref005.py b/t4_devkit/sanity/reference/ref005.py new file mode 100644 index 0000000..c409f8d --- /dev/null +++ b/t4_devkit/sanity/reference/ref005.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["REF005"] + + +@CHECKERS.register(RuleID("REF005")) +class REF005(Checker): + """A checker of REF005.""" + + name = RuleName("sample-data-to-sample") + description = "'SampleData.sample_token' refers to 'Sample' record." 
+ + def check(self, context: SanityContext) -> list[Reason]: + sample_data_file = context.to_schema_file(SchemaName.SAMPLE_DATA) + sample_file = context.to_schema_file(SchemaName.SAMPLE) + match (sample_data_file, sample_file): + case Some(x), Some(y): + sample_data = load_json_safe(x).unwrap() + sample_tokens = [item["token"] for item in load_json_safe(y).unwrap()] + return [ + Reason(f"No reference to 'SampleData.sample_token': {s['sample_token']}") + for s in sample_data + if s["sample_token"] not in sample_tokens and s["is_valid"] + ] + case _: + return [Reason("Missing 'SampleData' or 'Sample' file")] diff --git a/t4_devkit/sanity/reference/ref006.py b/t4_devkit/sanity/reference/ref006.py new file mode 100644 index 0000000..a23ee68 --- /dev/null +++ b/t4_devkit/sanity/reference/ref006.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["REF006"] + + +@CHECKERS.register(RuleID("REF006")) +class REF006(Checker): + """A checker of REF006.""" + + name = RuleName("sample-data-to-ego-pose") + description = "'SampleData.ego_pose_token' refers to 'EgoPose' record." 
+ + def check(self, context: SanityContext) -> list[Reason]: + sample_data_file = context.to_schema_file(SchemaName.SAMPLE_DATA) + ego_pose_file = context.to_schema_file(SchemaName.EGO_POSE) + match (sample_data_file, ego_pose_file): + case Some(x), Some(y): + sample_data = load_json_safe(x).unwrap() + ego_pose_tokens = [item["token"] for item in load_json_safe(y).unwrap()] + return [ + Reason(f"No reference to `SampleData.ego_pose_token`: {s['ego_pose_token']}") + for s in sample_data + if s["ego_pose_token"] not in ego_pose_tokens + ] + case _: + return [Reason("Missing `SampleData` or `EgoPose` file")] diff --git a/t4_devkit/sanity/reference/ref007.py b/t4_devkit/sanity/reference/ref007.py new file mode 100644 index 0000000..9ccfea8 --- /dev/null +++ b/t4_devkit/sanity/reference/ref007.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["REF007"] + + +@CHECKERS.register(RuleID("REF007")) +class REF007(Checker): + """A checker of REF007.""" + + name = RuleName("sample-data-to-calibrated-sensor") + description = "'SampleData.calibrated_sensor_token' refers to 'CalibratedSensor' record." 
+ + def check(self, context: SanityContext) -> list[Reason]: + sample_data_file = context.to_schema_file(SchemaName.SAMPLE_DATA) + calibrated_sensor_file = context.to_schema_file(SchemaName.CALIBRATED_SENSOR) + match (sample_data_file, calibrated_sensor_file): + case Some(x), Some(y): + sample_data = load_json_safe(x).unwrap() + calibrated_sensor_tokens = [item["token"] for item in load_json_safe(y).unwrap()] + return [ + Reason( + f"No reference to `SampleData.calibrated_sensor_token`: {s['calibrated_sensor_token']}" + ) + for s in sample_data + if s["calibrated_sensor_token"] not in calibrated_sensor_tokens + ] + case _: + return [Reason("Missing `SampleData` or `CalibratedSensor` file")] diff --git a/t4_devkit/sanity/reference/ref008.py b/t4_devkit/sanity/reference/ref008.py new file mode 100644 index 0000000..4739c2b --- /dev/null +++ b/t4_devkit/sanity/reference/ref008.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["REF008"] + + +@CHECKERS.register(RuleID("REF008")) +class REF008(Checker): + """A checker of REF008.""" + + name = RuleName("calibrated-sensor-to-sensor") + description = "'CalibratedSensor.sensor_token' refers to 'Sensor' record." 
+ + def check(self, context: SanityContext) -> list[Reason]: + calibrated_sensor_file = context.to_schema_file(SchemaName.CALIBRATED_SENSOR) + sensor_file = context.to_schema_file(SchemaName.SENSOR) + match (calibrated_sensor_file, sensor_file): + case Some(x), Some(y): + calibrated_sensor = load_json_safe(x).unwrap() + sensor_tokens = [item["token"] for item in load_json_safe(y).unwrap()] + return [ + Reason(f"No reference to `CalibratedSensor.sensor_token`: {cs['sensor_token']}") + for cs in calibrated_sensor + if cs["sensor_token"] not in sensor_tokens + ] + case _: + return [Reason("Missing `CalibratedSensor` or `Sensor` file")] diff --git a/t4_devkit/sanity/reference/ref009.py b/t4_devkit/sanity/reference/ref009.py new file mode 100644 index 0000000..4d275d8 --- /dev/null +++ b/t4_devkit/sanity/reference/ref009.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["REF009"] + + +@CHECKERS.register(RuleID("REF009")) +class REF009(Checker): + """A checker of REF009.""" + + name = RuleName("instance-to-category") + description = "'Instance.category_token' refers to 'Category' record." 
+ + def check(self, context: SanityContext) -> list[Reason]: + instance_file = context.to_schema_file(SchemaName.INSTANCE) + category_file = context.to_schema_file(SchemaName.CATEGORY) + match (instance_file, category_file): + case Some(x), Some(y): + instance = load_json_safe(x).unwrap() + category_tokens = [item["token"] for item in load_json_safe(y).unwrap()] + return [ + Reason(f"No reference to `Instance.category_token`: {ins['category_token']}") + for ins in instance + if ins["category_token"] not in category_tokens + ] + case _: + return [Reason("Missing `Instance` or `Category` file")] diff --git a/t4_devkit/sanity/reference/ref010.py b/t4_devkit/sanity/reference/ref010.py new file mode 100644 index 0000000..2e01d29 --- /dev/null +++ b/t4_devkit/sanity/reference/ref010.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["REF010"] + + +@CHECKERS.register(RuleID("REF010")) +class REF010(Checker): + """A checker of REF010.""" + + name = RuleName("instance-to-first-sample-annotation") + description = "'Instance.first_annotation_token' refers to 'SampleAnnotation' record." 
+ + def check(self, context: SanityContext) -> list[Reason]: + instance_file = context.to_schema_file(SchemaName.INSTANCE) + sample_ann_file = context.to_schema_file(SchemaName.SAMPLE_ANNOTATION) + match (instance_file, sample_ann_file): + case Some(x), Some(y): + instance = load_json_safe(x).unwrap() + sample_ann_tokens = [item["token"] for item in load_json_safe(y).unwrap()] + return [ + Reason( + f"No reference to `Instance.first_annotation_token`: {ins['first_annotation_token']}" + ) + for ins in instance + if ins["first_annotation_token"] not in sample_ann_tokens + ] + case _: + return [Reason("Missing `Instance` or `SampleAnnotation` file")] diff --git a/t4_devkit/sanity/reference/ref011.py b/t4_devkit/sanity/reference/ref011.py new file mode 100644 index 0000000..103c55c --- /dev/null +++ b/t4_devkit/sanity/reference/ref011.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["REF011"] + + +@CHECKERS.register(RuleID("REF011")) +class REF011(Checker): + """A checker of REF011.""" + + name = RuleName("instance-to-last-sample-annotation") + description = "'Instance.last_annotation_token' refers to 'SampleAnnotation' record." 
+ + def check(self, context: SanityContext) -> list[Reason]: + instance_file = context.to_schema_file(SchemaName.INSTANCE) + sample_ann_file = context.to_schema_file(SchemaName.SAMPLE_ANNOTATION) + match (instance_file, sample_ann_file): + case Some(x), Some(y): + instance = load_json_safe(x).unwrap() + sample_ann_tokens = [item["token"] for item in load_json_safe(y).unwrap()] + return [ + Reason( + f"No reference to `Instance.last_annotation_token`: {ins['last_annotation_token']}" + ) + for ins in instance + if ins["last_annotation_token"] not in sample_ann_tokens + ] + case _: + return [Reason("Missing `Instance` or `SampleAnnotation` file")] diff --git a/t4_devkit/sanity/registry.py b/t4_devkit/sanity/registry.py new file mode 100644 index 0000000..00a2e54 --- /dev/null +++ b/t4_devkit/sanity/registry.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +import inspect +from collections.abc import Callable, Sequence +from enum import Enum, unique + +from .checker import Checker, RuleID + + +@unique +class RuleGroup(Enum): + STRUCTURE = "STR" + SCHEMA = "SCH" + REFERENCE = "REF" + FORMAT = "FMT" + + @classmethod + def values(cls) -> list[str]: + return [group.value for group in cls] + + +class CheckerRegistry(dict[RuleGroup, dict[RuleID, type[Checker]]]): + def register(self, id: RuleID) -> Callable: + # TODO(ktro2828); Need to validate code after splitting code into group and number + group = None + for g in RuleGroup: + if g.value in id: + group = g + + if group is None: + raise ValueError(f"'{id}' doesn't belong to any rule groups: {RuleGroup.values()}") + + def _register_decorator(module: type[Checker]) -> type[Checker]: + self._add_module(module, group, id) + return module + + return _register_decorator + + def _add_module(self, module: type[Checker], group: RuleGroup, id: RuleID) -> None: + if not inspect.isclass(module): + raise TypeError(f"module must be a class, but got {type(module)}.") + + if group not in self: + self[group] = {} + + if id in 
class CheckerRegistry(dict[RuleGroup, dict[RuleID, type[Checker]]]):
    """Mapping of rule group -> rule ID -> checker class, filled by `register`."""

    def register(self, id: RuleID) -> Callable:
        """Return a class decorator that stores the decorated checker under `id`.

        Args:
            id: Rule ID; it must start with one of the `RuleGroup` codes.

        Returns:
            A decorator that registers the class and returns it unchanged.

        Raises:
            ValueError: If `id` starts with no known group code.
        """
        # TODO(ktro2828); Need to validate code after splitting code into group and number
        # Match on the prefix: a substring test would also accept IDs that
        # merely contain a group code somewhere in the middle.
        group = next((g for g in RuleGroup if id.startswith(g.value)), None)

        if group is None:
            raise ValueError(f"'{id}' doesn't belong to any rule groups: {RuleGroup.values()}")

        def _register_decorator(module: type[Checker]) -> type[Checker]:
            self._add_module(module, group, id)
            return module

        return _register_decorator

    def _add_module(self, module: type[Checker], group: RuleGroup, id: RuleID) -> None:
        """Store `module` under `group`/`id`.

        Raises:
            TypeError: If `module` is not a class.
            ValueError: If `id` is already registered in `group`.
        """
        if not inspect.isclass(module):
            raise TypeError(f"module must be a class, but got {type(module)}.")

        rules = self.setdefault(group, {})
        if id in rules:
            raise ValueError(f"'{id}' has already been registered.")

        rules[id] = module

    def build(self, excludes: Sequence[str] | None = None) -> list[Checker]:
        """Instantiate every registered checker whose rule ID is not excluded.

        Args:
            excludes: Rule IDs to leave out; `None` keeps every checker.

        Returns:
            Checker instances, each constructed with its rule ID.
        """
        if excludes is None:
            excludes = []

        return [
            checker(rule)
            for rules in self.values()
            for rule, checker in rules.items()
            if rule not in excludes
        ]


CHECKERS = CheckerRegistry()


class Status(str, Enum):
    """Outcome of a single rule."""

    SUCCESS = "SUCCESS"
    FAILURE = "FAILURE"
    SKIPPED = "SKIPPED"

    def is_success(self) -> bool:
        """Return True for `SUCCESS`."""
        return self == Status.SUCCESS

    def is_failure(self) -> bool:
        """Return True for `FAILURE`."""
        return self == Status.FAILURE

    def is_skipped(self) -> bool:
        """Return True for `SKIPPED`."""
        return self == Status.SKIPPED


# Human-readable explanation attached to a failed or skipped rule.
Reason = NewType("Reason", str)


@define
class Report:
    """Result of running one rule."""

    id: RuleID
    name: RuleName
    description: str
    status: Status
    # Left as None by `make_success`; the other factories pass a list.
    reasons: list[Reason] | None = field(default=None)

    def is_success(self) -> bool:
        """Return True when the rule succeeded."""
        return self.status == Status.SUCCESS

    def is_failure(self) -> bool:
        """Return True when the rule failed."""
        return self.status == Status.FAILURE

    def is_skipped(self) -> bool:
        """Return True when the rule was skipped."""
        return self.status == Status.SKIPPED


def make_success(id: RuleID, name: RuleName, description: str) -> Report:
    """Make a success report for the given rule."""
    return Report(id, name, description, Status.SUCCESS)


def make_skipped(id: RuleID, name: RuleName, description: str, reason: Reason) -> Report:
    """Make a skipped report for the given rule."""
    return Report(id, name, description, Status.SKIPPED, [reason])
def make_failure(id: RuleID, name: RuleName, description: str, reasons: list[Reason]) -> Report:
    """Build a `FAILURE` report carrying the given reasons."""
    return Report(id, name, description, Status.FAILURE, reasons)


@define
class SanityResult:
    """Reports of one sanity run, keyed by rule ID, plus the dataset identity."""

    dataset_id: str
    version: str | None
    reports: dict[str, Report]

    @classmethod
    def from_context(cls, context: SanityContext, reports: dict[RuleID, Report]) -> Self:
        """Build a result, falling back to "UNKNOWN" / None when the context lacks a value."""
        dataset_id = context.dataset_id.value_or("UNKNOWN")
        version = context.version.value_or(None)
        return cls(dataset_id=dataset_id, version=version, reports=reports)

    def __repr__(self) -> str:
        """Render a coloured summary: red for failures, yellow for skips, green otherwise."""
        lines = [f"=== DatasetID: {self.dataset_id} ===\n"]
        for rule_id, report in self.reports.items():
            if report.is_failure():
                colour = "\033[31m"
            elif report.is_skipped():
                colour = "\033[33m"
            else:
                lines.append(f"\033[32m {rule_id}: ✅\033[0m\n")
                continue
            lines.append(f"{colour} {rule_id}:\033[0m\n")
            lines.extend(f"{colour} - {reason}\033[0m\n" for reason in report.reasons or [])
        return "".join(lines)


def sanity_check(
    data_root: str,
    revision: str | None = None,
    *,
    include_warning: bool = False,
    excludes: Sequence[str] | None = None,
) -> SanityResult:
    """Run every registered checker against the dataset at `data_root`.

    Args:
        data_root: Dataset root passed to `SanityContext.from_path`.
        revision: Optional revision passed to `SanityContext.from_path`.
        include_warning: When True, warnings are turned into errors while the
            checkers run; otherwise they are ignored.
        excludes: Rule IDs to leave out of the run.

    Returns:
        The collected reports, keyed by checker ID.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("error" if include_warning else "ignore")

        context = SanityContext.from_path(data_root, revision=revision)
        reports = {
            checker.id: checker(context) for checker in CHECKERS.build(excludes=excludes)
        }

    return SanityResult.from_context(context, reports)
@safe
def load_json_safe(filename: str) -> list[dict]:
    """Load JSON file safely.

    The annotation is the raw return type: `@safe` itself wraps the value in a
    `Result` and captures any exception `load_json` raises as the failure.

    Args:
        filename: Path of the JSON file to load.
    """
    return load_json(filename)


@safe
def init_tier4_safe(data_root: str, revision: str | None = None) -> Tier4:
    """Initialize Tier4 instance safely.

    The annotation is the raw return type: `@safe` itself wraps the instance
    in a `Result` and captures any exception the constructor raises.

    Args:
        data_root: Dataset root passed to `Tier4`.
        revision: Optional revision passed to `Tier4`.
    """
    return Tier4(data_root, revision=revision, verbose=False)
+ + def check(self, context: SanityContext) -> list[Reason]: + match context.to_schema_file(SchemaName.SCENE): + case Some(x): + result = load_json_safe(x.as_posix()) + if not is_successful(result): + return [Reason("Failed to load 'Scene' file")] + else: + num_scene = len(result.unwrap()) + return ( + [] + if num_scene == 1 + else [Reason(f"'Scene' must contain exactly one element: {num_scene}")] + ) + case _: + return [Reason("Failed to load 'Scene' file")] diff --git a/t4_devkit/sanity/schema/sch002.py b/t4_devkit/sanity/schema/sch002.py new file mode 100644 index 0000000..da362d3 --- /dev/null +++ b/t4_devkit/sanity/schema/sch002.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["SCH002"] + + +@CHECKERS.register(RuleID("SCH002")) +class SCH002(Checker): + """A checker of SCH002.""" + + name = RuleName("sample-not-empty") + description = "'Sample' record is not empty." 
+ + def check(self, context: SanityContext): + match context.to_schema_file(SchemaName.SAMPLE): + case Some(x): + result = load_json_safe(x.as_posix()) + if not is_successful(result): + return [Reason("Failed to load 'Sample' file")] + else: + num_sample = len(result.unwrap()) + return [] if num_sample > 0 else [Reason("No 'Sample' found")] + case _: + return [Reason("Failed to load 'Sample' file")] diff --git a/t4_devkit/sanity/schema/sch003.py b/t4_devkit/sanity/schema/sch003.py new file mode 100644 index 0000000..fde6671 --- /dev/null +++ b/t4_devkit/sanity/schema/sch003.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["SCH003"] + + +@CHECKERS.register(RuleID("SCH003")) +class SCH003(Checker): + """A checker of SCH003.""" + + name = RuleName("sample-data-not-empty") + description = "'SampleData' record is not empty." 
+ + def check(self, context: SanityContext) -> list[Reason]: + match context.to_schema_file(SchemaName.SAMPLE_DATA): + case Some(x): + result = load_json_safe(x.as_posix()) + if not is_successful(result): + return [Reason("Failed to load 'SampleData' file")] + else: + num_sample_data = len(result.unwrap()) + return [] if num_sample_data > 0 else [Reason("No 'SampleData' found")] + case _: + return [Reason("Failed to load 'SampleData' file")] diff --git a/t4_devkit/sanity/schema/sch004.py b/t4_devkit/sanity/schema/sch004.py new file mode 100644 index 0000000..c1b2090 --- /dev/null +++ b/t4_devkit/sanity/schema/sch004.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["SCH004"] + + +@CHECKERS.register(RuleID("SCH004")) +class SCH004(Checker): + """A checker of SCH004.""" + + name = RuleName("ego-pose-not-empty") + description = "'EgoPose' record is not empty." 
+ + def check(self, context: SanityContext) -> list[Reason]: + match context.to_schema_file(SchemaName.EGO_POSE): + case Some(x): + result = load_json_safe(x.as_posix()) + if not is_successful(result): + return [Reason("Failed to load 'EgoPose' file")] + else: + num_ego_pose = len(result.unwrap()) + return [] if num_ego_pose > 0 else [Reason("No 'EgoPose' found")] + case _: + return [Reason("Failed to load 'EgoPose' file")] diff --git a/t4_devkit/sanity/schema/sch005.py b/t4_devkit/sanity/schema/sch005.py new file mode 100644 index 0000000..60a3c52 --- /dev/null +++ b/t4_devkit/sanity/schema/sch005.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["SCH005"] + + +@CHECKERS.register(RuleID("SCH005")) +class SCH005(Checker): + """A checker of SCH005.""" + + name = RuleName("calibrated-sensor-not-empty") + description = "'CalibratedSensor' record is not empty." 
+ + def check(self, context: SanityContext) -> list[Reason]: + match context.to_schema_file(SchemaName.CALIBRATED_SENSOR): + case Some(x): + result = load_json_safe(x.as_posix()) + if not is_successful(result): + return [Reason("Failed to load 'CalibratedSensor' file")] + else: + num_calibrated_sensor = len(result.unwrap()) + return ( + [] if num_calibrated_sensor > 0 else [Reason("No 'CalibratedSensor' found")] + ) + case _: + return [Reason("Failed to load 'CalibratedSensor' file")] diff --git a/t4_devkit/sanity/schema/sch006.py b/t4_devkit/sanity/schema/sch006.py new file mode 100644 index 0000000..6ecb737 --- /dev/null +++ b/t4_devkit/sanity/schema/sch006.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some +from returns.pipeline import is_successful + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["SCH006"] + + +@CHECKERS.register(RuleID("SCH006")) +class SCH006(Checker): + """A checker of SCH006.""" + + name = RuleName("instance-not-empty") + description = "'Instance' record is not empty." 
+ + def check(self, context: SanityContext) -> list[Reason]: + match context.to_schema_file(SchemaName.INSTANCE): + case Some(x): + result = load_json_safe(x.as_posix()) + if not is_successful(result): + return [Reason("Failed to load 'Instance' file")] + else: + num_instance = len(result.unwrap()) + return [] if num_instance > 0 else [Reason("No 'Instance' found")] + case _: + return [Reason("Failed to load 'Instance' file")] diff --git a/t4_devkit/sanity/structure/__init__.py b/t4_devkit/sanity/structure/__init__.py new file mode 100644 index 0000000..0d1224d --- /dev/null +++ b/t4_devkit/sanity/structure/__init__.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from .str001 import * # noqa +from .str002 import * # noqa +from .str003 import * # noqa +from .str004 import * # noqa +from .str005 import * # noqa +from .str006 import * # noqa +from .str007 import * # noqa +from .str008 import * # noqa +from .str009 import * # noqa diff --git a/t4_devkit/sanity/structure/str001.py b/t4_devkit/sanity/structure/str001.py new file mode 100644 index 0000000..7a0dbf3 --- /dev/null +++ b/t4_devkit/sanity/structure/str001.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason + +if TYPE_CHECKING: + from ..context import SanityContext + + +__all__ = ["STR001"] + + +@CHECKERS.register(RuleID("STR001")) +class STR001(Checker): + """A checker of STR001.""" + + name = RuleName("version-dir-presence") + description = "'version/' directory exists under the dataset root directory." 
+ + def check(self, context: SanityContext) -> list[Reason]: + match context.version: + case Some(_): + return [] + case _: + return [Reason("'version' directory doesn't exist")] diff --git a/t4_devkit/sanity/structure/str002.py b/t4_devkit/sanity/structure/str002.py new file mode 100644 index 0000000..5b8581e --- /dev/null +++ b/t4_devkit/sanity/structure/str002.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason + +if TYPE_CHECKING: + from ..context import SanityContext + + +__all__ = ["STR002"] + + +@CHECKERS.register(RuleID("STR002")) +class STR002(Checker): + """A checker of STR002.""" + + name = RuleName("annotation-dir-presence") + description = "'annotation/' directory exists under the dataset root directory." + + def check(self, context: SanityContext) -> list[Reason]: + match context.annotation_dir: + case Some(x): + return ( + [] + if x.exists() + else [Reason(f"Path to 'annotation' not found: {x.as_posix()}")] + ) + case _: + return [Reason("dataset directory does't contain 'annotation' directory")] diff --git a/t4_devkit/sanity/structure/str003.py b/t4_devkit/sanity/structure/str003.py new file mode 100644 index 0000000..1d5e466 --- /dev/null +++ b/t4_devkit/sanity/structure/str003.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason + +if TYPE_CHECKING: + from ..context import SanityContext + + +__all__ = ["STR003"] + + +@CHECKERS.register(RuleID("STR003")) +class STR003(Checker): + """A checker of STR003.""" + + name = RuleName("data-dir-presence") + description = "'data/' directory exists under the dataset root directory." 
+ + def check(self, context: SanityContext) -> list[Reason]: + match context.sensor_data_dir: + case Some(x): + return ( + [] + if x.exists() + else [Reason(f"Path to 'data' directory not found: {x.as_posix()}")] + ) + case _: + return [Reason("dataset directory does't contain 'data' directory")] diff --git a/t4_devkit/sanity/structure/str004.py b/t4_devkit/sanity/structure/str004.py new file mode 100644 index 0000000..b13c9f9 --- /dev/null +++ b/t4_devkit/sanity/structure/str004.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason + +if TYPE_CHECKING: + from ..context import SanityContext + + +__all__ = ["STR004"] + + +@CHECKERS.register(RuleID("STR004")) +class STR004(Checker): + """A checker of STR004.""" + + name = RuleName("map-dir-presence") + description = "'map/' directory exists under the dataset root directory." 
+ + def check(self, context: SanityContext) -> list[Reason]: + match context.map_dir: + case Some(x): + return [] if x.exists() else [Reason(f"Path to 'map' not found: {x.as_posix()}")] + case _: + return [Reason("dataset directory doesn't contain 'map' directory")] diff --git a/t4_devkit/sanity/structure/str005.py b/t4_devkit/sanity/structure/str005.py new file mode 100644 index 0000000..6bd43e8 --- /dev/null +++ b/t4_devkit/sanity/structure/str005.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason + +if TYPE_CHECKING: + from ..context import SanityContext + + +__all__ = ["STR005"] + + +@CHECKERS.register(RuleID("STR005")) +class STR005(Checker): + """A checker of STR005.""" + + name = RuleName("bag-dir-presence") + description = "'input_bag/' directory exists under the dataset root directory." + + def check(self, context: SanityContext) -> list[Reason]: + match context.bag_dir: + case Some(x): + return ( + [] if x.exists() else [Reason(f"Path to 'input_bag' not found: {x.as_posix()}")] + ) + case _: + return [Reason("dataset directory doesn't contain 'input_bag' directory")] diff --git a/t4_devkit/sanity/structure/str006.py b/t4_devkit/sanity/structure/str006.py new file mode 100644 index 0000000..992e94a --- /dev/null +++ b/t4_devkit/sanity/structure/str006.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason + +if TYPE_CHECKING: + from ..context import SanityContext + + +__all__ = ["STR006"] + + +@CHECKERS.register(RuleID("STR006")) +class STR006(Checker): + """A checker of STR006.""" + + name = RuleName("status-json-presence") + description = "'status.json' file exists under the dataset root 
directory." + + def check(self, context: SanityContext) -> list[Reason]: + match context.status_json: + case Some(x): + return ( + [] + if x.exists() + else [Reason(f"Path to 'status.json' not found: {x.as_posix()}")] + ) + case _: + return [Reason("dataset directory doesn't contain 'status.json' file")] diff --git a/t4_devkit/sanity/structure/str007.py b/t4_devkit/sanity/structure/str007.py new file mode 100644 index 0000000..fac9bbf --- /dev/null +++ b/t4_devkit/sanity/structure/str007.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from t4_devkit.schema import SchemaName + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason + +if TYPE_CHECKING: + from ..context import SanityContext + + +__all__ = ["STR007"] + + +@CHECKERS.register(RuleID("STR007")) +class STR007(Checker): + """A checker of STR007.""" + + name = RuleName("schema-file-presence") + description = "Mandatory schema JSON files exist under the `annotation/` directory." 
+ + def check(self, context: SanityContext) -> list[Reason]: + failures = [] + for schema in SchemaName: + match context.to_schema_file(schema): + case Some(x): + if not x.exists() and not schema.is_optional(): + failures.append(Reason(f"schema file '{schema.filename}' not found")) + case _: + failures.append(Reason(f"schema file '{schema.filename}' not found")) + return failures diff --git a/t4_devkit/sanity/structure/str008.py b/t4_devkit/sanity/structure/str008.py new file mode 100644 index 0000000..196fa78 --- /dev/null +++ b/t4_devkit/sanity/structure/str008.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["STR008"] + + +@CHECKERS.register(RuleID("STR008")) +class STR008(Checker): + """A checker of STR008.""" + + name = RuleName("lanelet-file-presence") + description = "'lanelet2_map.osm' file exists under the 'map/' directory." 
+ + def check(self, context: SanityContext) -> list[Reason]: + match context.map_dir: + case Some(x): + if not x.exists(): + return [Reason(f"Path to 'map' directory not found: {x.as_posix()}")] + lanelet_file = x.joinpath("lanelet2_map.osm") + return ( + [Reason(f"Lanelet2 map file not found: {lanelet_file.as_posix()}")] + if not lanelet_file.exists() + else [] + ) + case _: + return [Reason("dataset directory doesn't contain 'map' directory")] diff --git a/t4_devkit/sanity/structure/str009.py b/t4_devkit/sanity/structure/str009.py new file mode 100644 index 0000000..df23c9c --- /dev/null +++ b/t4_devkit/sanity/structure/str009.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from ..checker import Checker, RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["STR009"] + + +@CHECKERS.register(RuleID("STR009")) +class STR009(Checker): + """A checker of STR009.""" + + name = RuleName("pointcloud-map-dir-presence") + description = "'pointcloud_map.pcd' directory exists under the 'map/' directory." + + def check(self, context: SanityContext) -> list[Reason]: + match context.map_dir: + case Some(x): + if not x.exists(): + return [Reason(f"Path to 'map' directory not found: {x.as_posix()}")] + pointcloud_map_dir = x.joinpath("pointcloud_map.pcd") + return ( + [Reason(f"PCD map directory not found: {pointcloud_map_dir.as_posix()}")] + if not pointcloud_map_dir.exists() + else [] + ) + case _: + return [Reason("dataset directory doesn't contain 'map' directory")] diff --git a/t4_devkit/schema/builder.py b/t4_devkit/schema/builder.py index bcf7dcf..c0ae7b9 100644 --- a/t4_devkit/schema/builder.py +++ b/t4_devkit/schema/builder.py @@ -22,3 +22,13 @@ def build_schema(name: str | SchemaName, filepath: str) -> list[SchemaTable]: List of schema dataclasses. 
""" return SCHEMAS.build_from_json(name, filepath) + + +def build_schema_safe(name: str | SchemaName, filepath: str) -> list[SchemaTable]: + """Build schema dataclass from json file path safely. + + Args: + name (str | SchemaName): Name of schema table. + filepath (str): Path to json file. + """ + return SCHEMAS.build_from_json_safe(name, filepath) From 0a2d03fa8756457151235f609b81dbe11c41a807 Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Fri, 24 Oct 2025 10:10:01 +0900 Subject: [PATCH 02/21] feat: skip if the schema is optional Signed-off-by: ktro2828 --- t4_devkit/sanity/format/fmt014.py | 2 ++ t4_devkit/sanity/format/fmt015.py | 2 ++ t4_devkit/sanity/format/fmt016.py | 2 ++ t4_devkit/sanity/format/fmt017.py | 2 ++ t4_devkit/sanity/format/fmt018.py | 2 ++ 5 files changed, 10 insertions(+) diff --git a/t4_devkit/sanity/format/fmt014.py b/t4_devkit/sanity/format/fmt014.py index f3bfe66..bdb2a3b 100644 --- a/t4_devkit/sanity/format/fmt014.py +++ b/t4_devkit/sanity/format/fmt014.py @@ -30,6 +30,8 @@ def check(self, context: SanityContext) -> list[Reason]: schema = SchemaName.LIDARSEG match context.to_schema_file(schema): case Some(x): + if not x.exists() and schema.is_optional(): + return [] records = load_json_safe(x.as_posix()) if not is_successful(records): return [Reason("Invalid `LidarSeg` file")] diff --git a/t4_devkit/sanity/format/fmt015.py b/t4_devkit/sanity/format/fmt015.py index d3980cd..dd41e0a 100644 --- a/t4_devkit/sanity/format/fmt015.py +++ b/t4_devkit/sanity/format/fmt015.py @@ -30,6 +30,8 @@ def check(self, context: SanityContext) -> list[Reason]: schema = SchemaName.OBJECT_ANN match context.to_schema_file(schema): case Some(x): + if not x.exists() and schema.is_optional(): + return [] records = load_json_safe(x.as_posix()) if not is_successful(records): return [Reason("Invalid `ObjectAnn` file")] diff --git a/t4_devkit/sanity/format/fmt016.py b/t4_devkit/sanity/format/fmt016.py index 98234b0..6c736d9 100644 --- a/t4_devkit/sanity/format/fmt016.py 
+++ b/t4_devkit/sanity/format/fmt016.py @@ -30,6 +30,8 @@ def check(self, context: SanityContext) -> list[Reason]: schema = SchemaName.SURFACE_ANN match context.to_schema_file(schema): case Some(x): + if not x.exists() and schema.is_optional(): + return [] records = load_json_safe(x.as_posix()) if not is_successful(records): return [Reason("Invalid `SurfaceAnn` file")] diff --git a/t4_devkit/sanity/format/fmt017.py b/t4_devkit/sanity/format/fmt017.py index d949547..8498421 100644 --- a/t4_devkit/sanity/format/fmt017.py +++ b/t4_devkit/sanity/format/fmt017.py @@ -30,6 +30,8 @@ def check(self, context: SanityContext) -> list[Reason]: schema = SchemaName.KEYPOINT match context.to_schema_file(schema): case Some(x): + if not x.exists() and schema.is_optional(): + return [] records = load_json_safe(x.as_posix()) if not is_successful(records): return [Reason("Invalid `Keypoint` file")] diff --git a/t4_devkit/sanity/format/fmt018.py b/t4_devkit/sanity/format/fmt018.py index 7667fd6..42e6081 100644 --- a/t4_devkit/sanity/format/fmt018.py +++ b/t4_devkit/sanity/format/fmt018.py @@ -30,6 +30,8 @@ def check(self, context: SanityContext) -> list[Reason]: schema = SchemaName.VEHICLE_STATE match context.to_schema_file(schema): case Some(x): + if not x.exists() and schema.is_optional(): + return [] records = load_json_safe(x.as_posix()) if not is_successful(records): return [Reason("Invalid `VehicleState` file")] From 36a3623507122507a3609518feead2cf691c6a39 Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Fri, 24 Oct 2025 10:19:02 +0900 Subject: [PATCH 03/21] refactor: update display format Signed-off-by: ktro2828 --- t4_devkit/cli/sanity.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/t4_devkit/cli/sanity.py b/t4_devkit/cli/sanity.py index 2a1b2f8..5a94e3b 100644 --- a/t4_devkit/cli/sanity.py +++ b/t4_devkit/cli/sanity.py @@ -32,7 +32,7 @@ def _run_sanity_check( ] -def _pretty_print(results: list[SanityResult], *, detail: bool = False) -> str: +def 
_print_table(results: list[SanityResult], *, detail: bool = False) -> str: summary_rows = [] for result in results: success = sum(1 for rp in result.reports.values() if rp.is_success()) @@ -42,7 +42,7 @@ def _pretty_print(results: list[SanityResult], *, detail: bool = False) -> str: [ result.dataset_id, result.version, - "FAILURE" if failures > 0 else "SUCCESS", + "\033[31mFAILURE\033[0m" if failures > 0 else "\033[32mSUCCESS\033[0m", len(result.reports), success, failures, @@ -53,11 +53,11 @@ def _pretty_print(results: list[SanityResult], *, detail: bool = False) -> str: if detail: print(result) - print(f"\n{'=' * 20} Summary {'=' * 20}") + print(f"\n{'=' * 40} Summary {'=' * 40}") print( tabulate( summary_rows, - headers=["DatasetID", "Version", "Overall", "Rules", "Success", "Failures", "Skips"], + headers=["DatasetID", "Version", "Status", "Rules", "Success", "Failures", "Skips"], tablefmt="pretty", ), ) @@ -87,7 +87,7 @@ def main( ) -> None: results = _run_sanity_check(db_parent, revision=revision, include_warning=include_warning) - _pretty_print(results, detail=detail) + _print_table(results, detail=detail) if output: serialized = serialize_dataclasses(results) From 64618c047d77a62a3cd43c2da6db7513db7534fe Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Fri, 24 Oct 2025 14:47:27 +0900 Subject: [PATCH 04/21] chore: fix typo and add type hinting Signed-off-by: ktro2828 --- t4_devkit/sanity/schema/sch002.py | 3 +-- t4_devkit/sanity/structure/str002.py | 2 +- t4_devkit/sanity/structure/str003.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/t4_devkit/sanity/schema/sch002.py b/t4_devkit/sanity/schema/sch002.py index da362d3..173c45c 100644 --- a/t4_devkit/sanity/schema/sch002.py +++ b/t4_devkit/sanity/schema/sch002.py @@ -5,7 +5,6 @@ from returns.maybe import Some from returns.pipeline import is_successful - from t4_devkit.schema import SchemaName from ..checker import Checker, RuleID, RuleName @@ -26,7 +25,7 @@ class SCH002(Checker): name = 
RuleName("sample-not-empty") description = "'Sample' record is not empty." - def check(self, context: SanityContext): + def check(self, context: SanityContext) -> list[Reason]: match context.to_schema_file(SchemaName.SAMPLE): case Some(x): result = load_json_safe(x.as_posix()) diff --git a/t4_devkit/sanity/structure/str002.py b/t4_devkit/sanity/structure/str002.py index 5b8581e..356f13a 100644 --- a/t4_devkit/sanity/structure/str002.py +++ b/t4_devkit/sanity/structure/str002.py @@ -31,4 +31,4 @@ def check(self, context: SanityContext) -> list[Reason]: else [Reason(f"Path to 'annotation' not found: {x.as_posix()}")] ) case _: - return [Reason("dataset directory does't contain 'annotation' directory")] + return [Reason("dataset directory doesn't contain 'annotation' directory")] diff --git a/t4_devkit/sanity/structure/str003.py b/t4_devkit/sanity/structure/str003.py index 1d5e466..b3a012f 100644 --- a/t4_devkit/sanity/structure/str003.py +++ b/t4_devkit/sanity/structure/str003.py @@ -31,4 +31,4 @@ def check(self, context: SanityContext) -> list[Reason]: else [Reason(f"Path to 'data' directory not found: {x.as_posix()}")] ) case _: - return [Reason("dataset directory does't contain 'data' directory")] + return [Reason("dataset directory doesn't contain 'data' directory")] From 1f432a5b2c59d1543b13a38ae45cd6f034136251 Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Fri, 24 Oct 2025 15:08:26 +0900 Subject: [PATCH 05/21] refactor: add base class for field type checker Signed-off-by: ktro2828 --- t4_devkit/sanity/format/base.py | 55 ++++++++++++++++++++++++++++++ t4_devkit/sanity/format/fmt001.py | 25 +++----------- t4_devkit/sanity/format/fmt002.py | 25 +++----------- t4_devkit/sanity/format/fmt003.py | 25 +++----------- t4_devkit/sanity/format/fmt004.py | 25 +++----------- t4_devkit/sanity/format/fmt005.py | 25 +++----------- t4_devkit/sanity/format/fmt006.py | 25 +++----------- t4_devkit/sanity/format/fmt007.py | 25 +++----------- t4_devkit/sanity/format/fmt008.py | 25 
+++----------- t4_devkit/sanity/format/fmt009.py | 25 +++----------- t4_devkit/sanity/format/fmt010.py | 25 +++----------- t4_devkit/sanity/format/fmt011.py | 25 +++----------- t4_devkit/sanity/format/fmt012.py | 25 +++----------- t4_devkit/sanity/format/fmt013.py | 25 +++----------- t4_devkit/sanity/format/fmt014.py | 27 +++------------ t4_devkit/sanity/format/fmt015.py | 27 +++------------ t4_devkit/sanity/format/fmt016.py | 27 +++------------ t4_devkit/sanity/format/fmt017.py | 27 +++------------ t4_devkit/sanity/format/fmt018.py | 27 +++------------ t4_devkit/sanity/format/utility.py | 23 ------------- t4_devkit/schema/builder.py | 10 ------ 21 files changed, 145 insertions(+), 403 deletions(-) create mode 100644 t4_devkit/sanity/format/base.py delete mode 100644 t4_devkit/sanity/format/utility.py diff --git a/t4_devkit/sanity/format/base.py b/t4_devkit/sanity/format/base.py new file mode 100644 index 0000000..d041491 --- /dev/null +++ b/t4_devkit/sanity/format/base.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Maybe, Nothing, Some +from returns.pipeline import is_successful +from returns.result import Result, safe + +from t4_devkit.schema import SCHEMAS, SchemaBase, SchemaName + +from ..checker import Checker +from ..result import Reason +from ..safety import load_json_safe + +if TYPE_CHECKING: + from ..context import SanityContext + + +class FieldTypeChecker(Checker): + """Base class for format checkers.""" + + schema: SchemaName + + def can_skip(self, context: SanityContext) -> Maybe[Reason]: + match context.to_schema_file(self.schema): + case Some(x): + if not x.exists() and not self.schema.is_optional(): + return Maybe.from_value(Reason(f"No '{self.schema.filename}' found")) + return Nothing + case _: + return Nothing + + def check(self, context: SanityContext) -> list[Reason]: + filepath = context.to_schema_file(self.schema).unwrap() + + if self.schema.is_optional() and not 
filepath.exists(): + return [] + + records = load_json_safe(filepath) + return build_records(self.schema, records.unwrap()) + + +def build_records(schema: SchemaName, records: list[dict]) -> list[Reason]: + module = SCHEMAS.get(schema) + failures = [] + for record in records: + conversion = _safe_from_dict(module, record) + if not is_successful(conversion): + failures.append(Reason(f"[{schema.name}] {record['token']}: {conversion.failure()}")) + return failures + + +@safe +def _safe_from_dict(module: type[SchemaBase], record: dict) -> Result[SchemaBase, Exception]: + return module.from_dict(record) diff --git a/t4_devkit/sanity/format/fmt001.py b/t4_devkit/sanity/format/fmt001.py index bfce937..04acc3b 100644 --- a/t4_devkit/sanity/format/fmt001.py +++ b/t4_devkit/sanity/format/fmt001.py @@ -2,37 +2,22 @@ from typing import TYPE_CHECKING -from returns.maybe import Some -from returns.pipeline import is_successful - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe -from .utility import build_records +from .base import FieldTypeChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["FMT001"] @CHECKERS.register(RuleID("FMT001")) -class FMT001(Checker): +class FMT001(FieldTypeChecker): """A checker of FMT001.""" name = RuleName("attribute-field") description = "All types of 'Attribute' fields are valid." 
- - def check(self, context: SanityContext) -> list[Reason]: - schema = SchemaName.ATTRIBUTE - match context.to_schema_file(schema): - case Some(x): - records = load_json_safe(x.as_posix()) - if not is_successful(records): - return [Reason("Invalid `Attribute` file")] - return build_records(schema, records.unwrap()) - case _: - return [Reason("No `Attribute` file found")] + schema = SchemaName.ATTRIBUTE diff --git a/t4_devkit/sanity/format/fmt002.py b/t4_devkit/sanity/format/fmt002.py index da08a57..525ebbf 100644 --- a/t4_devkit/sanity/format/fmt002.py +++ b/t4_devkit/sanity/format/fmt002.py @@ -2,37 +2,22 @@ from typing import TYPE_CHECKING -from returns.maybe import Some -from returns.pipeline import is_successful - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe -from .utility import build_records +from .base import FieldTypeChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["FMT002"] @CHECKERS.register(RuleID("FMT002")) -class FMT002(Checker): +class FMT002(FieldTypeChecker): """A checker of FMT002.""" name = RuleName("calibrated-sensor-field") description = "All types of 'CalibratedSensor' fields are valid." 
- - def check(self, context: SanityContext) -> list[Reason]: - schema = SchemaName.CALIBRATED_SENSOR - match context.to_schema_file(schema): - case Some(x): - records = load_json_safe(x.as_posix()) - if not is_successful(records): - return [Reason("Invalid `CalibratedSensor` file")] - return build_records(schema, records.unwrap()) - case _: - return [Reason("No `CalibratedSensor` file found")] + schema = SchemaName.CALIBRATED_SENSOR diff --git a/t4_devkit/sanity/format/fmt003.py b/t4_devkit/sanity/format/fmt003.py index dfb54b2..c524e87 100644 --- a/t4_devkit/sanity/format/fmt003.py +++ b/t4_devkit/sanity/format/fmt003.py @@ -2,37 +2,22 @@ from typing import TYPE_CHECKING -from returns.maybe import Some -from returns.pipeline import is_successful - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe -from .utility import build_records +from .base import FieldTypeChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["FMT003"] @CHECKERS.register(RuleID("FMT003")) -class FMT003(Checker): +class FMT003(FieldTypeChecker): """A checker of FMT003.""" name = RuleName("category-field") description = "All types of 'Category' fields are valid." 
- - def check(self, context: SanityContext) -> list[Reason]: - schema = SchemaName.CATEGORY - match context.to_schema_file(schema): - case Some(x): - records = load_json_safe(x.as_posix()) - if not is_successful(records): - return [Reason("Invalid `Category` file")] - return build_records(schema, records.unwrap()) - case _: - return [Reason("No `Category` file found")] + schema = SchemaName.CATEGORY diff --git a/t4_devkit/sanity/format/fmt004.py b/t4_devkit/sanity/format/fmt004.py index 129e481..26b0a60 100644 --- a/t4_devkit/sanity/format/fmt004.py +++ b/t4_devkit/sanity/format/fmt004.py @@ -2,37 +2,22 @@ from typing import TYPE_CHECKING -from returns.maybe import Some -from returns.pipeline import is_successful - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe -from .utility import build_records +from .base import FieldTypeChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["FMT004"] @CHECKERS.register(RuleID("FMT004")) -class FMT004(Checker): +class FMT004(FieldTypeChecker): """A checker of FMT004.""" name = RuleName("ego-pose-field") description = "All types of 'EgoPose' fields are valid." 
- - def check(self, context: SanityContext) -> list[Reason]: - schema = SchemaName.EGO_POSE - match context.to_schema_file(schema): - case Some(x): - records = load_json_safe(x.as_posix()) - if not is_successful(records): - return [Reason("Invalid `EgoPose` file")] - return build_records(schema, records.unwrap()) - case _: - return [Reason("No `EgoPose` file found")] + schema = SchemaName.EGO_POSE diff --git a/t4_devkit/sanity/format/fmt005.py b/t4_devkit/sanity/format/fmt005.py index c510b5f..a89c29c 100644 --- a/t4_devkit/sanity/format/fmt005.py +++ b/t4_devkit/sanity/format/fmt005.py @@ -2,37 +2,22 @@ from typing import TYPE_CHECKING -from returns.maybe import Some -from returns.pipeline import is_successful - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe -from .utility import build_records +from .base import FieldTypeChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["FMT005"] @CHECKERS.register(RuleID("FMT005")) -class FMT005(Checker): +class FMT005(FieldTypeChecker): """A checker of FMT005.""" name = RuleName("instance-field") description = "All types of 'Instance' fields are valid." 
- - def check(self, context: SanityContext) -> list[Reason]: - schema = SchemaName.INSTANCE - match context.to_schema_file(schema): - case Some(x): - records = load_json_safe(x.as_posix()) - if not is_successful(records): - return [Reason("Invalid `Instance` file")] - return build_records(schema, records.unwrap()) - case _: - return [Reason("No `Instance` file found")] + schema = SchemaName.INSTANCE diff --git a/t4_devkit/sanity/format/fmt006.py b/t4_devkit/sanity/format/fmt006.py index c77381b..8580af5 100644 --- a/t4_devkit/sanity/format/fmt006.py +++ b/t4_devkit/sanity/format/fmt006.py @@ -2,37 +2,22 @@ from typing import TYPE_CHECKING -from returns.maybe import Some -from returns.pipeline import is_successful - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe -from .utility import build_records +from .base import FieldTypeChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["FMT006"] @CHECKERS.register(RuleID("FMT006")) -class FMT006(Checker): +class FMT006(FieldTypeChecker): """A checker of FMT006.""" name = RuleName("log-field") description = "All types of 'Log' fields are valid." 
- - def check(self, context: SanityContext) -> list[Reason]: - schema = SchemaName.LOG - match context.to_schema_file(schema): - case Some(x): - records = load_json_safe(x.as_posix()) - if not is_successful(records): - return [Reason("Invalid `Log` file")] - return build_records(schema, records.unwrap()) - case _: - return [Reason("No `Log` file found")] + schema = SchemaName.LOG diff --git a/t4_devkit/sanity/format/fmt007.py b/t4_devkit/sanity/format/fmt007.py index d8eadb0..e4f1c08 100644 --- a/t4_devkit/sanity/format/fmt007.py +++ b/t4_devkit/sanity/format/fmt007.py @@ -2,37 +2,22 @@ from typing import TYPE_CHECKING -from returns.maybe import Some -from returns.pipeline import is_successful - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe -from .utility import build_records +from .base import FieldTypeChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["FMT007"] @CHECKERS.register(RuleID("FMT007")) -class FMT007(Checker): +class FMT007(FieldTypeChecker): """A checker of FMT007.""" name = RuleName("map-field") description = "All types of 'Map' fields are valid." 
- - def check(self, context: SanityContext) -> list[Reason]: - schema = SchemaName.MAP - match context.to_schema_file(schema): - case Some(x): - records = load_json_safe(x.as_posix()) - if not is_successful(records): - return [Reason("Invalid `Map` file")] - return build_records(schema, records.unwrap()) - case _: - return [Reason("No `Map` file found")] + schema = SchemaName.MAP diff --git a/t4_devkit/sanity/format/fmt008.py b/t4_devkit/sanity/format/fmt008.py index e6dd797..f020e6f 100644 --- a/t4_devkit/sanity/format/fmt008.py +++ b/t4_devkit/sanity/format/fmt008.py @@ -2,37 +2,22 @@ from typing import TYPE_CHECKING -from returns.maybe import Some -from returns.pipeline import is_successful - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe -from .utility import build_records +from .base import FieldTypeChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["FMT008"] @CHECKERS.register(RuleID("FMT008")) -class FMT008(Checker): +class FMT008(FieldTypeChecker): """A checker of FMT008.""" name = RuleName("sample-field") description = "All types of 'Sample' fields are valid." 
- - def check(self, context: SanityContext) -> list[Reason]: - schema = SchemaName.SAMPLE - match context.to_schema_file(schema): - case Some(x): - records = load_json_safe(x.as_posix()) - if not is_successful(records): - return [Reason("Invalid `Sample` file")] - return build_records(schema, records.unwrap()) - case _: - return [Reason("No `Sample` file found")] + schema = SchemaName.SAMPLE diff --git a/t4_devkit/sanity/format/fmt009.py b/t4_devkit/sanity/format/fmt009.py index abfcc5e..e3fb87f 100644 --- a/t4_devkit/sanity/format/fmt009.py +++ b/t4_devkit/sanity/format/fmt009.py @@ -2,37 +2,22 @@ from typing import TYPE_CHECKING -from returns.maybe import Some -from returns.pipeline import is_successful - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe -from .utility import build_records +from .base import FieldTypeChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["FMT009"] @CHECKERS.register(RuleID("FMT009")) -class FMT009(Checker): +class FMT009(FieldTypeChecker): """A checker of FMT009.""" name = RuleName("sample-annotation-field") description = "All types of 'SampleAnnotation' fields are valid." 
- - def check(self, context: SanityContext) -> list[Reason]: - schema = SchemaName.SAMPLE_ANNOTATION - match context.to_schema_file(schema): - case Some(x): - records = load_json_safe(x.as_posix()) - if not is_successful(records): - return [Reason("Invalid `SampleAnnotation` file")] - return build_records(schema, records.unwrap()) - case _: - return [Reason("No `SampleAnnotation` file found")] + schema = SchemaName.SAMPLE_ANNOTATION diff --git a/t4_devkit/sanity/format/fmt010.py b/t4_devkit/sanity/format/fmt010.py index cc74e24..d5145f8 100644 --- a/t4_devkit/sanity/format/fmt010.py +++ b/t4_devkit/sanity/format/fmt010.py @@ -2,37 +2,22 @@ from typing import TYPE_CHECKING -from returns.maybe import Some -from returns.pipeline import is_successful - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe -from .utility import build_records +from .base import FieldTypeChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["FMT010"] @CHECKERS.register(RuleID("FMT010")) -class FMT010(Checker): +class FMT010(FieldTypeChecker): """A checker of FMT010.""" name = RuleName("sample-data-field") description = "All types of 'SampleData' fields are valid." 
- - def check(self, context: SanityContext) -> list[Reason]: - schema = SchemaName.SAMPLE_DATA - match context.to_schema_file(schema): - case Some(x): - records = load_json_safe(x.as_posix()) - if not is_successful(records): - return [Reason("Invalid `SampleData` file")] - return build_records(schema, records.unwrap()) - case _: - return [Reason("No `SampleData` file found")] + schema = SchemaName.SAMPLE_DATA diff --git a/t4_devkit/sanity/format/fmt011.py b/t4_devkit/sanity/format/fmt011.py index 35a571b..077732e 100644 --- a/t4_devkit/sanity/format/fmt011.py +++ b/t4_devkit/sanity/format/fmt011.py @@ -2,37 +2,22 @@ from typing import TYPE_CHECKING -from returns.maybe import Some -from returns.pipeline import is_successful - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe -from .utility import build_records +from .base import FieldTypeChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["FMT011"] @CHECKERS.register(RuleID("FMT011")) -class FMT011(Checker): +class FMT011(FieldTypeChecker): """A checker of FMT011.""" name = RuleName("scene-field") description = "All types of 'Scene' fields are valid." 
- - def check(self, context: SanityContext) -> list[Reason]: - schema = SchemaName.SCENE - match context.to_schema_file(schema): - case Some(x): - records = load_json_safe(x.as_posix()) - if not is_successful(records): - return [Reason("Invalid `Scene` file")] - return build_records(schema, records.unwrap()) - case _: - return [Reason("No `Scene` file found")] + schema = SchemaName.SCENE diff --git a/t4_devkit/sanity/format/fmt012.py b/t4_devkit/sanity/format/fmt012.py index 1ce130e..1083e3c 100644 --- a/t4_devkit/sanity/format/fmt012.py +++ b/t4_devkit/sanity/format/fmt012.py @@ -2,37 +2,22 @@ from typing import TYPE_CHECKING -from returns.maybe import Some -from returns.pipeline import is_successful - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe -from .utility import build_records +from .base import FieldTypeChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["FMT012"] @CHECKERS.register(RuleID("FMT012")) -class FMT012(Checker): +class FMT012(FieldTypeChecker): """A checker of FMT012.""" name = RuleName("sensor-field") description = "All types of 'Sensor' fields are valid." 
- - def check(self, context: SanityContext) -> list[Reason]: - schema = SchemaName.SENSOR - match context.to_schema_file(schema): - case Some(x): - records = load_json_safe(x.as_posix()) - if not is_successful(records): - return [Reason("Invalid `Sensor` file")] - return build_records(schema, records.unwrap()) - case _: - return [Reason("No `Sensor` file found")] + schema = SchemaName.SENSOR diff --git a/t4_devkit/sanity/format/fmt013.py b/t4_devkit/sanity/format/fmt013.py index 3c693ce..0c808c2 100644 --- a/t4_devkit/sanity/format/fmt013.py +++ b/t4_devkit/sanity/format/fmt013.py @@ -2,37 +2,22 @@ from typing import TYPE_CHECKING -from returns.maybe import Some -from returns.pipeline import is_successful - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe -from .utility import build_records +from .base import FieldTypeChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["FMT013"] @CHECKERS.register(RuleID("FMT013")) -class FMT013(Checker): +class FMT013(FieldTypeChecker): """A checker of FMT013.""" name = RuleName("visibility-field") description = "All types of 'Visibility' fields are valid." 
- - def check(self, context: SanityContext) -> list[Reason]: - schema = SchemaName.VISIBILITY - match context.to_schema_file(schema): - case Some(x): - records = load_json_safe(x.as_posix()) - if not is_successful(records): - return [Reason("Invalid `Visibility` file")] - return build_records(schema, records.unwrap()) - case _: - return [Reason("No `Visibility` file found")] + schema = SchemaName.VISIBILITY diff --git a/t4_devkit/sanity/format/fmt014.py b/t4_devkit/sanity/format/fmt014.py index bdb2a3b..a4d5986 100644 --- a/t4_devkit/sanity/format/fmt014.py +++ b/t4_devkit/sanity/format/fmt014.py @@ -2,39 +2,22 @@ from typing import TYPE_CHECKING -from returns.maybe import Some -from returns.pipeline import is_successful - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe -from .utility import build_records +from .base import FieldTypeChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["FMT014"] @CHECKERS.register(RuleID("FMT014")) -class FMT014(Checker): +class FMT014(FieldTypeChecker): """A checker of FMT014.""" name = RuleName("lidarseg-field") description = "All types of 'LidarSeg' fields are valid." 
- - def check(self, context: SanityContext) -> list[Reason]: - schema = SchemaName.LIDARSEG - match context.to_schema_file(schema): - case Some(x): - if not x.exists() and schema.is_optional(): - return [] - records = load_json_safe(x.as_posix()) - if not is_successful(records): - return [Reason("Invalid `LidarSeg` file")] - return build_records(schema, records.unwrap()) - case _: - return [Reason("No `LidarSeg` file found")] + schema = SchemaName.LIDARSEG diff --git a/t4_devkit/sanity/format/fmt015.py b/t4_devkit/sanity/format/fmt015.py index dd41e0a..d90e80e 100644 --- a/t4_devkit/sanity/format/fmt015.py +++ b/t4_devkit/sanity/format/fmt015.py @@ -2,39 +2,22 @@ from typing import TYPE_CHECKING -from returns.maybe import Some -from returns.pipeline import is_successful - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe -from .utility import build_records +from .base import FieldTypeChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["FMT015"] @CHECKERS.register(RuleID("FMT015")) -class FMT015(Checker): +class FMT015(FieldTypeChecker): """A checker of FMT015.""" name = RuleName("object-ann-field") description = "All types of 'ObjectAnn' fields are valid." 
- - def check(self, context: SanityContext) -> list[Reason]: - schema = SchemaName.OBJECT_ANN - match context.to_schema_file(schema): - case Some(x): - if not x.exists() and schema.is_optional(): - return [] - records = load_json_safe(x.as_posix()) - if not is_successful(records): - return [Reason("Invalid `ObjectAnn` file")] - return build_records(schema, records.unwrap()) - case _: - return [Reason("No `ObjectAnn` file found")] + schema = SchemaName.OBJECT_ANN diff --git a/t4_devkit/sanity/format/fmt016.py b/t4_devkit/sanity/format/fmt016.py index 6c736d9..2014e6a 100644 --- a/t4_devkit/sanity/format/fmt016.py +++ b/t4_devkit/sanity/format/fmt016.py @@ -2,39 +2,22 @@ from typing import TYPE_CHECKING -from returns.maybe import Some -from returns.pipeline import is_successful - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe -from .utility import build_records +from .base import FieldTypeChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["FMT016"] @CHECKERS.register(RuleID("FMT016")) -class FMT016(Checker): +class FMT016(FieldTypeChecker): """A checker of FMT016.""" name = RuleName("surface-ann-field") description = "All types of 'SurfaceAnn' fields are valid." 
- - def check(self, context: SanityContext) -> list[Reason]: - schema = SchemaName.SURFACE_ANN - match context.to_schema_file(schema): - case Some(x): - if not x.exists() and schema.is_optional(): - return [] - records = load_json_safe(x.as_posix()) - if not is_successful(records): - return [Reason("Invalid `SurfaceAnn` file")] - return build_records(schema, records.unwrap()) - case _: - return [Reason("No `SurfaceAnn` file found")] + schema = SchemaName.SURFACE_ANN diff --git a/t4_devkit/sanity/format/fmt017.py b/t4_devkit/sanity/format/fmt017.py index 8498421..e9dfac6 100644 --- a/t4_devkit/sanity/format/fmt017.py +++ b/t4_devkit/sanity/format/fmt017.py @@ -2,39 +2,22 @@ from typing import TYPE_CHECKING -from returns.maybe import Some -from returns.pipeline import is_successful - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe -from .utility import build_records +from .base import FieldTypeChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["FMT017"] @CHECKERS.register(RuleID("FMT017")) -class FMT017(Checker): +class FMT017(FieldTypeChecker): """A checker of FMT017.""" name = RuleName("keypoint-field") description = "All types of 'Keypoint' fields are valid." 
- - def check(self, context: SanityContext) -> list[Reason]: - schema = SchemaName.KEYPOINT - match context.to_schema_file(schema): - case Some(x): - if not x.exists() and schema.is_optional(): - return [] - records = load_json_safe(x.as_posix()) - if not is_successful(records): - return [Reason("Invalid `Keypoint` file")] - return build_records(schema, records.unwrap()) - case _: - return [Reason("No `Keypoint` file found")] + schema = SchemaName.KEYPOINT diff --git a/t4_devkit/sanity/format/fmt018.py b/t4_devkit/sanity/format/fmt018.py index 42e6081..0732ba1 100644 --- a/t4_devkit/sanity/format/fmt018.py +++ b/t4_devkit/sanity/format/fmt018.py @@ -2,39 +2,22 @@ from typing import TYPE_CHECKING -from returns.maybe import Some -from returns.pipeline import is_successful - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe -from .utility import build_records +from .base import FieldTypeChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["FMT018"] @CHECKERS.register(RuleID("FMT018")) -class FMT018(Checker): +class FMT018(FieldTypeChecker): """A checker of FMT018.""" name = RuleName("vehicle-state-field") description = "All types of 'VehicleState' fields are valid." 
- - def check(self, context: SanityContext) -> list[Reason]: - schema = SchemaName.VEHICLE_STATE - match context.to_schema_file(schema): - case Some(x): - if not x.exists() and schema.is_optional(): - return [] - records = load_json_safe(x.as_posix()) - if not is_successful(records): - return [Reason("Invalid `VehicleState` file")] - return build_records(schema, records.unwrap()) - case _: - return [Reason("No `VehicleState` file found")] + schema = SchemaName.VEHICLE_STATE diff --git a/t4_devkit/sanity/format/utility.py b/t4_devkit/sanity/format/utility.py deleted file mode 100644 index aecdb39..0000000 --- a/t4_devkit/sanity/format/utility.py +++ /dev/null @@ -1,23 +0,0 @@ -from __future__ import annotations - -from returns.pipeline import is_successful -from returns.result import safe - -from t4_devkit.schema import SCHEMAS, SchemaName, SchemaBase - -from ..result import Reason - - -def build_records(schema: SchemaName, records: list[dict]) -> list[Reason]: - module = SCHEMAS.get(schema) - failures = [] - for record in records: - conversion = _safe_from_dict(module, record) - if not is_successful(conversion): - failures.append(Reason(f"[{schema.name}] {record['token']}: {conversion.failure()}")) - return failures - - -@safe -def _safe_from_dict(module: SchemaBase, record: dict): - return module.from_dict(record) diff --git a/t4_devkit/schema/builder.py b/t4_devkit/schema/builder.py index c0ae7b9..bcf7dcf 100644 --- a/t4_devkit/schema/builder.py +++ b/t4_devkit/schema/builder.py @@ -22,13 +22,3 @@ def build_schema(name: str | SchemaName, filepath: str) -> list[SchemaTable]: List of schema dataclasses. """ return SCHEMAS.build_from_json(name, filepath) - - -def build_schema_safe(name: str | SchemaName, filepath: str) -> list[SchemaTable]: - """Build schema dataclass from json file path safely. - - Args: - name (str | SchemaName): Name of schema table. - filepath (str): Path to json file. 
- """ - return SCHEMAS.build_from_json_safe(name, filepath) From 96d905e57cff96344d7abc55cdbbe7ea6a6eff6f Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Fri, 24 Oct 2025 15:24:11 +0900 Subject: [PATCH 06/21] docs: add navigation for schema requirement Signed-off-by: ktro2828 --- mkdocs.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/mkdocs.yaml b/mkdocs.yaml index eaccda8..b8a559c 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -10,6 +10,7 @@ nav: - Home: schema/index.md - Schema Tables: schema/table.md - Sensor Data: schema/data.md + - Requirements: schema/requirement.md - Tutorials: - Initialization: tutorials/initialize.md - Visualization: tutorials/render.md From fb63e76ed55a192e92e9ff1fdc2fb774fc66bb11 Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Fri, 24 Oct 2025 16:14:50 +0900 Subject: [PATCH 07/21] refactor: add base class for reference checker Signed-off-by: ktro2828 --- t4_devkit/sanity/reference/base.py | 59 ++++++++++++++++++++++++++++ t4_devkit/sanity/reference/ref001.py | 29 ++++---------- t4_devkit/sanity/reference/ref002.py | 29 ++++---------- t4_devkit/sanity/reference/ref003.py | 29 ++++---------- t4_devkit/sanity/reference/ref004.py | 29 ++++---------- t4_devkit/sanity/reference/ref005.py | 30 +++++--------- t4_devkit/sanity/reference/ref006.py | 29 ++++---------- t4_devkit/sanity/reference/ref007.py | 31 ++++----------- t4_devkit/sanity/reference/ref008.py | 29 ++++---------- t4_devkit/sanity/reference/ref009.py | 29 ++++---------- t4_devkit/sanity/reference/ref010.py | 31 ++++----------- t4_devkit/sanity/reference/ref011.py | 31 ++++----------- 12 files changed, 139 insertions(+), 246 deletions(-) create mode 100644 t4_devkit/sanity/reference/base.py diff --git a/t4_devkit/sanity/reference/base.py b/t4_devkit/sanity/reference/base.py new file mode 100644 index 0000000..008f140 --- /dev/null +++ b/t4_devkit/sanity/reference/base.py @@ -0,0 +1,59 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +from 
returns.maybe import Maybe, Nothing, Some + +from ..checker import Checker +from ..result import Reason +from ..safety import load_json_safe + +if TYPE_CHECKING: + from t4_devkit.schema import SchemaName + + from ..context import SanityContext + + +class ReferenceChecker(Checker): + """Base class for reference checkers. + + Attributes: + source (SchemaName): The source schema name. + target (SchemaName): The target schema name. + reference (str): The reference key in the source record. + """ + + source: SchemaName + target: SchemaName + reference: str + + def can_skip(self, context: SanityContext) -> Maybe[Reason]: + source_file = context.to_schema_file(self.source) + target_file = context.to_schema_file(self.target) + match (source_file, target_file): + case Some(x), Some(y): + if not x.exists(): + return Maybe.from_value(Reason(f"Missing {self.source.filename}")) + elif not y.exists(): + return Maybe.from_value(Reason(f"Missing {self.target.filename}")) + else: + return Nothing + case _: + return Maybe.from_value(Reason("Missing 'annotation' directory path")) + + def check(self, context: SanityContext) -> list[Reason]: + source_file = context.to_schema_file(self.source).unwrap() + target_file = context.to_schema_file(self.target).unwrap() + source_records = load_json_safe(source_file).unwrap() + target_tokens = [item["token"] for item in load_json_safe(target_file).unwrap()] + return [ + Reason( + f"No reference to '{self.source.value}.{self.reference}': {record[self.reference]}" + ) + for record in source_records + if record[self.reference] not in target_tokens + and self.is_additional_condition_ok(record) + ] + + def is_additional_condition_ok(self, record: dict[str, Any]) -> bool: + return True diff --git a/t4_devkit/sanity/reference/ref001.py b/t4_devkit/sanity/reference/ref001.py index 6d76ce2..d8a664e 100644 --- a/t4_devkit/sanity/reference/ref001.py +++ b/t4_devkit/sanity/reference/ref001.py @@ -2,39 +2,24 @@ from typing import TYPE_CHECKING -from 
returns.maybe import Some - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe +from .base import ReferenceChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["REF001"] @CHECKERS.register(RuleID("REF001")) -class REF001(Checker): +class REF001(ReferenceChecker): """A checker of REF001.""" name = RuleName("scene-to-log") description = "'Scene.log_token' refers to 'Log' record." - - def check(self, context: SanityContext) -> list[Reason]: - scene_file = context.to_schema_file(SchemaName.SCENE) - log_file = context.to_schema_file(SchemaName.LOG) - match (scene_file, log_file): - case Some(x), Some(y): - scene = load_json_safe(x).unwrap() - log_tokens = [item["token"] for item in load_json_safe(y).unwrap()] - return [ - Reason(f"No reference to `Scene.log_token`: {s['log_token']}") - for s in scene - if s["log_token"] not in log_tokens - ] - case _: - return [Reason("Missing `Scene` or `Log` file")] + source = SchemaName.SCENE + target = SchemaName.LOG + reference = "log_token" diff --git a/t4_devkit/sanity/reference/ref002.py b/t4_devkit/sanity/reference/ref002.py index e11f089..f97f6e3 100644 --- a/t4_devkit/sanity/reference/ref002.py +++ b/t4_devkit/sanity/reference/ref002.py @@ -2,39 +2,24 @@ from typing import TYPE_CHECKING -from returns.maybe import Some - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe +from .base import ReferenceChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["REF002"] @CHECKERS.register(RuleID("REF002")) -class REF002(Checker): +class REF002(ReferenceChecker): """A checker of REF002.""" name = RuleName("scene-to-first-sample") 
description = "'Scene.first_sample_token' refers to 'Sample' record." - - def check(self, context: SanityContext) -> list[Reason]: - scene_file = context.to_schema_file(SchemaName.SCENE) - sample_file = context.to_schema_file(SchemaName.SAMPLE) - match (scene_file, sample_file): - case Some(x), Some(y): - scene = load_json_safe(x).unwrap() - sample_tokens = [item["token"] for item in load_json_safe(y).unwrap()] - return [ - Reason(f"No reference to `Scene.first_sample_token`: {s['first_sample_token']}") - for s in scene - if s["first_sample_token"] not in sample_tokens - ] - case _: - return [Reason("Missing `Scene` or `Sample` file")] + source = SchemaName.SCENE + target = SchemaName.SAMPLE + reference = "first_sample_token" diff --git a/t4_devkit/sanity/reference/ref003.py b/t4_devkit/sanity/reference/ref003.py index 4a18b65..6d17399 100644 --- a/t4_devkit/sanity/reference/ref003.py +++ b/t4_devkit/sanity/reference/ref003.py @@ -2,39 +2,24 @@ from typing import TYPE_CHECKING -from returns.maybe import Some - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe +from .base import ReferenceChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["REF003"] @CHECKERS.register(RuleID("REF003")) -class REF003(Checker): +class REF003(ReferenceChecker): """A checker of REF003.""" name = RuleName("scene-to-last-sample") description = "'Scene.last_sample_token' refers to 'Sample' record." 
- - def check(self, context: SanityContext) -> list[Reason]: - scene_file = context.to_schema_file(SchemaName.SCENE) - sample_file = context.to_schema_file(SchemaName.SAMPLE) - match (scene_file, sample_file): - case Some(x), Some(y): - scene = load_json_safe(x).unwrap() - sample_tokens = [item["token"] for item in load_json_safe(y).unwrap()] - return [ - Reason(f"No reference to `Scene.last_sample_token`: {s['last_sample_token']}") - for s in scene - if s["last_sample_token"] not in sample_tokens - ] - case _: - return [Reason("Missing `Scene` or `Sample` file")] + source = SchemaName.SCENE + target = SchemaName.SAMPLE + reference = "last_sample_token" diff --git a/t4_devkit/sanity/reference/ref004.py b/t4_devkit/sanity/reference/ref004.py index 90b6e50..48b40e3 100644 --- a/t4_devkit/sanity/reference/ref004.py +++ b/t4_devkit/sanity/reference/ref004.py @@ -2,39 +2,24 @@ from typing import TYPE_CHECKING -from returns.maybe import Some - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe +from .base import ReferenceChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["REF004"] @CHECKERS.register(RuleID("REF004")) -class REF004(Checker): +class REF004(ReferenceChecker): """A checker of REF004.""" name = RuleName("sample-to-scene") description = "'Sample.scene_token' refers to 'Scene' record." 
- - def check(self, context: SanityContext) -> list[Reason]: - sample_file = context.to_schema_file(SchemaName.SAMPLE) - scene_file = context.to_schema_file(SchemaName.SCENE) - match (sample_file, scene_file): - case Some(x), Some(y): - sample = load_json_safe(x).unwrap() - scene_tokens = [item["token"] for item in load_json_safe(y).unwrap()] - return [ - Reason(f"No reference to `Sample.scene_token`: {s['scene_token']}") - for s in sample - if s["scene_token"] not in scene_tokens - ] - case _: - return [Reason("Missing `Scene` or `Sample` file")] + source = SchemaName.SAMPLE + target = SchemaName.SCENE + reference = "scene_token" diff --git a/t4_devkit/sanity/reference/ref005.py b/t4_devkit/sanity/reference/ref005.py index c409f8d..7fd6e9d 100644 --- a/t4_devkit/sanity/reference/ref005.py +++ b/t4_devkit/sanity/reference/ref005.py @@ -2,39 +2,29 @@ from typing import TYPE_CHECKING -from returns.maybe import Some +from typing_extensions import Any from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe +from .base import ReferenceChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["REF005"] @CHECKERS.register(RuleID("REF005")) -class REF005(Checker): +class REF005(ReferenceChecker): """A checker of REF005.""" name = RuleName("sample-data-to-sample") description = "'SampleData.sample_token' refers to 'Sample' record." 
+ source = SchemaName.SAMPLE_DATA + target = SchemaName.SAMPLE + reference = "sample_token" - def check(self, context: SanityContext) -> list[Reason]: - sample_data_file = context.to_schema_file(SchemaName.SAMPLE_DATA) - sample_file = context.to_schema_file(SchemaName.SAMPLE) - match (sample_data_file, sample_file): - case Some(x), Some(y): - sample_data = load_json_safe(x).unwrap() - sample_tokens = [item["token"] for item in load_json_safe(y).unwrap()] - return [ - Reason(f"No reference to 'SampleData.sample_token': {s['sample_token']}") - for s in sample_data - if s["sample_token"] not in sample_tokens and s["is_valid"] - ] - case _: - return [Reason("Missing 'SampleData' or 'Sample' file")] + def is_additional_condition_ok(self, record: dict[str, Any]) -> bool: + return record["is_valid"] diff --git a/t4_devkit/sanity/reference/ref006.py b/t4_devkit/sanity/reference/ref006.py index a23ee68..daea372 100644 --- a/t4_devkit/sanity/reference/ref006.py +++ b/t4_devkit/sanity/reference/ref006.py @@ -2,39 +2,24 @@ from typing import TYPE_CHECKING -from returns.maybe import Some - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe +from .base import ReferenceChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["REF006"] @CHECKERS.register(RuleID("REF006")) -class REF006(Checker): +class REF006(ReferenceChecker): """A checker of REF006.""" name = RuleName("sample-data-to-ego-pose") description = "'SampleData.ego_pose_token' refers to 'EgoPose' record." 
- - def check(self, context: SanityContext) -> list[Reason]: - sample_data_file = context.to_schema_file(SchemaName.SAMPLE_DATA) - ego_pose_file = context.to_schema_file(SchemaName.EGO_POSE) - match (sample_data_file, ego_pose_file): - case Some(x), Some(y): - sample_data = load_json_safe(x).unwrap() - ego_pose_tokens = [item["token"] for item in load_json_safe(y).unwrap()] - return [ - Reason(f"No reference to `SampleData.ego_pose_token`: {s['ego_pose_token']}") - for s in sample_data - if s["ego_pose_token"] not in ego_pose_tokens - ] - case _: - return [Reason("Missing `SampleData` or `EgoPose` file")] + source = SchemaName.SAMPLE_DATA + target = SchemaName.EGO_POSE + reference = "ego_pose_token" diff --git a/t4_devkit/sanity/reference/ref007.py b/t4_devkit/sanity/reference/ref007.py index 9ccfea8..2e097ae 100644 --- a/t4_devkit/sanity/reference/ref007.py +++ b/t4_devkit/sanity/reference/ref007.py @@ -2,41 +2,24 @@ from typing import TYPE_CHECKING -from returns.maybe import Some - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe +from .base import ReferenceChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["REF007"] @CHECKERS.register(RuleID("REF007")) -class REF007(Checker): +class REF007(ReferenceChecker): """A checker of REF007.""" name = RuleName("sample-data-to-calibrated-sensor") description = "'SampleData.calibrated_sensor_token' refers to 'CalibratedSensor' record." 
- - def check(self, context: SanityContext) -> list[Reason]: - sample_data_file = context.to_schema_file(SchemaName.SAMPLE_DATA) - calibrated_sensor_file = context.to_schema_file(SchemaName.CALIBRATED_SENSOR) - match (sample_data_file, calibrated_sensor_file): - case Some(x), Some(y): - sample_data = load_json_safe(x).unwrap() - calibrated_sensor_tokens = [item["token"] for item in load_json_safe(y).unwrap()] - return [ - Reason( - f"No reference to `SampleData.calibrated_sensor_token`: {s['calibrated_sensor_token']}" - ) - for s in sample_data - if s["calibrated_sensor_token"] not in calibrated_sensor_tokens - ] - case _: - return [Reason("Missing `SampleData` or `CalibratedSensor` file")] + source = SchemaName.SAMPLE_DATA + target = SchemaName.CALIBRATED_SENSOR + reference = "calibrated_sensor_token" diff --git a/t4_devkit/sanity/reference/ref008.py b/t4_devkit/sanity/reference/ref008.py index 4739c2b..f15ac99 100644 --- a/t4_devkit/sanity/reference/ref008.py +++ b/t4_devkit/sanity/reference/ref008.py @@ -2,39 +2,24 @@ from typing import TYPE_CHECKING -from returns.maybe import Some - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe +from .base import ReferenceChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["REF008"] @CHECKERS.register(RuleID("REF008")) -class REF008(Checker): +class REF008(ReferenceChecker): """A checker of REF008.""" name = RuleName("calibrated-sensor-to-sensor") description = "'CalibratedSensor.sensor_token' refers to 'Sensor' record." 
- - def check(self, context: SanityContext) -> list[Reason]: - calibrated_sensor_file = context.to_schema_file(SchemaName.CALIBRATED_SENSOR) - sensor_file = context.to_schema_file(SchemaName.SENSOR) - match (calibrated_sensor_file, sensor_file): - case Some(x), Some(y): - calibrated_sensor = load_json_safe(x).unwrap() - sensor_tokens = [item["token"] for item in load_json_safe(y).unwrap()] - return [ - Reason(f"No reference to `CalibratedSensor.sensor_token`: {cs['sensor_token']}") - for cs in calibrated_sensor - if cs["sensor_token"] not in sensor_tokens - ] - case _: - return [Reason("Missing `CalibratedSensor` or `Sensor` file")] + source = SchemaName.CALIBRATED_SENSOR + target = SchemaName.SENSOR + reference = "sensor_token" diff --git a/t4_devkit/sanity/reference/ref009.py b/t4_devkit/sanity/reference/ref009.py index 4d275d8..846835a 100644 --- a/t4_devkit/sanity/reference/ref009.py +++ b/t4_devkit/sanity/reference/ref009.py @@ -2,39 +2,24 @@ from typing import TYPE_CHECKING -from returns.maybe import Some - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe +from .base import ReferenceChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["REF009"] @CHECKERS.register(RuleID("REF009")) -class REF009(Checker): +class REF009(ReferenceChecker): """A checker of REF009.""" name = RuleName("instance-to-category") description = "'Instance.category_token' refers to 'Category' record." 
- - def check(self, context: SanityContext) -> list[Reason]: - instance_file = context.to_schema_file(SchemaName.INSTANCE) - category_file = context.to_schema_file(SchemaName.CATEGORY) - match (instance_file, category_file): - case Some(x), Some(y): - instance = load_json_safe(x).unwrap() - category_tokens = [item["token"] for item in load_json_safe(y).unwrap()] - return [ - Reason(f"No reference to `Instance.category_token`: {ins['category_token']}") - for ins in instance - if ins["category_token"] not in category_tokens - ] - case _: - return [Reason("Missing `Instance` or `Category` file")] + source = SchemaName.INSTANCE + target = SchemaName.CATEGORY + reference = "category_token" diff --git a/t4_devkit/sanity/reference/ref010.py b/t4_devkit/sanity/reference/ref010.py index 2e01d29..4a38d92 100644 --- a/t4_devkit/sanity/reference/ref010.py +++ b/t4_devkit/sanity/reference/ref010.py @@ -2,41 +2,24 @@ from typing import TYPE_CHECKING -from returns.maybe import Some - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe +from .base import ReferenceChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["REF010"] @CHECKERS.register(RuleID("REF010")) -class REF010(Checker): +class REF010(ReferenceChecker): """A checker of REF010.""" name = RuleName("instance-to-first-sample-annotation") description = "'Instance.first_annotation_token' refers to 'SampleAnnotation' record." 
- - def check(self, context: SanityContext) -> list[Reason]: - instance_file = context.to_schema_file(SchemaName.INSTANCE) - sample_ann_file = context.to_schema_file(SchemaName.SAMPLE_ANNOTATION) - match (instance_file, sample_ann_file): - case Some(x), Some(y): - instance = load_json_safe(x).unwrap() - sample_ann_tokens = [item["token"] for item in load_json_safe(y).unwrap()] - return [ - Reason( - f"No reference to `Instance.first_annotation_token`: {ins['first_annotation_token']}" - ) - for ins in instance - if ins["first_annotation_token"] not in sample_ann_tokens - ] - case _: - return [Reason("Missing `Instance` or `SampleAnnotation` file")] + source = SchemaName.INSTANCE + target = SchemaName.SAMPLE_ANNOTATION + reference = "first_annotation_token" diff --git a/t4_devkit/sanity/reference/ref011.py b/t4_devkit/sanity/reference/ref011.py index 103c55c..bdbbd40 100644 --- a/t4_devkit/sanity/reference/ref011.py +++ b/t4_devkit/sanity/reference/ref011.py @@ -2,41 +2,24 @@ from typing import TYPE_CHECKING -from returns.maybe import Some - from t4_devkit.schema import SchemaName -from ..checker import Checker, RuleID, RuleName +from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe +from .base import ReferenceChecker if TYPE_CHECKING: - from ..context import SanityContext + pass __all__ = ["REF011"] @CHECKERS.register(RuleID("REF011")) -class REF011(Checker): +class REF011(ReferenceChecker): """A checker of REF011.""" name = RuleName("instance-to-last-sample-annotation") description = "'Instance.last_annotation_token' refers to 'SampleAnnotation' record." 
- - def check(self, context: SanityContext) -> list[Reason]: - instance_file = context.to_schema_file(SchemaName.INSTANCE) - sample_ann_file = context.to_schema_file(SchemaName.SAMPLE_ANNOTATION) - match (instance_file, sample_ann_file): - case Some(x), Some(y): - instance = load_json_safe(x).unwrap() - sample_ann_tokens = [item["token"] for item in load_json_safe(y).unwrap()] - return [ - Reason( - f"No reference to `Instance.last_annotation_token`: {ins['last_annotation_token']}" - ) - for ins in instance - if ins["last_annotation_token"] not in sample_ann_tokens - ] - case _: - return [Reason("Missing `Instance` or `SampleAnnotation` file")] + source = SchemaName.INSTANCE + target = SchemaName.SAMPLE_ANNOTATION + reference = "last_annotation_token" From c92e866d728e4cbd14a6145724e2d18a97ac1a2d Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Fri, 24 Oct 2025 16:40:10 +0900 Subject: [PATCH 08/21] feat: add excludes option Signed-off-by: ktro2828 --- t4_devkit/cli/sanity.py | 15 +++++++++--- t4_devkit/sanity/registry.py | 46 +++++++++++++++++++++++++++++------- 2 files changed, 49 insertions(+), 12 deletions(-) diff --git a/t4_devkit/cli/sanity.py b/t4_devkit/cli/sanity.py index 5a94e3b..a5026d4 100644 --- a/t4_devkit/cli/sanity.py +++ b/t4_devkit/cli/sanity.py @@ -8,7 +8,7 @@ from t4_devkit.common.io import save_json from t4_devkit.common.serialize import serialize_dataclasses -from t4_devkit.sanity import sanity_check, SanityResult +from t4_devkit.sanity import SanityResult, sanity_check from .version import version_callback @@ -24,10 +24,11 @@ def _run_sanity_check( db_parent: str, *, revision: str | None = None, + excludes: list[str] | None = None, include_warning: bool = False, ) -> list[SanityResult]: return [ - sanity_check(db_root, revision=revision, include_warning=include_warning) + sanity_check(db_root, revision=revision, excludes=excludes, include_warning=include_warning) for db_root in tqdm(Path(db_parent).glob("*"), desc=">>>Sanity checking...") ] @@ 
-78,6 +79,9 @@ def main( revision: str | None = typer.Option( None, "-rv", "--revision", help="Specify if you want to check the specific version." ), + excludes: list[str] | None = typer.Option( + None, "-e", "--exclude", help="Exclude specific rules or rule groups." + ), include_warning: bool = typer.Option( False, "-iw", "--include-warning", help="Indicates whether to report any warnings." ), @@ -85,7 +89,12 @@ def main( False, "-d", "--detail", help="Indicates whether to display detailed reports." ), ) -> None: - results = _run_sanity_check(db_parent, revision=revision, include_warning=include_warning) + results = _run_sanity_check( + db_parent, + revision=revision, + excludes=excludes, + include_warning=include_warning, + ) _print_table(results, detail=detail) diff --git a/t4_devkit/sanity/registry.py b/t4_devkit/sanity/registry.py index 00a2e54..bdc139a 100644 --- a/t4_devkit/sanity/registry.py +++ b/t4_devkit/sanity/registry.py @@ -16,16 +16,36 @@ class RuleGroup(Enum): @classmethod def values(cls) -> list[str]: + """Return a list of all rule group values.""" return [group.value for group in cls] + @classmethod + def to_group(cls, id: RuleID) -> RuleGroup | None: + """Convert a rule ID to a rule group. -class CheckerRegistry(dict[RuleGroup, dict[RuleID, type[Checker]]]): - def register(self, id: RuleID) -> Callable: - # TODO(ktro2828); Need to validate code after splitting code into group and number - group = None + Args: + id (RuleID): The ID of the rule. + + Returns: + The rule group if the rule ID belongs to any rule group, otherwise None. + """ for g in RuleGroup: if g.value in id: - group = g + return g + return None + + +class CheckerRegistry(dict[RuleGroup, dict[RuleID, type[Checker]]]): + def register(self, id: RuleID) -> Callable: + """Register a checker class. + + Args: + id (RuleID): The ID of the rule. + + Returns: + A decorator function that registers the checker class. 
+ """ + group = RuleGroup.to_group(id) if group is None: raise ValueError(f"'{id}' doesn't belong to any rule groups: {RuleGroup.values()}") @@ -49,14 +69,22 @@ def _add_module(self, module: type[Checker], group: RuleGroup, id: RuleID) -> No self[group][id] = module def build(self, excludes: Sequence[str] | None = None) -> list[Checker]: + """Build a list of checkers from the registry. + + Args: + excludes (Sequence[str] | None, optional): A list of rule IDs or rule groups to exclude. + + Returns: + A list of checkers. + """ if excludes is None: excludes = [] return [ - checker(rule) - for _, values in self.items() - for rule, checker in values.items() - if rule not in excludes + checker(id) + for group, values in self.items() + for id, checker in values.items() + if id not in excludes and group.value not in excludes ] From caddf8d18393c7094842f4ef9901e3625a12cfb2 Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Fri, 24 Oct 2025 18:09:30 +0900 Subject: [PATCH 09/21] docs: update CLI document Signed-off-by: ktro2828 --- docs/cli/t4sanity.md | 128 +++++++++++++++++++++++-------------------- 1 file changed, 70 insertions(+), 58 deletions(-) diff --git a/docs/cli/t4sanity.md b/docs/cli/t4sanity.md index e18d694..abe8fb1 100644 --- a/docs/cli/t4sanity.md +++ b/docs/cli/t4sanity.md @@ -6,18 +6,20 @@ $ t4sanity -h Usage: t4sanity [OPTIONS] DB_PARENT -╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────────────────────────╮ -│ * db_parent TEXT Path to parent directory of the databases [default: None] [required] │ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ -│ --version -v Show the application version and exit. │ -│ --output -o TEXT Path to output JSON file. 
[default: None] │ -│ --revision -rv TEXT Specify if you want to load the specific version. [default: None] │ -│ --include-warning -iw Indicates whether to report any warnings. │ -│ --install-completion Install completion for the current shell. │ -│ --show-completion Show completion for the current shell, to copy it or customize the installation. │ -│ --help -h Show this message and exit. │ -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Arguments ───────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ * db_parent TEXT Path to parent directory of the databases. [required] │ +╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ --version -v Show the application version and exit. │ +│ --output -o TEXT Path to output JSON file. │ +│ --revision -rv TEXT Specify if you want to check the specific version. │ +│ --exclude -e TEXT Exclude specific rules or rule groups. │ +│ --include-warning -iw Indicates whether to report any warnings. │ +│ --detail -d Indicates whether to display detailed reports. │ +│ --install-completion Install completion for the current shell. │ +│ --show-completion Show completion for the current shell, to copy it or customize the installation. │ +│ --help -h Show this message and exit. │ +╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ``` ## Shell Completion @@ -33,7 +35,7 @@ t4sanity --install-completion As an example, we have the following the dataset structure: ```shell - + ├── dataset1 │ └── │ ├── annotation @@ -46,45 +48,49 @@ As an example, we have the following the dataset structure: ... 
``` -Then, you can run sanity checks with `t4sanity `: +Then, you can run sanity checks with `t4sanity `: ```shell +$ t4sanity + >>>Sanity checking...: 1it [00:00, 9.70it/s] -✅ No exceptions occurred!! -``` -### Exclude Warnings +============================= Summary ============================= ++-----------+---------+---------+-------+---------+----------+-------+ +| DatasetID | Version | Status | Rules | Success | Failures | Skips | ++-----------+---------+---------+-------+---------+----------+-------+ +| dataset1 | 0 | SUCCESS | 44 | 44 | 0 | 0 | ++-----------+---------+---------+-------+---------+----------+-------+ +| dataset2 | 0 | SUCCESS | 44 | 44 | 0 | 0 | ++-----------+---------+---------+-------+---------+----------+-------+ +``` -To run sanity check ignoring warnings, providing the path to the parent directory of the datasets: +Also, `-d; --detail` option helps us to display detailed information about each check: ```shell -$ t4sanity - ->>>Sanity checking...: 2it [00:00, 18.69it/s] -⚠️ Encountered some exceptions!! -+-----------+---------+--------+------------------------------------------------------------------------------------------------+ -| DatasetID | Version | Status | Message | -+-----------+---------+--------+------------------------------------------------------------------------------------------------+ -| dataset1 | 2 | ERROR | bbox must be (xmin, ymin, xmax, ymax) and xmin <= xmax && ymin <= ymax: (1532, 198, 1440, 265) | -| dataset2 | 1 | OK | | -+-----------+---------+--------+------------------------------------------------------------------------------------------------+ -``` +$ t4sanity -d -### Include Warnings - -To run sanity check and report any warnings, use the `-iw; --include-warning` option: +>>>Sanity checking...: 1it [00:00, 9.70it/s] -```shell -$ t4sanity -iw - ->>>Sanity checking...: 2it [00:00, 21.54it/s] -⚠️ Encountered some exceptions!! 
-+-----------+---------+---------+------------------------------------------------------------------------------------------------+ -| DatasetID | Version | Status | Message | -+-----------+---------+---------+------------------------------------------------------------------------------------------------+ -| dataset1 | 2 | ERROR | bbox must be (xmin, ymin, xmax, ymax) and xmin <= xmax && ymin <= ymax: (1532, 198, 1440, 265) | -| dataset2 | 1 | WARNING | Category token is empty for surface ann: 0c15d9c143fb2723c16ac7e0c735b0a8 | -+-----------+---------+---------+------------------------------------------------------------------------------------------------+ +=== DatasetID: dataset1 === + STR001: ✅ + STR002: ✅ + STR003: ✅ + STR004: ✅ + STR005: ✅ + STR006: ✅ + STR007: ✅ + STR008: ✅ + ... + +============================= Summary ============================= ++-----------+---------+---------+-------+---------+----------+-------+ +| DatasetID | Version | Status | Rules | Success | Failures | Skips | ++-----------+---------+---------+-------+---------+----------+-------+ +| dataset1 | 0 | SUCCESS | 44 | 44 | 0 | 0 | ++-----------+---------+---------+-------+---------+----------+-------+ +| dataset2 | 0 | SUCCESS | 44 | 44 | 0 | 0 | ++-----------+---------+---------+-------+---------+----------+-------+ ``` ### Dump Results as JSON @@ -92,27 +98,33 @@ $ t4sanity -iw To dump results into JSON, use the `-o; --output` option: ```shell -$ t4sanity -o results.json - ->>>Sanity checking...: 2it [00:00, 21.54it/s] -... 
+t4sanity -o results.json ``` -Then a JSON file named `results.json` will be generated: +Then a JSON file named `results.json` will be generated as follows: ```json [ { - "dataset_id": "dataset1", - "version": 2, - "status": "ERROR", - "message": "bbox must be (xmin, ymin, xmax, ymax) and xmin <= xmax && ymin <= ymax: (1532, 198, 1440, 265)" - }, - { - "dataset_id": "dataset2", - "version": 1, - "status": "WARNING", - "message": "Category token is empty for surface ann: 0c15d9c143fb2723c16ac7e0c735b0a8" + "dataset_id": "", + "version": , + "reports": { + "": { + "id": "", + "name": "", + "description": "", + "status": "", + "reasons": "<[, , ...]: [str; N] | null>" // Failure or skipped reasons, null if success + }, + } } ] ``` + +### Exclude Checks + +The `-e; --exclude` option enables us to exclude specific checks by specifying **rule IDs or groups**: + +```shell +t4sanity -e STR001 -e FMT +``` From 89ee0ec5a7579b6b04d9130a590b0479dd8463f6 Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Fri, 24 Oct 2025 19:28:43 +0900 Subject: [PATCH 10/21] refactor: rename SCH to REC Signed-off-by: ktro2828 --- docs/schema/requirement.md | 14 +++++----- t4_devkit/sanity/__init__.py | 8 +++--- t4_devkit/sanity/record/__init__.py | 8 ++++++ t4_devkit/sanity/record/base.py | 40 +++++++++++++++++++++++++++ t4_devkit/sanity/record/rec001.py | 32 ++++++++++++++++++++++ t4_devkit/sanity/record/rec002.py | 28 +++++++++++++++++++ t4_devkit/sanity/record/rec003.py | 28 +++++++++++++++++++ t4_devkit/sanity/record/rec004.py | 28 +++++++++++++++++++ t4_devkit/sanity/record/rec005.py | 32 ++++++++++++++++++++++ t4_devkit/sanity/record/rec006.py | 28 +++++++++++++++++++ t4_devkit/sanity/registry.py | 2 +- t4_devkit/sanity/schema/__init__.py | 8 ------ t4_devkit/sanity/schema/sch001.py | 42 ----------------------------- t4_devkit/sanity/schema/sch002.py | 38 -------------------------- t4_devkit/sanity/schema/sch003.py | 38 -------------------------- t4_devkit/sanity/schema/sch004.py | 38
-------------------------- t4_devkit/sanity/schema/sch005.py | 40 --------------------------- t4_devkit/sanity/schema/sch006.py | 38 -------------------------- 18 files changed, 236 insertions(+), 254 deletions(-) create mode 100644 t4_devkit/sanity/record/__init__.py create mode 100644 t4_devkit/sanity/record/base.py create mode 100644 t4_devkit/sanity/record/rec001.py create mode 100644 t4_devkit/sanity/record/rec002.py create mode 100644 t4_devkit/sanity/record/rec003.py create mode 100644 t4_devkit/sanity/record/rec004.py create mode 100644 t4_devkit/sanity/record/rec005.py create mode 100644 t4_devkit/sanity/record/rec006.py delete mode 100644 t4_devkit/sanity/schema/__init__.py delete mode 100644 t4_devkit/sanity/schema/sch001.py delete mode 100644 t4_devkit/sanity/schema/sch002.py delete mode 100644 t4_devkit/sanity/schema/sch003.py delete mode 100644 t4_devkit/sanity/schema/sch004.py delete mode 100644 t4_devkit/sanity/schema/sch005.py delete mode 100644 t4_devkit/sanity/schema/sch006.py diff --git a/docs/schema/requirement.md b/docs/schema/requirement.md index 12d6c5f..ede585d 100644 --- a/docs/schema/requirement.md +++ b/docs/schema/requirement.md @@ -14,16 +14,16 @@ | `STR008` | `lanelet-file-presence` | `Warn` | `lanelet2_map.osm` file exists under the `map/` directory. | | `STR009` | `pointcloud-map-dir-presence` | `Warn` | `pointcloud_map.pcd` directory exists under the `map/` directory. | -## Schema (`SCH`) +## Schema Record (`REC`) | ID | Name | Severity | Description | | -------- | ----------------------------- | -------- | --------------------------------------- | -| `SCH001` | `scene-single` | `Error` | `Scene` record is a single. | -| `SCH002` | `sample-not-empty` | `Error` | `Sample` record is not empty. | -| `SCH003` | `sample-data-not-empty` | `Error` | `SampleData` record is not empty. | -| `SCH004` | `ego-pose-not-empty` | `Error` | `EgoPose` record is not empty. 
| -| `SCH005` | `calibrated-sensor-non-empty` | `Error` | `CalibratedSensor` record is not empty. | -| `SCH006` | `instance-not-empty` | `Error` | `Instance` record is not empty. | +| `REC001` | `scene-single` | `Error` | `Scene` record is a single. | +| `REC002` | `sample-not-empty` | `Error` | `Sample` record is not empty. | +| `REC003` | `sample-data-not-empty` | `Error` | `SampleData` record is not empty. | +| `REC004` | `ego-pose-not-empty` | `Error` | `EgoPose` record is not empty. | +| `REC005` | `calibrated-sensor-not-empty` | `Error` | `CalibratedSensor` record is not empty. | +| `REC006` | `instance-not-empty` | `Error` | `Instance` record is not empty. | ## Reference (`REF`) diff --git a/t4_devkit/sanity/__init__.py b/t4_devkit/sanity/__init__.py index 2446981..ca2434b 100644 --- a/t4_devkit/sanity/__init__.py +++ b/t4_devkit/sanity/__init__.py @@ -1,9 +1,9 @@ from __future__ import annotations +from .format import * # noqa +from .record import * # noqa +from .reference import * # noqa from .registry import * # noqa -from .run import * # noqa from .result import * # noqa +from .run import * # noqa from .structure import * # noqa -from .schema import * # noqa -from .reference import * # noqa -from .format import * # noqa diff --git a/t4_devkit/sanity/record/__init__.py b/t4_devkit/sanity/record/__init__.py new file mode 100644 index 0000000..a4980bd --- /dev/null +++ b/t4_devkit/sanity/record/__init__.py @@ -0,0 +1,8 @@ +from __future__ import annotations + +from .rec001 import * # noqa +from .rec002 import * # noqa +from .rec003 import * # noqa +from .rec004 import * # noqa +from .rec005 import * # noqa +from .rec006 import * # noqa diff --git a/t4_devkit/sanity/record/base.py b/t4_devkit/sanity/record/base.py new file mode 100644 index 0000000..5ac936c --- /dev/null +++ b/t4_devkit/sanity/record/base.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from abc import abstractmethod +from typing import TYPE_CHECKING + +from returns.maybe import
Maybe, Nothing, Some + +from t4_devkit.schema import SchemaName + +from ..checker import Checker +from ..result import Reason +from ..safety import load_json_safe + +if TYPE_CHECKING: + from ..context import SanityContext + + +class RecordCountChecker(Checker): + """Base class for record count checkers.""" + + schema: SchemaName + + def can_skip(self, context: SanityContext) -> Maybe[Reason]: + match context.to_schema_file(self.schema): + case Some(x): + if not x.exists(): + return Maybe.from_value(Reason(f"Missing {self.schema.filename}")) + else: + return Nothing + case _: + return Maybe.from_value(Reason("Missing 'annotation' directory path")) + + def check(self, context: SanityContext) -> list[Reason]: + filepath = context.to_schema_file(self.schema).unwrap() + records = load_json_safe(filepath).unwrap() + return self.check_count(records) + + @abstractmethod + def check_count(self, records: list[dict]) -> list[Reason]: + pass diff --git a/t4_devkit/sanity/record/rec001.py b/t4_devkit/sanity/record/rec001.py new file mode 100644 index 0000000..306b54a --- /dev/null +++ b/t4_devkit/sanity/record/rec001.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from t4_devkit.schema import SchemaName + +from ..checker import RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from .base import RecordCountChecker + +if TYPE_CHECKING: + pass + +__all__ = ["REC001"] + + +@CHECKERS.register(RuleID("REC001")) +class REC001(RecordCountChecker): + """A checker of REC001.""" + + name = RuleName("scene-single") + description = "'Scene' record is a single." 
+ schema = SchemaName.SCENE + + def check_count(self, records: list[dict]) -> list[Reason]: + num_scene = len(records) + return ( + [] + if num_scene == 1 + else [Reason(f"'Scene' must contain exactly one element: {num_scene}")] + ) diff --git a/t4_devkit/sanity/record/rec002.py b/t4_devkit/sanity/record/rec002.py new file mode 100644 index 0000000..5ec6cd5 --- /dev/null +++ b/t4_devkit/sanity/record/rec002.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from t4_devkit.schema import SchemaName + +from ..checker import RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from .base import RecordCountChecker + +if TYPE_CHECKING: + pass + +__all__ = ["REC002"] + + +@CHECKERS.register(RuleID("REC002")) +class REC002(RecordCountChecker): + """A checker of REC002.""" + + name = RuleName("sample-not-empty") + description = "'Sample' record is not empty." + schema = SchemaName.SAMPLE + + def check_count(self, records: list[dict]) -> list[Reason]: + num_sample = len(records) + return [Reason("'Sample' record must not be empty")] if num_sample == 0 else [] diff --git a/t4_devkit/sanity/record/rec003.py b/t4_devkit/sanity/record/rec003.py new file mode 100644 index 0000000..d5c7a10 --- /dev/null +++ b/t4_devkit/sanity/record/rec003.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from t4_devkit.schema import SchemaName + +from ..checker import RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from .base import RecordCountChecker + +if TYPE_CHECKING: + pass + +__all__ = ["REC003"] + + +@CHECKERS.register(RuleID("REC003")) +class REC003(RecordCountChecker): + """A checker of REC003.""" + + name = RuleName("sample-data-not-empty") + description = "'SampleData' record is not empty." 
+ schema = SchemaName.SAMPLE_DATA + + def check_count(self, records: list[dict]) -> list[Reason]: + num_sample_data = len(records) + return [Reason("'SampleData' record must not be empty")] if num_sample_data == 0 else [] diff --git a/t4_devkit/sanity/record/rec004.py b/t4_devkit/sanity/record/rec004.py new file mode 100644 index 0000000..6cc3ea7 --- /dev/null +++ b/t4_devkit/sanity/record/rec004.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from t4_devkit.schema import SchemaName + +from ..checker import RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from .base import RecordCountChecker + +if TYPE_CHECKING: + pass + +__all__ = ["REC004"] + + +@CHECKERS.register(RuleID("REC004")) +class REC004(RecordCountChecker): + """A checker of REC004.""" + + name = RuleName("ego-pose-not-empty") + description = "'EgoPose' record is not empty." + schema = SchemaName.EGO_POSE + + def check_count(self, records: list[dict]) -> list[Reason]: + num_ego_pose = len(records) + return [Reason("'EgoPose' record must not be empty")] if num_ego_pose == 0 else [] diff --git a/t4_devkit/sanity/record/rec005.py b/t4_devkit/sanity/record/rec005.py new file mode 100644 index 0000000..38beee6 --- /dev/null +++ b/t4_devkit/sanity/record/rec005.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from t4_devkit.schema import SchemaName + +from ..checker import RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from .base import RecordCountChecker + +if TYPE_CHECKING: + pass + +__all__ = ["REC005"] + + +@CHECKERS.register(RuleID("REC005")) +class REC005(RecordCountChecker): + """A checker of REC005.""" + + name = RuleName("calibrated-sensor-not-empty") + description = "'CalibratedSensor' record is not empty." 
+ schema = SchemaName.CALIBRATED_SENSOR + + def check_count(self, records: list[dict]) -> list[Reason]: + num_calibrated_sensor = len(records) + return ( + [Reason("'CalibratedSensor' record must not be empty")] + if num_calibrated_sensor == 0 + else [] + ) diff --git a/t4_devkit/sanity/record/rec006.py b/t4_devkit/sanity/record/rec006.py new file mode 100644 index 0000000..5d2d5a9 --- /dev/null +++ b/t4_devkit/sanity/record/rec006.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from t4_devkit.schema import SchemaName + +from ..checker import RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from .base import RecordCountChecker + +if TYPE_CHECKING: + pass + +__all__ = ["REC006"] + + +@CHECKERS.register(RuleID("REC006")) +class REC006(RecordCountChecker): + """A checker of REC006.""" + + name = RuleName("instance-not-empty") + description = "'Instance' record is not empty." + schema = SchemaName.INSTANCE + + def check_count(self, records: list[dict]) -> list[Reason]: + num_instance = len(records) + return [Reason("'Instance' record must not be empty")] if num_instance == 0 else [] diff --git a/t4_devkit/sanity/registry.py b/t4_devkit/sanity/registry.py index bdc139a..12276a2 100644 --- a/t4_devkit/sanity/registry.py +++ b/t4_devkit/sanity/registry.py @@ -10,7 +10,7 @@ @unique class RuleGroup(Enum): STRUCTURE = "STR" - SCHEMA = "SCH" + RECORD = "REC" REFERENCE = "REF" FORMAT = "FMT" diff --git a/t4_devkit/sanity/schema/__init__.py b/t4_devkit/sanity/schema/__init__.py deleted file mode 100644 index f58dd5d..0000000 --- a/t4_devkit/sanity/schema/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import annotations - -from .sch001 import * # noqa -from .sch002 import * # noqa -from .sch003 import * # noqa -from .sch004 import * # noqa -from .sch005 import * # noqa -from .sch006 import * # noqa diff --git a/t4_devkit/sanity/schema/sch001.py b/t4_devkit/sanity/schema/sch001.py deleted 
file mode 100644 index 4b45705..0000000 --- a/t4_devkit/sanity/schema/sch001.py +++ /dev/null @@ -1,42 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from returns.maybe import Some -from returns.pipeline import is_successful - -from t4_devkit.schema import SchemaName - -from ..checker import Checker, RuleID, RuleName -from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe - -if TYPE_CHECKING: - from ..context import SanityContext - -__all__ = ["SCH001"] - - -@CHECKERS.register(RuleID("SCH001")) -class SCH001(Checker): - """A checker of SCH001.""" - - name = RuleName("scene-single") - description = "'Scene' record is a single." - - def check(self, context: SanityContext) -> list[Reason]: - match context.to_schema_file(SchemaName.SCENE): - case Some(x): - result = load_json_safe(x.as_posix()) - if not is_successful(result): - return [Reason("Failed to load 'Scene' file")] - else: - num_scene = len(result.unwrap()) - return ( - [] - if num_scene == 1 - else [Reason(f"'Scene' must contain exactly one element: {num_scene}")] - ) - case _: - return [Reason("Failed to load 'Scene' file")] diff --git a/t4_devkit/sanity/schema/sch002.py b/t4_devkit/sanity/schema/sch002.py deleted file mode 100644 index 173c45c..0000000 --- a/t4_devkit/sanity/schema/sch002.py +++ /dev/null @@ -1,38 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from returns.maybe import Some -from returns.pipeline import is_successful - -from t4_devkit.schema import SchemaName - -from ..checker import Checker, RuleID, RuleName -from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe - -if TYPE_CHECKING: - from ..context import SanityContext - -__all__ = ["SCH002"] - - -@CHECKERS.register(RuleID("SCH002")) -class SCH002(Checker): - """A checker of SCH002.""" - - name = RuleName("sample-not-empty") - description = "'Sample' record is not empty." 
- - def check(self, context: SanityContext) -> list[Reason]: - match context.to_schema_file(SchemaName.SAMPLE): - case Some(x): - result = load_json_safe(x.as_posix()) - if not is_successful(result): - return [Reason("Failed to load 'Sample' file")] - else: - num_sample = len(result.unwrap()) - return [] if num_sample > 0 else [Reason("No 'Sample' found")] - case _: - return [Reason("Failed to load 'Sample' file")] diff --git a/t4_devkit/sanity/schema/sch003.py b/t4_devkit/sanity/schema/sch003.py deleted file mode 100644 index fde6671..0000000 --- a/t4_devkit/sanity/schema/sch003.py +++ /dev/null @@ -1,38 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from returns.maybe import Some -from returns.pipeline import is_successful - -from t4_devkit.schema import SchemaName - -from ..checker import Checker, RuleID, RuleName -from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe - -if TYPE_CHECKING: - from ..context import SanityContext - -__all__ = ["SCH003"] - - -@CHECKERS.register(RuleID("SCH003")) -class SCH003(Checker): - """A checker of SCH003.""" - - name = RuleName("sample-data-not-empty") - description = "'SampleData' record is not empty." 
- - def check(self, context: SanityContext) -> list[Reason]: - match context.to_schema_file(SchemaName.SAMPLE_DATA): - case Some(x): - result = load_json_safe(x.as_posix()) - if not is_successful(result): - return [Reason("Failed to load 'SampleData' file")] - else: - num_sample_data = len(result.unwrap()) - return [] if num_sample_data > 0 else [Reason("No 'SampleData' found")] - case _: - return [Reason("Failed to load 'SampleData' file")] diff --git a/t4_devkit/sanity/schema/sch004.py b/t4_devkit/sanity/schema/sch004.py deleted file mode 100644 index c1b2090..0000000 --- a/t4_devkit/sanity/schema/sch004.py +++ /dev/null @@ -1,38 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from returns.maybe import Some -from returns.pipeline import is_successful - -from t4_devkit.schema import SchemaName - -from ..checker import Checker, RuleID, RuleName -from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe - -if TYPE_CHECKING: - from ..context import SanityContext - -__all__ = ["SCH004"] - - -@CHECKERS.register(RuleID("SCH004")) -class SCH004(Checker): - """A checker of SCH004.""" - - name = RuleName("ego-pose-not-empty") - description = "'EgoPose' record is not empty." 
- - def check(self, context: SanityContext) -> list[Reason]: - match context.to_schema_file(SchemaName.EGO_POSE): - case Some(x): - result = load_json_safe(x.as_posix()) - if not is_successful(result): - return [Reason("Failed to load 'EgoPose' file")] - else: - num_ego_pose = len(result.unwrap()) - return [] if num_ego_pose > 0 else [Reason("No 'EgoPose' found")] - case _: - return [Reason("Failed to load 'EgoPose' file")] diff --git a/t4_devkit/sanity/schema/sch005.py b/t4_devkit/sanity/schema/sch005.py deleted file mode 100644 index 60a3c52..0000000 --- a/t4_devkit/sanity/schema/sch005.py +++ /dev/null @@ -1,40 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from returns.maybe import Some -from returns.pipeline import is_successful - -from t4_devkit.schema import SchemaName - -from ..checker import Checker, RuleID, RuleName -from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe - -if TYPE_CHECKING: - from ..context import SanityContext - -__all__ = ["SCH005"] - - -@CHECKERS.register(RuleID("SCH005")) -class SCH005(Checker): - """A checker of SCH005.""" - - name = RuleName("calibrated-sensor-not-empty") - description = "'CalibratedSensor' record is not empty." 
- - def check(self, context: SanityContext) -> list[Reason]: - match context.to_schema_file(SchemaName.CALIBRATED_SENSOR): - case Some(x): - result = load_json_safe(x.as_posix()) - if not is_successful(result): - return [Reason("Failed to load 'CalibratedSensor' file")] - else: - num_calibrated_sensor = len(result.unwrap()) - return ( - [] if num_calibrated_sensor > 0 else [Reason("No 'CalibratedSensor' found")] - ) - case _: - return [Reason("Failed to load 'CalibratedSensor' file")] diff --git a/t4_devkit/sanity/schema/sch006.py b/t4_devkit/sanity/schema/sch006.py deleted file mode 100644 index 6ecb737..0000000 --- a/t4_devkit/sanity/schema/sch006.py +++ /dev/null @@ -1,38 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -from returns.maybe import Some -from returns.pipeline import is_successful - -from t4_devkit.schema import SchemaName - -from ..checker import Checker, RuleID, RuleName -from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe - -if TYPE_CHECKING: - from ..context import SanityContext - -__all__ = ["SCH006"] - - -@CHECKERS.register(RuleID("SCH006")) -class SCH006(Checker): - """A checker of SCH006.""" - - name = RuleName("instance-not-empty") - description = "'Instance' record is not empty." 
- - def check(self, context: SanityContext) -> list[Reason]: - match context.to_schema_file(SchemaName.INSTANCE): - case Some(x): - result = load_json_safe(x.as_posix()) - if not is_successful(result): - return [Reason("Failed to load 'Instance' file")] - else: - num_instance = len(result.unwrap()) - return [] if num_instance > 0 else [Reason("No 'Instance' found")] - case _: - return [Reason("Failed to load 'Instance' file")] From 2ec319a124885bfe8da6ddc2603d9270d45a38c0 Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Tue, 28 Oct 2025 17:02:31 +0900 Subject: [PATCH 11/21] feat: store reports as list instead of dict Signed-off-by: ktro2828 --- docs/cli/t4sanity.md | 7 ++++--- t4_devkit/cli/sanity.py | 6 +++--- t4_devkit/sanity/result.py | 18 ++++++++++++------ t4_devkit/sanity/run.py | 2 +- 4 files changed, 20 insertions(+), 13 deletions(-) diff --git a/docs/cli/t4sanity.md b/docs/cli/t4sanity.md index abe8fb1..b32e5dd 100644 --- a/docs/cli/t4sanity.md +++ b/docs/cli/t4sanity.md @@ -108,15 +108,15 @@ Then a JSON file named `results.json` will be generated as follows: { "dataset_id": "", "version": , - "reports": { - "": { + "reports": [ + { "id": "", "name": "", "description": "", "status": "", "reasons": "<[, , ...]: [str; N] | null>" // Failure or skipped reasons, null if success }, - } + ] } ] ``` @@ -126,5 +126,6 @@ Then a JSON file named `results.json` will be generated as follows: With `-e; --excludes` option enables us to exclude specific checks by specifying the **rule IDs or groups**: ```shell +# Exclude STR001 and all FMT-relevant rules t4sanity -e STR001 -e FMT ``` diff --git a/t4_devkit/cli/sanity.py b/t4_devkit/cli/sanity.py index a5026d4..129bf6f 100644 --- a/t4_devkit/cli/sanity.py +++ b/t4_devkit/cli/sanity.py @@ -36,9 +36,9 @@ def _run_sanity_check( def _print_table(results: list[SanityResult], *, detail: bool = False) -> str: summary_rows = [] for result in results: - success = sum(1 for rp in result.reports.values() if rp.is_success()) - failures = 
sum(1 for rp in result.reports.values() if rp.is_failure()) - skips = sum(1 for rp in result.reports.values() if rp.is_skipped()) + success = sum(1 for rp in result.reports if rp.is_success()) + failures = sum(1 for rp in result.reports if rp.is_failure()) + skips = sum(1 for rp in result.reports if rp.is_skipped()) summary_rows.append( [ result.dataset_id, diff --git a/t4_devkit/sanity/result.py b/t4_devkit/sanity/result.py index a7db36a..e1f6f0e 100644 --- a/t4_devkit/sanity/result.py +++ b/t4_devkit/sanity/result.py @@ -39,6 +39,12 @@ class Report: status: Status reasons: list[Reason] | None = field(default=None) + def __attrs_post_init__(self) -> None: + if self.is_success(): + assert self.reasons is None, "Success report cannot have reasons" + else: + assert self.reasons is not None, "Failure report must have reasons" + def is_success(self) -> bool: return self.status == Status.SUCCESS @@ -68,10 +74,10 @@ def make_failure(id: RuleID, name: RuleName, description: str, reasons: list[Rea class SanityResult: dataset_id: str version: str | None - reports: dict[str, Report] + reports: list[Report] @classmethod - def from_context(cls, context: SanityContext, reports: dict[RuleID, Report]) -> Self: + def from_context(cls, context: SanityContext, reports: list[Report]) -> Self: return cls( dataset_id=context.dataset_id.value_or("UNKNOWN"), version=context.version.value_or(None), @@ -80,15 +86,15 @@ def from_context(cls, context: SanityContext, reports: dict[RuleID, Report]) -> def __repr__(self) -> str: string = f"=== DatasetID: {self.dataset_id} ===\n" - for id, report in self.reports.items(): + for report in self.reports: if report.is_failure(): - string += f"\033[31m {id}:\033[0m\n" + string += f"\033[31m {report.id}:\033[0m\n" for reason in report.reasons or []: string += f"\033[31m - {reason}\033[0m\n" elif report.is_skipped(): - string += f"\033[33m {id}:\033[0m\n" + string += f"\033[33m {report.id}:\033[0m\n" for reason in report.reasons or []: string += 
f"\033[33m - {reason}\033[0m\n" else: - string += f"\033[32m {id}: ✅\033[0m\n" + string += f"\033[32m {report.id}: ✅\033[0m\n" return string diff --git a/t4_devkit/sanity/run.py b/t4_devkit/sanity/run.py index 416db8a..5c0f1b4 100644 --- a/t4_devkit/sanity/run.py +++ b/t4_devkit/sanity/run.py @@ -26,6 +26,6 @@ def sanity_check( context = SanityContext.from_path(data_root, revision=revision) checkers = CHECKERS.build(excludes=excludes) - reports = {checker.id: checker(context) for checker in checkers} + reports = [checker(context) for checker in checkers] return SanityResult.from_context(context, reports) From 1b4b1d0f648ebd7aa6edcc9619505b57c130259f Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Tue, 28 Oct 2025 17:28:09 +0900 Subject: [PATCH 12/21] feat: add tier4 checker Signed-off-by: ktro2828 --- docs/schema/requirement.md | 6 ++-- t4_devkit/sanity/__init__.py | 1 + t4_devkit/sanity/registry.py | 1 + t4_devkit/sanity/tier4/__init__.py | 3 ++ t4_devkit/sanity/tier4/tiv001.py | 48 ++++++++++++++++++++++++++++++ 5 files changed, 56 insertions(+), 3 deletions(-) create mode 100644 t4_devkit/sanity/tier4/__init__.py create mode 100644 t4_devkit/sanity/tier4/tiv001.py diff --git a/docs/schema/requirement.md b/docs/schema/requirement.md index ede585d..99507d9 100644 --- a/docs/schema/requirement.md +++ b/docs/schema/requirement.md @@ -66,6 +66,6 @@ ## Tier4 Instance (`TIV`) -| ID | Name | Severity | Description | -| -------- | ------------ | -------- | --------------------------------------- | -| `TIV001` | `load-tier4` | `Error` | Success to initialize `Tier4` instance. | +| ID | Name | Severity | Description | +| -------- | ------------ | -------- | ----------------------------------------------- | +| `TIV001` | `load-tier4` | `Error` | Ensure `Tier4` instance is loaded successfully. 
| diff --git a/t4_devkit/sanity/__init__.py b/t4_devkit/sanity/__init__.py index ca2434b..7a7a3ca 100644 --- a/t4_devkit/sanity/__init__.py +++ b/t4_devkit/sanity/__init__.py @@ -7,3 +7,4 @@ from .result import * # noqa from .run import * # noqa from .structure import * # noqa +from .tier4 import * # noqa diff --git a/t4_devkit/sanity/registry.py b/t4_devkit/sanity/registry.py index 12276a2..1bcd388 100644 --- a/t4_devkit/sanity/registry.py +++ b/t4_devkit/sanity/registry.py @@ -13,6 +13,7 @@ class RuleGroup(Enum): RECORD = "REC" REFERENCE = "REF" FORMAT = "FMT" + TIER4 = "TIV" @classmethod def values(cls) -> list[str]: diff --git a/t4_devkit/sanity/tier4/__init__.py b/t4_devkit/sanity/tier4/__init__.py new file mode 100644 index 0000000..92b85ff --- /dev/null +++ b/t4_devkit/sanity/tier4/__init__.py @@ -0,0 +1,3 @@ +from __future__ import annotations + +from .tiv001 import * # noqa diff --git a/t4_devkit/sanity/tier4/tiv001.py b/t4_devkit/sanity/tier4/tiv001.py new file mode 100644 index 0000000..0ee376b --- /dev/null +++ b/t4_devkit/sanity/tier4/tiv001.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.pipeline import is_successful +from returns.result import Result, safe +from returns.maybe import Maybe, Nothing, Some + +from ..checker import Checker, RuleID, RuleName +from ..result import Reason +from ..registry import CHECKERS +from t4_devkit import Tier4 + +if TYPE_CHECKING: + from ..context import SanityContext + +__all__ = ["TIV001"] + + +@CHECKERS.register(RuleID("TIV001")) +class TIV001(Checker): + """A checker for TIV001.""" + + name = RuleName("load-tier4") + description = "Ensure 'Tier4' instance is loaded successfully." 
+ + def can_skip(self, context: SanityContext) -> Maybe[Reason]: + match context.data_root: + case Some(x): + if not x.exists(): + return Maybe.from_value(Reason(f"'{x.as_posix()}' not found")) + return Nothing + case _: + return Nothing + + def check(self, context: SanityContext) -> list[Reason]: + result = _load_tier4_safe(context) + return ( + [] if is_successful(result) else [Reason(f"Failed to load Tier4: {result.failure()}")] + ) + + +@safe +def _load_tier4_safe(context: SanityContext) -> Result[Tier4, Exception]: + data_root = context.data_root.unwrap() + revision = context.version.value_or(None) + data_root = data_root.as_posix() if revision is None else data_root.parent.as_posix() + return Tier4(data_root, revision=revision, verbose=False) From 7c0f13290621ec9107928b074dbba929939c4195 Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Fri, 31 Oct 2025 11:15:41 +0900 Subject: [PATCH 13/21] style: remove unused TYPE_CHECKING Signed-off-by: ktro2828 --- t4_devkit/sanity/format/base.py | 4 ++-- t4_devkit/sanity/format/fmt001.py | 4 ---- t4_devkit/sanity/format/fmt002.py | 4 ---- t4_devkit/sanity/format/fmt003.py | 4 ---- t4_devkit/sanity/format/fmt004.py | 4 ---- t4_devkit/sanity/format/fmt005.py | 4 ---- t4_devkit/sanity/format/fmt006.py | 5 ----- t4_devkit/sanity/format/fmt007.py | 5 ----- t4_devkit/sanity/format/fmt008.py | 4 ---- t4_devkit/sanity/format/fmt009.py | 4 ---- t4_devkit/sanity/format/fmt010.py | 4 ---- t4_devkit/sanity/format/fmt011.py | 4 ---- t4_devkit/sanity/format/fmt012.py | 4 ---- t4_devkit/sanity/format/fmt013.py | 4 ---- t4_devkit/sanity/format/fmt014.py | 4 ---- t4_devkit/sanity/format/fmt015.py | 4 ---- t4_devkit/sanity/format/fmt016.py | 4 ---- t4_devkit/sanity/format/fmt017.py | 4 ---- t4_devkit/sanity/format/fmt018.py | 4 ---- t4_devkit/sanity/record/base.py | 8 ++++++++ t4_devkit/sanity/record/rec001.py | 4 ---- t4_devkit/sanity/record/rec002.py | 4 ---- t4_devkit/sanity/record/rec003.py | 4 ---- t4_devkit/sanity/record/rec004.py | 4 
---- t4_devkit/sanity/record/rec005.py | 4 ---- t4_devkit/sanity/record/rec006.py | 4 ---- t4_devkit/sanity/reference/base.py | 8 ++++++++ t4_devkit/sanity/reference/ref001.py | 4 ---- t4_devkit/sanity/reference/ref002.py | 4 ---- t4_devkit/sanity/reference/ref003.py | 4 ---- t4_devkit/sanity/reference/ref004.py | 4 ---- t4_devkit/sanity/reference/ref005.py | 4 ---- t4_devkit/sanity/reference/ref006.py | 5 ----- t4_devkit/sanity/reference/ref007.py | 5 ----- t4_devkit/sanity/reference/ref008.py | 4 ---- t4_devkit/sanity/reference/ref009.py | 4 ---- t4_devkit/sanity/reference/ref010.py | 4 ---- t4_devkit/sanity/reference/ref011.py | 4 ---- 38 files changed, 18 insertions(+), 146 deletions(-) diff --git a/t4_devkit/sanity/format/base.py b/t4_devkit/sanity/format/base.py index d041491..2421f03 100644 --- a/t4_devkit/sanity/format/base.py +++ b/t4_devkit/sanity/format/base.py @@ -37,10 +37,10 @@ def check(self, context: SanityContext) -> list[Reason]: return [] records = load_json_safe(filepath) - return build_records(self.schema, records.unwrap()) + return _build_records(self.schema, records.unwrap()) -def build_records(schema: SchemaName, records: list[dict]) -> list[Reason]: +def _build_records(schema: SchemaName, records: list[dict]) -> list[Reason]: module = SCHEMAS.get(schema) failures = [] for record in records: diff --git a/t4_devkit/sanity/format/fmt001.py b/t4_devkit/sanity/format/fmt001.py index 04acc3b..b156ee0 100644 --- a/t4_devkit/sanity/format/fmt001.py +++ b/t4_devkit/sanity/format/fmt001.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import FieldTypeChecker -if TYPE_CHECKING: - pass __all__ = ["FMT001"] diff --git a/t4_devkit/sanity/format/fmt002.py b/t4_devkit/sanity/format/fmt002.py index 525ebbf..240498b 100644 --- a/t4_devkit/sanity/format/fmt002.py +++ 
b/t4_devkit/sanity/format/fmt002.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import FieldTypeChecker -if TYPE_CHECKING: - pass __all__ = ["FMT002"] diff --git a/t4_devkit/sanity/format/fmt003.py b/t4_devkit/sanity/format/fmt003.py index c524e87..8baf068 100644 --- a/t4_devkit/sanity/format/fmt003.py +++ b/t4_devkit/sanity/format/fmt003.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import FieldTypeChecker -if TYPE_CHECKING: - pass __all__ = ["FMT003"] diff --git a/t4_devkit/sanity/format/fmt004.py b/t4_devkit/sanity/format/fmt004.py index 26b0a60..685f650 100644 --- a/t4_devkit/sanity/format/fmt004.py +++ b/t4_devkit/sanity/format/fmt004.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import FieldTypeChecker -if TYPE_CHECKING: - pass __all__ = ["FMT004"] diff --git a/t4_devkit/sanity/format/fmt005.py b/t4_devkit/sanity/format/fmt005.py index a89c29c..0610280 100644 --- a/t4_devkit/sanity/format/fmt005.py +++ b/t4_devkit/sanity/format/fmt005.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import FieldTypeChecker -if TYPE_CHECKING: - pass __all__ = ["FMT005"] diff --git a/t4_devkit/sanity/format/fmt006.py b/t4_devkit/sanity/format/fmt006.py index 8580af5..b35ed1e 100644 --- a/t4_devkit/sanity/format/fmt006.py +++ b/t4_devkit/sanity/format/fmt006.py @@ -1,16 +1,11 @@ from __future__ import annotations -from 
typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import FieldTypeChecker -if TYPE_CHECKING: - pass - __all__ = ["FMT006"] diff --git a/t4_devkit/sanity/format/fmt007.py b/t4_devkit/sanity/format/fmt007.py index e4f1c08..a62ef8d 100644 --- a/t4_devkit/sanity/format/fmt007.py +++ b/t4_devkit/sanity/format/fmt007.py @@ -1,16 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import FieldTypeChecker -if TYPE_CHECKING: - pass - __all__ = ["FMT007"] diff --git a/t4_devkit/sanity/format/fmt008.py b/t4_devkit/sanity/format/fmt008.py index f020e6f..7bc978e 100644 --- a/t4_devkit/sanity/format/fmt008.py +++ b/t4_devkit/sanity/format/fmt008.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import FieldTypeChecker -if TYPE_CHECKING: - pass __all__ = ["FMT008"] diff --git a/t4_devkit/sanity/format/fmt009.py b/t4_devkit/sanity/format/fmt009.py index e3fb87f..9211e44 100644 --- a/t4_devkit/sanity/format/fmt009.py +++ b/t4_devkit/sanity/format/fmt009.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import FieldTypeChecker -if TYPE_CHECKING: - pass __all__ = ["FMT009"] diff --git a/t4_devkit/sanity/format/fmt010.py b/t4_devkit/sanity/format/fmt010.py index d5145f8..0235bc4 100644 --- a/t4_devkit/sanity/format/fmt010.py +++ b/t4_devkit/sanity/format/fmt010.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import 
RuleID, RuleName from ..registry import CHECKERS from .base import FieldTypeChecker -if TYPE_CHECKING: - pass __all__ = ["FMT010"] diff --git a/t4_devkit/sanity/format/fmt011.py b/t4_devkit/sanity/format/fmt011.py index 077732e..612b3de 100644 --- a/t4_devkit/sanity/format/fmt011.py +++ b/t4_devkit/sanity/format/fmt011.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import FieldTypeChecker -if TYPE_CHECKING: - pass __all__ = ["FMT011"] diff --git a/t4_devkit/sanity/format/fmt012.py b/t4_devkit/sanity/format/fmt012.py index 1083e3c..d98169f 100644 --- a/t4_devkit/sanity/format/fmt012.py +++ b/t4_devkit/sanity/format/fmt012.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import FieldTypeChecker -if TYPE_CHECKING: - pass __all__ = ["FMT012"] diff --git a/t4_devkit/sanity/format/fmt013.py b/t4_devkit/sanity/format/fmt013.py index 0c808c2..3318f72 100644 --- a/t4_devkit/sanity/format/fmt013.py +++ b/t4_devkit/sanity/format/fmt013.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import FieldTypeChecker -if TYPE_CHECKING: - pass __all__ = ["FMT013"] diff --git a/t4_devkit/sanity/format/fmt014.py b/t4_devkit/sanity/format/fmt014.py index a4d5986..85fa09c 100644 --- a/t4_devkit/sanity/format/fmt014.py +++ b/t4_devkit/sanity/format/fmt014.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import FieldTypeChecker -if 
TYPE_CHECKING: - pass __all__ = ["FMT014"] diff --git a/t4_devkit/sanity/format/fmt015.py b/t4_devkit/sanity/format/fmt015.py index d90e80e..02b61fb 100644 --- a/t4_devkit/sanity/format/fmt015.py +++ b/t4_devkit/sanity/format/fmt015.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import FieldTypeChecker -if TYPE_CHECKING: - pass __all__ = ["FMT015"] diff --git a/t4_devkit/sanity/format/fmt016.py b/t4_devkit/sanity/format/fmt016.py index 2014e6a..59bc770 100644 --- a/t4_devkit/sanity/format/fmt016.py +++ b/t4_devkit/sanity/format/fmt016.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import FieldTypeChecker -if TYPE_CHECKING: - pass __all__ = ["FMT016"] diff --git a/t4_devkit/sanity/format/fmt017.py b/t4_devkit/sanity/format/fmt017.py index e9dfac6..ea5727b 100644 --- a/t4_devkit/sanity/format/fmt017.py +++ b/t4_devkit/sanity/format/fmt017.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import FieldTypeChecker -if TYPE_CHECKING: - pass __all__ = ["FMT017"] diff --git a/t4_devkit/sanity/format/fmt018.py b/t4_devkit/sanity/format/fmt018.py index 0732ba1..6e4df61 100644 --- a/t4_devkit/sanity/format/fmt018.py +++ b/t4_devkit/sanity/format/fmt018.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import FieldTypeChecker -if TYPE_CHECKING: - pass __all__ = ["FMT018"] diff --git a/t4_devkit/sanity/record/base.py 
b/t4_devkit/sanity/record/base.py index 5ac936c..63ac573 100644 --- a/t4_devkit/sanity/record/base.py +++ b/t4_devkit/sanity/record/base.py @@ -37,4 +37,12 @@ def check(self, context: SanityContext) -> list[Reason]: @abstractmethod def check_count(self, records: list[dict]) -> list[Reason]: + """Check the count of records. + + Args: + records (list[dict]): The list of records to check. + + Returns: + A list of reasons for any issues found, otherwise an empty list. + """ pass diff --git a/t4_devkit/sanity/record/rec001.py b/t4_devkit/sanity/record/rec001.py index 306b54a..2923595 100644 --- a/t4_devkit/sanity/record/rec001.py +++ b/t4_devkit/sanity/record/rec001.py @@ -1,7 +1,5 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName @@ -9,8 +7,6 @@ from ..result import Reason from .base import RecordCountChecker -if TYPE_CHECKING: - pass __all__ = ["REC001"] diff --git a/t4_devkit/sanity/record/rec002.py b/t4_devkit/sanity/record/rec002.py index 5ec6cd5..0d802c9 100644 --- a/t4_devkit/sanity/record/rec002.py +++ b/t4_devkit/sanity/record/rec002.py @@ -1,7 +1,5 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName @@ -9,8 +7,6 @@ from ..result import Reason from .base import RecordCountChecker -if TYPE_CHECKING: - pass __all__ = ["REC002"] diff --git a/t4_devkit/sanity/record/rec003.py b/t4_devkit/sanity/record/rec003.py index d5c7a10..e11301b 100644 --- a/t4_devkit/sanity/record/rec003.py +++ b/t4_devkit/sanity/record/rec003.py @@ -1,7 +1,5 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName @@ -9,8 +7,6 @@ from ..result import Reason from .base import RecordCountChecker -if TYPE_CHECKING: - pass __all__ = ["REC003"] diff --git a/t4_devkit/sanity/record/rec004.py 
b/t4_devkit/sanity/record/rec004.py index 6cc3ea7..048fdfa 100644 --- a/t4_devkit/sanity/record/rec004.py +++ b/t4_devkit/sanity/record/rec004.py @@ -1,7 +1,5 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName @@ -9,8 +7,6 @@ from ..result import Reason from .base import RecordCountChecker -if TYPE_CHECKING: - pass __all__ = ["REC004"] diff --git a/t4_devkit/sanity/record/rec005.py b/t4_devkit/sanity/record/rec005.py index 38beee6..9464e44 100644 --- a/t4_devkit/sanity/record/rec005.py +++ b/t4_devkit/sanity/record/rec005.py @@ -1,7 +1,5 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName @@ -9,8 +7,6 @@ from ..result import Reason from .base import RecordCountChecker -if TYPE_CHECKING: - pass __all__ = ["REC005"] diff --git a/t4_devkit/sanity/record/rec006.py b/t4_devkit/sanity/record/rec006.py index 5d2d5a9..802d659 100644 --- a/t4_devkit/sanity/record/rec006.py +++ b/t4_devkit/sanity/record/rec006.py @@ -1,7 +1,5 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName @@ -9,8 +7,6 @@ from ..result import Reason from .base import RecordCountChecker -if TYPE_CHECKING: - pass __all__ = ["REC006"] diff --git a/t4_devkit/sanity/reference/base.py b/t4_devkit/sanity/reference/base.py index 008f140..cf1f849 100644 --- a/t4_devkit/sanity/reference/base.py +++ b/t4_devkit/sanity/reference/base.py @@ -56,4 +56,12 @@ def check(self, context: SanityContext) -> list[Reason]: ] def is_additional_condition_ok(self, record: dict[str, Any]) -> bool: + """Return True if the additional condition is met. + + Args: + record: The record to check. + + Returns: + True if the additional condition is met, False otherwise. 
+ """ return True diff --git a/t4_devkit/sanity/reference/ref001.py b/t4_devkit/sanity/reference/ref001.py index d8a664e..7e1c0ea 100644 --- a/t4_devkit/sanity/reference/ref001.py +++ b/t4_devkit/sanity/reference/ref001.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import ReferenceChecker -if TYPE_CHECKING: - pass __all__ = ["REF001"] diff --git a/t4_devkit/sanity/reference/ref002.py b/t4_devkit/sanity/reference/ref002.py index f97f6e3..45e1ab7 100644 --- a/t4_devkit/sanity/reference/ref002.py +++ b/t4_devkit/sanity/reference/ref002.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import ReferenceChecker -if TYPE_CHECKING: - pass __all__ = ["REF002"] diff --git a/t4_devkit/sanity/reference/ref003.py b/t4_devkit/sanity/reference/ref003.py index 6d17399..8c395e9 100644 --- a/t4_devkit/sanity/reference/ref003.py +++ b/t4_devkit/sanity/reference/ref003.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import ReferenceChecker -if TYPE_CHECKING: - pass __all__ = ["REF003"] diff --git a/t4_devkit/sanity/reference/ref004.py b/t4_devkit/sanity/reference/ref004.py index 48b40e3..7206aba 100644 --- a/t4_devkit/sanity/reference/ref004.py +++ b/t4_devkit/sanity/reference/ref004.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import ReferenceChecker -if TYPE_CHECKING: - pass __all__ = ["REF004"] diff --git 
a/t4_devkit/sanity/reference/ref005.py b/t4_devkit/sanity/reference/ref005.py index 7fd6e9d..b9251b2 100644 --- a/t4_devkit/sanity/reference/ref005.py +++ b/t4_devkit/sanity/reference/ref005.py @@ -1,7 +1,5 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from typing_extensions import Any from t4_devkit.schema import SchemaName @@ -10,8 +8,6 @@ from ..registry import CHECKERS from .base import ReferenceChecker -if TYPE_CHECKING: - pass __all__ = ["REF005"] diff --git a/t4_devkit/sanity/reference/ref006.py b/t4_devkit/sanity/reference/ref006.py index daea372..708c7b5 100644 --- a/t4_devkit/sanity/reference/ref006.py +++ b/t4_devkit/sanity/reference/ref006.py @@ -1,16 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import ReferenceChecker -if TYPE_CHECKING: - pass - __all__ = ["REF006"] diff --git a/t4_devkit/sanity/reference/ref007.py b/t4_devkit/sanity/reference/ref007.py index 2e097ae..d23481e 100644 --- a/t4_devkit/sanity/reference/ref007.py +++ b/t4_devkit/sanity/reference/ref007.py @@ -1,16 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import ReferenceChecker -if TYPE_CHECKING: - pass - __all__ = ["REF007"] diff --git a/t4_devkit/sanity/reference/ref008.py b/t4_devkit/sanity/reference/ref008.py index f15ac99..da2b4ce 100644 --- a/t4_devkit/sanity/reference/ref008.py +++ b/t4_devkit/sanity/reference/ref008.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import ReferenceChecker -if TYPE_CHECKING: - pass __all__ = ["REF008"] diff --git 
a/t4_devkit/sanity/reference/ref009.py b/t4_devkit/sanity/reference/ref009.py index 846835a..15161e2 100644 --- a/t4_devkit/sanity/reference/ref009.py +++ b/t4_devkit/sanity/reference/ref009.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import ReferenceChecker -if TYPE_CHECKING: - pass __all__ = ["REF009"] diff --git a/t4_devkit/sanity/reference/ref010.py b/t4_devkit/sanity/reference/ref010.py index 4a38d92..d988b72 100644 --- a/t4_devkit/sanity/reference/ref010.py +++ b/t4_devkit/sanity/reference/ref010.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import ReferenceChecker -if TYPE_CHECKING: - pass __all__ = ["REF010"] diff --git a/t4_devkit/sanity/reference/ref011.py b/t4_devkit/sanity/reference/ref011.py index bdbbd40..b67d340 100644 --- a/t4_devkit/sanity/reference/ref011.py +++ b/t4_devkit/sanity/reference/ref011.py @@ -1,15 +1,11 @@ from __future__ import annotations -from typing import TYPE_CHECKING - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS from .base import ReferenceChecker -if TYPE_CHECKING: - pass __all__ = ["REF011"] From 4de416f02f44466af91fe4847eb8c7c9e05de944 Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Wed, 5 Nov 2025 18:14:36 +0900 Subject: [PATCH 14/21] feat: add file reference checker Signed-off-by: ktro2828 --- docs/schema/requirement.md | 2 ++ t4_devkit/sanity/reference/__init__.py | 2 ++ t4_devkit/sanity/reference/base.py | 17 +++++++++++ t4_devkit/sanity/reference/ref012.py | 40 +++++++++++++++++++++++++ t4_devkit/sanity/reference/ref013.py | 41 ++++++++++++++++++++++++++ 5 files changed, 102 insertions(+) create mode 100644 
t4_devkit/sanity/reference/ref012.py create mode 100644 t4_devkit/sanity/reference/ref013.py diff --git a/docs/schema/requirement.md b/docs/schema/requirement.md index 99507d9..18185cd 100644 --- a/docs/schema/requirement.md +++ b/docs/schema/requirement.md @@ -40,6 +40,8 @@ | `REF009` | `instance-to-category` | `Error` | `Instance.category_token` refers to `Category` record. | | `REF010` | `instance-to-first-sample-annotation` | `Error` | `Instance.first_annotation_token` refers to `SampleAnnotation` record. | | `REF011` | `instance-to-last-sample-annotation` | `Error` | `Instance.last_annotation_token` refers to `SampleAnnotation` record. | +| `REF012` | `sample-data-filename-presence` | `Error` | `SampleData.filename` exists. | +| `REF013` | `sample-data-info-filename-presence` | `Error` | `SampleData.info_filename` exists if it is not `None`. | ## Format (`FMT`) diff --git a/t4_devkit/sanity/reference/__init__.py b/t4_devkit/sanity/reference/__init__.py index 15f583d..264c169 100644 --- a/t4_devkit/sanity/reference/__init__.py +++ b/t4_devkit/sanity/reference/__init__.py @@ -11,3 +11,5 @@ from .ref009 import * # noqa from .ref010 import * # noqa from .ref011 import * # noqa +from .ref012 import * # noqa +from .ref013 import * # noqa diff --git a/t4_devkit/sanity/reference/base.py b/t4_devkit/sanity/reference/base.py index cf1f849..27fc52f 100644 --- a/t4_devkit/sanity/reference/base.py +++ b/t4_devkit/sanity/reference/base.py @@ -65,3 +65,20 @@ def is_additional_condition_ok(self, record: dict[str, Any]) -> bool: True if the additional condition is met, False otherwise. """ return True + + +class FileReferenceChecker(Checker): + """Base class for file reference checkers. 
+ + Attributes: + """ + + schema: SchemaName + + def can_skip(self, context: SanityContext) -> Maybe[Reason]: + filepath = context.to_schema_file(self.schema) + match filepath: + case Some(x): + return Nothing if x.exists() else Maybe.from_value(Reason(f"Missing {x}")) + case _: + return Maybe.from_value(Reason("Missing 'annotation' directory path")) diff --git a/t4_devkit/sanity/reference/ref012.py b/t4_devkit/sanity/reference/ref012.py new file mode 100644 index 0000000..7387f61 --- /dev/null +++ b/t4_devkit/sanity/reference/ref012.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from t4_devkit.schema import SchemaName + +from ..checker import RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe +from .base import FileReferenceChecker + +if TYPE_CHECKING: + from ..context import SanityContext + + +@CHECKERS.register(RuleID("REF012")) +class REF012(FileReferenceChecker): + """A checker of REF012.""" + + name = RuleName("sample-data-filename-presence") + description = "'SampleData.filename' exists." 
+ + schema = SchemaName.SAMPLE_DATA + + def check(self, context: SanityContext) -> list[Reason]: + filepath = context.to_schema_file(self.schema).unwrap() + records = load_json_safe(filepath).unwrap() + match context.data_root: + case Some(x): + reasons = [ + Reason(f"File not found: {record['filename']}") + for record in records + if not x.joinpath(record["filename"]).exists() + ] + return reasons + case _: + return [Reason("Missing sensor data directory.")] diff --git a/t4_devkit/sanity/reference/ref013.py b/t4_devkit/sanity/reference/ref013.py new file mode 100644 index 0000000..741466c --- /dev/null +++ b/t4_devkit/sanity/reference/ref013.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from returns.maybe import Some + +from t4_devkit.schema import SchemaName + +from ..checker import RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe +from .base import FileReferenceChecker + +if TYPE_CHECKING: + from ..context import SanityContext + + +@CHECKERS.register(RuleID("REF013")) +class REF013(FileReferenceChecker): + """A checker of REF013.""" + + name = RuleName("sample-data-info-filename-presence") + description = "'SampleData.info_filename' exists." 
+ + schema = SchemaName.SAMPLE_DATA + + def check(self, context: SanityContext) -> list[Reason]: + filepath = context.to_schema_file(self.schema).unwrap() + records = load_json_safe(filepath).unwrap() + match context.data_root: + case Some(x): + reasons = [ + Reason(f"File not found: {record['info_filename']}") + for record in records + if record.get("info_filename") is not None + and not x.joinpath(record["info_filename"]).exists() + ] + return reasons + case _: + return [Reason("Missing sensor data directory.")] From c35b1208fcd190fa47b6d1600c1eb068dc46a42e Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Wed, 5 Nov 2025 18:19:15 +0900 Subject: [PATCH 15/21] refactor: rename ReferenceChecker to RecordReferenceChecker Signed-off-by: ktro2828 --- t4_devkit/sanity/reference/base.py | 6 +++--- t4_devkit/sanity/reference/ref001.py | 4 ++-- t4_devkit/sanity/reference/ref002.py | 4 ++-- t4_devkit/sanity/reference/ref003.py | 4 ++-- t4_devkit/sanity/reference/ref004.py | 4 ++-- t4_devkit/sanity/reference/ref005.py | 4 ++-- t4_devkit/sanity/reference/ref006.py | 4 ++-- t4_devkit/sanity/reference/ref007.py | 4 ++-- t4_devkit/sanity/reference/ref008.py | 4 ++-- t4_devkit/sanity/reference/ref009.py | 4 ++-- t4_devkit/sanity/reference/ref010.py | 4 ++-- t4_devkit/sanity/reference/ref011.py | 4 ++-- 12 files changed, 25 insertions(+), 25 deletions(-) diff --git a/t4_devkit/sanity/reference/base.py b/t4_devkit/sanity/reference/base.py index 27fc52f..04d4d1c 100644 --- a/t4_devkit/sanity/reference/base.py +++ b/t4_devkit/sanity/reference/base.py @@ -14,13 +14,13 @@ from ..context import SanityContext -class ReferenceChecker(Checker): - """Base class for reference checkers. +class RecordReferenceChecker(Checker): + """Base class for record reference checkers. Attributes: source (SchemaName): The source schema name. target (SchemaName): The target schema name. - reference (str): The reference key in the source record. + reference (str): The reference token name in the source record. 
""" source: SchemaName diff --git a/t4_devkit/sanity/reference/ref001.py b/t4_devkit/sanity/reference/ref001.py index 7e1c0ea..dcce036 100644 --- a/t4_devkit/sanity/reference/ref001.py +++ b/t4_devkit/sanity/reference/ref001.py @@ -4,14 +4,14 @@ from ..checker import RuleID, RuleName from ..registry import CHECKERS -from .base import ReferenceChecker +from .base import RecordReferenceChecker __all__ = ["REF001"] @CHECKERS.register(RuleID("REF001")) -class REF001(ReferenceChecker): +class REF001(RecordReferenceChecker): """A checker of REF001.""" name = RuleName("scene-to-log") diff --git a/t4_devkit/sanity/reference/ref002.py b/t4_devkit/sanity/reference/ref002.py index 45e1ab7..e4fc05f 100644 --- a/t4_devkit/sanity/reference/ref002.py +++ b/t4_devkit/sanity/reference/ref002.py @@ -4,14 +4,14 @@ from ..checker import RuleID, RuleName from ..registry import CHECKERS -from .base import ReferenceChecker +from .base import RecordReferenceChecker __all__ = ["REF002"] @CHECKERS.register(RuleID("REF002")) -class REF002(ReferenceChecker): +class REF002(RecordReferenceChecker): """A checker of REF002.""" name = RuleName("scene-to-first-sample") diff --git a/t4_devkit/sanity/reference/ref003.py b/t4_devkit/sanity/reference/ref003.py index 8c395e9..1a16684 100644 --- a/t4_devkit/sanity/reference/ref003.py +++ b/t4_devkit/sanity/reference/ref003.py @@ -4,14 +4,14 @@ from ..checker import RuleID, RuleName from ..registry import CHECKERS -from .base import ReferenceChecker +from .base import RecordReferenceChecker __all__ = ["REF003"] @CHECKERS.register(RuleID("REF003")) -class REF003(ReferenceChecker): +class REF003(RecordReferenceChecker): """A checker of REF003.""" name = RuleName("scene-to-last-sample") diff --git a/t4_devkit/sanity/reference/ref004.py b/t4_devkit/sanity/reference/ref004.py index 7206aba..1176553 100644 --- a/t4_devkit/sanity/reference/ref004.py +++ b/t4_devkit/sanity/reference/ref004.py @@ -4,14 +4,14 @@ from ..checker import RuleID, RuleName from 
..registry import CHECKERS -from .base import ReferenceChecker +from .base import RecordReferenceChecker __all__ = ["REF004"] @CHECKERS.register(RuleID("REF004")) -class REF004(ReferenceChecker): +class REF004(RecordReferenceChecker): """A checker of REF004.""" name = RuleName("sample-to-scene") diff --git a/t4_devkit/sanity/reference/ref005.py b/t4_devkit/sanity/reference/ref005.py index b9251b2..6bc4c40 100644 --- a/t4_devkit/sanity/reference/ref005.py +++ b/t4_devkit/sanity/reference/ref005.py @@ -6,14 +6,14 @@ from ..checker import RuleID, RuleName from ..registry import CHECKERS -from .base import ReferenceChecker +from .base import RecordReferenceChecker __all__ = ["REF005"] @CHECKERS.register(RuleID("REF005")) -class REF005(ReferenceChecker): +class REF005(RecordReferenceChecker): """A checker of REF005.""" name = RuleName("sample-data-to-sample") diff --git a/t4_devkit/sanity/reference/ref006.py b/t4_devkit/sanity/reference/ref006.py index 708c7b5..f16b5f3 100644 --- a/t4_devkit/sanity/reference/ref006.py +++ b/t4_devkit/sanity/reference/ref006.py @@ -4,13 +4,13 @@ from ..checker import RuleID, RuleName from ..registry import CHECKERS -from .base import ReferenceChecker +from .base import RecordReferenceChecker __all__ = ["REF006"] @CHECKERS.register(RuleID("REF006")) -class REF006(ReferenceChecker): +class REF006(RecordReferenceChecker): """A checker of REF006.""" name = RuleName("sample-data-to-ego-pose") diff --git a/t4_devkit/sanity/reference/ref007.py b/t4_devkit/sanity/reference/ref007.py index d23481e..35b2e08 100644 --- a/t4_devkit/sanity/reference/ref007.py +++ b/t4_devkit/sanity/reference/ref007.py @@ -4,13 +4,13 @@ from ..checker import RuleID, RuleName from ..registry import CHECKERS -from .base import ReferenceChecker +from .base import RecordReferenceChecker __all__ = ["REF007"] @CHECKERS.register(RuleID("REF007")) -class REF007(ReferenceChecker): +class REF007(RecordReferenceChecker): """A checker of REF007.""" name = 
RuleName("sample-data-to-calibrated-sensor") diff --git a/t4_devkit/sanity/reference/ref008.py b/t4_devkit/sanity/reference/ref008.py index da2b4ce..da3e969 100644 --- a/t4_devkit/sanity/reference/ref008.py +++ b/t4_devkit/sanity/reference/ref008.py @@ -4,14 +4,14 @@ from ..checker import RuleID, RuleName from ..registry import CHECKERS -from .base import ReferenceChecker +from .base import RecordReferenceChecker __all__ = ["REF008"] @CHECKERS.register(RuleID("REF008")) -class REF008(ReferenceChecker): +class REF008(RecordReferenceChecker): """A checker of REF008.""" name = RuleName("calibrated-sensor-to-sensor") diff --git a/t4_devkit/sanity/reference/ref009.py b/t4_devkit/sanity/reference/ref009.py index 15161e2..cb1218f 100644 --- a/t4_devkit/sanity/reference/ref009.py +++ b/t4_devkit/sanity/reference/ref009.py @@ -4,14 +4,14 @@ from ..checker import RuleID, RuleName from ..registry import CHECKERS -from .base import ReferenceChecker +from .base import RecordReferenceChecker __all__ = ["REF009"] @CHECKERS.register(RuleID("REF009")) -class REF009(ReferenceChecker): +class REF009(RecordReferenceChecker): """A checker of REF009.""" name = RuleName("instance-to-category") diff --git a/t4_devkit/sanity/reference/ref010.py b/t4_devkit/sanity/reference/ref010.py index d988b72..7ce58cb 100644 --- a/t4_devkit/sanity/reference/ref010.py +++ b/t4_devkit/sanity/reference/ref010.py @@ -4,14 +4,14 @@ from ..checker import RuleID, RuleName from ..registry import CHECKERS -from .base import ReferenceChecker +from .base import RecordReferenceChecker __all__ = ["REF010"] @CHECKERS.register(RuleID("REF010")) -class REF010(ReferenceChecker): +class REF010(RecordReferenceChecker): """A checker of REF010.""" name = RuleName("instance-to-first-sample-annotation") diff --git a/t4_devkit/sanity/reference/ref011.py b/t4_devkit/sanity/reference/ref011.py index b67d340..a374a35 100644 --- a/t4_devkit/sanity/reference/ref011.py +++ b/t4_devkit/sanity/reference/ref011.py @@ -4,14 +4,14 @@ 
from ..checker import RuleID, RuleName from ..registry import CHECKERS -from .base import ReferenceChecker +from .base import RecordReferenceChecker __all__ = ["REF011"] @CHECKERS.register(RuleID("REF011")) -class REF011(ReferenceChecker): +class REF011(RecordReferenceChecker): """A checker of REF011.""" name = RuleName("instance-to-last-sample-annotation") From 4afa45768d8da5d9d23d3c57bd3195ff10431597 Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Thu, 6 Nov 2025 23:12:33 +0900 Subject: [PATCH 16/21] feat: t4sanity performs sanity checking only a single dataset Signed-off-by: ktro2828 --- docs/cli/t4sanity.md | 65 +++++++++++++++++------------------------ t4_devkit/cli/sanity.py | 62 ++++++++++++++------------------------- 2 files changed, 48 insertions(+), 79 deletions(-) diff --git a/docs/cli/t4sanity.md b/docs/cli/t4sanity.md index b32e5dd..173548a 100644 --- a/docs/cli/t4sanity.md +++ b/docs/cli/t4sanity.md @@ -1,5 +1,4 @@ -`t4sanity` performs sanity checks on T4 datasets, reporting any issues in a structured format. -It checks the dataset directories and versions, tries to load them using the `Tier4` library, and reports any exceptions or warnings. +`t4sanity` performs sanity checks on T4 datasets, reporting any issues regarding the [dataset requirements](../../schema/requirement.md). ```shell $ t4sanity -h @@ -7,7 +6,7 @@ $ t4sanity -h Usage: t4sanity [OPTIONS] DB_PARENT ╭─ Arguments ───────────────────────────────────────────────────────────────────────────────────────────────────────────╮ -│ * db_parent TEXT Path to parent directory of the databases. [required] │ +│ * data_root TEXT Path to root directory of a dataset. [default: None] [required] │ ╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ │ --version -v Show the application version and exit. 
│ @@ -35,23 +34,17 @@ t4sanity --install-completion As an example, we have the following the dataset structure: ```shell - -├── dataset1 -│ └── -│ ├── annotation -│ ├── data -| ... -├── dataset2 -│ ├── annotation -│ ├── data -| ... -... + +├── +│ ├── annotation +│ ├── data +| ... ``` -Then, you can run sanity checks with `t4sanity `: +Then, you can run sanity checks with `t4sanity `: ```shell -$ t4sanity +$ t4sanity >>>Sanity checking...: 1it [00:00, 9.70it/s] @@ -61,14 +54,12 @@ $ t4sanity +-----------+---------+---------+-------+---------+----------+-------+ | dataset1 | 0 | SUCCESS | 44 | 44 | 0 | 0 | +-----------+---------+---------+-------+---------+----------+-------+ -| dataset2 | 0 | SUCCESS | 44 | 44 | 0 | 0 | -+-----------+---------+---------+-------+---------+----------+-------+ ``` Also, `-d; --detail` option helps us to display detailed information about each check: ```shell -$ t4sanity -d +$ t4sanity -d >>>Sanity checking...: 1it [00:00, 9.70it/s] @@ -89,8 +80,6 @@ $ t4sanity -d +-----------+---------+---------+-------+---------+----------+-------+ | dataset1 | 0 | SUCCESS | 44 | 44 | 0 | 0 | +-----------+---------+---------+-------+---------+----------+-------+ -| dataset2 | 0 | SUCCESS | 44 | 44 | 0 | 0 | -+-----------+---------+---------+-------+---------+----------+-------+ ``` ### Dump Results as JSON @@ -98,27 +87,25 @@ $ t4sanity -d To dump results into JSON, use the `-o; --output` option: ```shell -t4sanity -o results.json +t4sanity -o result.json ``` -Then a JSON file named `results.json` will be generated as follows: +Then a JSON file named `result.json` will be generated as follows: ```json -[ - { - "dataset_id": "", - "version": , - "reports": [ - { - "id": "", - "name": "", - "description": "", - "status": "", - "reasons": "<[, , ...]: [str; N] | null>" // Failure or skipped reasons, null if success - }, - ] - } -] +{ + "dataset_id": "", + "version": , + "reports": [ + { + "id": "", + "name": "", + "description": "", + "status": "", + 
"reasons": "<[, , ...]: [str; N] | null>" // Failure or skipped reasons, null if success + }, + ] +} ``` ### Exclude Checks @@ -127,5 +114,5 @@ With `-e; --excludes` option enables us to exclude specific checks by specifying ```shell # Exclude STR001 and all FMT-relevant rules -t4sanity -e STR001 -e FMT +t4sanity -e STR001 -e FMT ``` diff --git a/t4_devkit/cli/sanity.py b/t4_devkit/cli/sanity.py index 129bf6f..71a0f06 100644 --- a/t4_devkit/cli/sanity.py +++ b/t4_devkit/cli/sanity.py @@ -1,13 +1,10 @@ from __future__ import annotations -from pathlib import Path - import typer from tabulate import tabulate -from tqdm import tqdm from t4_devkit.common.io import save_json -from t4_devkit.common.serialize import serialize_dataclasses +from t4_devkit.common.serialize import serialize_dataclass from t4_devkit.sanity import SanityResult, sanity_check from .version import version_callback @@ -20,39 +17,24 @@ ) -def _run_sanity_check( - db_parent: str, - *, - revision: str | None = None, - excludes: list[str] | None = None, - include_warning: bool = False, -) -> list[SanityResult]: - return [ - sanity_check(db_root, revision=revision, excludes=excludes, include_warning=include_warning) - for db_root in tqdm(Path(db_parent).glob("*"), desc=">>>Sanity checking...") +def _print_table(result: SanityResult, *, detail: bool = False) -> str: + success = sum(1 for rp in result.reports if rp.is_success()) + failures = sum(1 for rp in result.reports if rp.is_failure()) + skips = sum(1 for rp in result.reports if rp.is_skipped()) + summary_rows = [ + [ + result.dataset_id, + result.version, + "\033[31mFAILURE\033[0m" if failures > 0 else "\033[32mSUCCESS\033[0m", + len(result.reports), + success, + failures, + skips, + ] ] - -def _print_table(results: list[SanityResult], *, detail: bool = False) -> str: - summary_rows = [] - for result in results: - success = sum(1 for rp in result.reports if rp.is_success()) - failures = sum(1 for rp in result.reports if rp.is_failure()) - skips = 
sum(1 for rp in result.reports if rp.is_skipped()) - summary_rows.append( - [ - result.dataset_id, - result.version, - "\033[31mFAILURE\033[0m" if failures > 0 else "\033[32mSUCCESS\033[0m", - len(result.reports), - success, - failures, - skips, - ] - ) - - if detail: - print(result) + if detail: + print(result) print(f"\n{'=' * 40} Summary {'=' * 40}") print( @@ -74,7 +56,7 @@ def main( callback=version_callback, is_eager=True, ), - db_parent: str = typer.Argument(..., help="Path to parent directory of the databases."), + data_root: str = typer.Argument(..., help="Path to root directory of a dataset."), output: str | None = typer.Option(None, "-o", "--output", help="Path to output JSON file."), revision: str | None = typer.Option( None, "-rv", "--revision", help="Specify if you want to check the specific version." @@ -89,15 +71,15 @@ def main( False, "-d", "--detail", help="Indicates whether to display detailed reports." ), ) -> None: - results = _run_sanity_check( - db_parent, + result = sanity_check( + data_root=data_root, revision=revision, excludes=excludes, include_warning=include_warning, ) - _print_table(results, detail=detail) + _print_table(result, detail=detail) if output: - serialized = serialize_dataclasses(results) + serialized = serialize_dataclass(result) save_json(serialized, output) From 3be04d7e631aa3597469b59c6d041e1fbc8b8997 Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Thu, 6 Nov 2025 23:50:08 +0900 Subject: [PATCH 17/21] feat: add lidarseg-relevant reference checkers Signed-off-by: ktro2828 --- docs/schema/requirement.md | 6 +++-- t4_devkit/sanity/reference/__init__.py | 2 ++ t4_devkit/sanity/reference/ref012.py | 37 +++++++------------------- t4_devkit/sanity/reference/ref013.py | 27 ++++++++----------- t4_devkit/sanity/reference/ref014.py | 37 ++++++++++++++++++++++++++ t4_devkit/sanity/reference/ref015.py | 36 +++++++++++++++++++++++++ 6 files changed, 99 insertions(+), 46 deletions(-) create mode 100644 
t4_devkit/sanity/reference/ref014.py create mode 100644 t4_devkit/sanity/reference/ref015.py diff --git a/docs/schema/requirement.md b/docs/schema/requirement.md index 18185cd..9988179 100644 --- a/docs/schema/requirement.md +++ b/docs/schema/requirement.md @@ -40,8 +40,10 @@ | `REF009` | `instance-to-category` | `Error` | `Instance.category_token` refers to `Category` record. | | `REF010` | `instance-to-first-sample-annotation` | `Error` | `Instance.first_annotation_token` refers to `SampleAnnotation` record. | | `REF011` | `instance-to-last-sample-annotation` | `Error` | `Instance.last_annotation_token` refers to `SampleAnnotation` record. | -| `REF012` | `sample-data-filename-presence` | `Error` | `SampleData.filename` exists. | -| `REF013` | `sample-data-info-filename-presence` | `Error` | `SampleData.info_filename` exists if it is not `None`. | +| `REF012` | `lidarseg-to-sample-data` | `Error` | `LidarSeg.sample_data_token` refers to `SampleData` record. | +| `REF013` | `sample-data-filename-presence` | `Error` | `SampleData.filename` exists. | +| `REF014` | `sample-data-info-filename-presence` | `Error` | `SampleData.info_filename` exists if it is not `None`. | +| `REF015` | `lidarseg-filename-presence` | `Error` | `LidarSeg.filename` exists if `lidarseg.json` exists. 
| ## Format (`FMT`) diff --git a/t4_devkit/sanity/reference/__init__.py b/t4_devkit/sanity/reference/__init__.py index 264c169..4aefae1 100644 --- a/t4_devkit/sanity/reference/__init__.py +++ b/t4_devkit/sanity/reference/__init__.py @@ -13,3 +13,5 @@ from .ref011 import * # noqa from .ref012 import * # noqa from .ref013 import * # noqa +from .ref014 import * # noqa +from .ref015 import * # noqa diff --git a/t4_devkit/sanity/reference/ref012.py b/t4_devkit/sanity/reference/ref012.py index 7387f61..f786993 100644 --- a/t4_devkit/sanity/reference/ref012.py +++ b/t4_devkit/sanity/reference/ref012.py @@ -1,40 +1,21 @@ from __future__ import annotations -from typing import TYPE_CHECKING - -from returns.maybe import Some - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName from ..registry import CHECKERS -from ..result import Reason -from ..safety import load_json_safe -from .base import FileReferenceChecker +from .base import RecordReferenceChecker + -if TYPE_CHECKING: - from ..context import SanityContext +__all__ = ["REF012"] @CHECKERS.register(RuleID("REF012")) -class REF012(FileReferenceChecker): +class REF012(RecordReferenceChecker): """A checker of REF012.""" - name = RuleName("sample-data-filename-presence") - description = "'SampleData.filename' exists." - - schema = SchemaName.SAMPLE_DATA - - def check(self, context: SanityContext) -> list[Reason]: - filepath = context.to_schema_file(self.schema).unwrap() - records = load_json_safe(filepath).unwrap() - match context.data_root: - case Some(x): - reasons = [ - Reason(f"File not found: {record['filename']}") - for record in records - if not x.joinpath(record["filename"]).exists() - ] - return reasons - case _: - return [Reason("Missing sensor data directory.")] + name = RuleName("lidarseg-to-sample-data") + description = "'LidarSeg.sample_data_token' refers to 'SampleData' record."
+ source = SchemaName.LIDARSEG + target = SchemaName.SAMPLE_DATA + reference = "sample_data_token" diff --git a/t4_devkit/sanity/reference/ref013.py b/t4_devkit/sanity/reference/ref013.py index 741466c..dd5b2e1 100644 --- a/t4_devkit/sanity/reference/ref013.py +++ b/t4_devkit/sanity/reference/ref013.py @@ -2,8 +2,6 @@ from typing import TYPE_CHECKING -from returns.maybe import Some - from t4_devkit.schema import SchemaName from ..checker import RuleID, RuleName @@ -16,26 +14,23 @@ from ..context import SanityContext +__all__ = ["REF013"] + + @CHECKERS.register(RuleID("REF013")) class REF013(FileReferenceChecker): """A checker of REF013.""" - name = RuleName("sample-data-info-filename-presence") - description = "'SampleData.info_filename' exists." - + name = RuleName("sample-data-filename-presence") + description = "'SampleData.filename' exists." schema = SchemaName.SAMPLE_DATA def check(self, context: SanityContext) -> list[Reason]: filepath = context.to_schema_file(self.schema).unwrap() records = load_json_safe(filepath).unwrap() - match context.data_root: - case Some(x): - reasons = [ - Reason(f"File not found: {record['info_filename']}") - for record in records - if record.get("info_filename") is not None - and not x.joinpath(record["info_filename"]).exists() - ] - return reasons - case _: - return [Reason("Missing sensor data directory.")] + data_root = context.data_root.unwrap() + return [ + Reason(f"File not found: {record['filename']}") + for record in records + if not data_root.joinpath(record["filename"]).exists() + ] diff --git a/t4_devkit/sanity/reference/ref014.py b/t4_devkit/sanity/reference/ref014.py new file mode 100644 index 0000000..227e7f1 --- /dev/null +++ b/t4_devkit/sanity/reference/ref014.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from t4_devkit.schema import SchemaName + +from ..checker import RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety 
import load_json_safe +from .base import FileReferenceChecker + +if TYPE_CHECKING: + from ..context import SanityContext + + +__all__ = ["REF014"] + + +@CHECKERS.register(RuleID("REF014")) +class REF014(FileReferenceChecker): + """A checker of REF014.""" + + name = RuleName("sample-data-info-filename-presence") + description = "'SampleData.info_filename' exists." + schema = SchemaName.SAMPLE_DATA + + def check(self, context: SanityContext) -> list[Reason]: + filepath = context.to_schema_file(self.schema).unwrap() + records = load_json_safe(filepath).unwrap() + data_root = context.data_root.unwrap() + return [ + Reason(f"File not found: {record['info_filename']}") + for record in records + if record.get("info_filename") is not None + and not data_root.joinpath(record["info_filename"]).exists() + ] diff --git a/t4_devkit/sanity/reference/ref015.py b/t4_devkit/sanity/reference/ref015.py new file mode 100644 index 0000000..2bfaf9b --- /dev/null +++ b/t4_devkit/sanity/reference/ref015.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from t4_devkit.schema import SchemaName + +from ..checker import RuleID, RuleName +from ..registry import CHECKERS +from ..result import Reason +from ..safety import load_json_safe +from .base import FileReferenceChecker + +if TYPE_CHECKING: + from ..context import SanityContext + + +__all__ = ["REF015"] + + +@CHECKERS.register(RuleID("REF015")) +class REF015(FileReferenceChecker): + """A checker of REF015.""" + + name = RuleName("lidarseg-filename-presence") + description = "'LidarSeg.filename' exists." 
+ schema = SchemaName.LIDARSEG + + def check(self, context: SanityContext) -> list[Reason]: + filepath = context.to_schema_file(self.schema).unwrap() + records = load_json_safe(filepath).unwrap() + data_root = context.data_root.unwrap() + return [ + Reason(f"File not found: {record['filename']}") + for record in records + if not data_root.joinpath(record["filename"]).exists() + ] From 0b48f0cbea86877e56d658046c0ac27c8bf068b1 Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Fri, 7 Nov 2025 00:08:51 +0900 Subject: [PATCH 18/21] style: add docstrings for class variables Signed-off-by: ktro2828 --- t4_devkit/sanity/format/base.py | 8 +++++++- t4_devkit/sanity/record/base.py | 8 +++++++- t4_devkit/sanity/reference/base.py | 5 +++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/t4_devkit/sanity/format/base.py b/t4_devkit/sanity/format/base.py index 2421f03..3670eb1 100644 --- a/t4_devkit/sanity/format/base.py +++ b/t4_devkit/sanity/format/base.py @@ -17,7 +17,13 @@ class FieldTypeChecker(Checker): - """Base class for format checkers.""" + """Base class for format checkers. + + Attributes: + name (RuleName): The name of the rule. + description (str): The description of the rule. + schema (SchemaName): The schema name to check. + """ schema: SchemaName diff --git a/t4_devkit/sanity/record/base.py b/t4_devkit/sanity/record/base.py index 63ac573..e172355 100644 --- a/t4_devkit/sanity/record/base.py +++ b/t4_devkit/sanity/record/base.py @@ -16,7 +16,13 @@ class RecordCountChecker(Checker): - """Base class for record count checkers.""" + """Base class for record count checkers. + + Attributes: + name (RuleName): The name of the rule. + description (str): The description of the rule. + schema (SchemaName): The schema name to check. 
+ """ schema: SchemaName diff --git a/t4_devkit/sanity/reference/base.py b/t4_devkit/sanity/reference/base.py index 04d4d1c..f7a8ea4 100644 --- a/t4_devkit/sanity/reference/base.py +++ b/t4_devkit/sanity/reference/base.py @@ -18,6 +18,8 @@ class RecordReferenceChecker(Checker): """Base class for record reference checkers. Attributes: + name (RuleName): The name of the rule. + description (str): The description of the rule. source (SchemaName): The source schema name. target (SchemaName): The target schema name. reference (str): The reference token name in the source record. @@ -71,6 +73,9 @@ class FileReferenceChecker(Checker): """Base class for file reference checkers. Attributes: + name (RuleName): The name of the rule. + description (str): The description of the rule. + schema (SchemaName): The schema name to check. """ schema: SchemaName From 8cc4c7b8bf8d5697bbc4e1500e486596ec0f7c90 Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Fri, 7 Nov 2025 04:40:09 +0900 Subject: [PATCH 19/21] refactor: always print detailed results Signed-off-by: ktro2828 --- docs/cli/t4sanity.md | 17 ----------------- t4_devkit/cli/sanity.py | 37 ++----------------------------------- t4_devkit/sanity/result.py | 37 ++++++++++++++++++++++++++++++++++++- 3 files changed, 38 insertions(+), 53 deletions(-) diff --git a/docs/cli/t4sanity.md b/docs/cli/t4sanity.md index 173548a..98d363c 100644 --- a/docs/cli/t4sanity.md +++ b/docs/cli/t4sanity.md @@ -14,7 +14,6 @@ $ t4sanity -h │ --revision -rv TEXT Specify if you want to check the specific version. │ │ --exclude -e TEXT Exclude specific rules or rule groups. │ │ --include-warning -iw Indicates whether to report any warnings. │ -│ --detail -d Indicates whether to display detailed reports. │ │ --install-completion Install completion for the current shell. │ │ --show-completion Show completion for the current shell, to copy it or customize the installation. │ │ --help -h Show this message and exit. 
│ @@ -48,21 +47,6 @@ $ t4sanity >>>Sanity checking...: 1it [00:00, 9.70it/s] -============================= Summary ============================= -+-----------+---------+---------+-------+---------+----------+-------+ -| DatasetID | Version | Status | Rules | Success | Failures | Skips | -+-----------+---------+---------+-------+---------+----------+-------+ -| dataset1 | 0 | SUCCESS | 44 | 44 | 0 | 0 | -+-----------+---------+---------+-------+---------+----------+-------+ -``` - -Also, `-d; --detail` option helps us to display detailed information about each check: - -```shell -$ t4sanity -d - ->>>Sanity checking...: 1it [00:00, 9.70it/s] - === DatasetID: dataset1 === STR001: ✅ STR002: ✅ @@ -74,7 +58,6 @@ $ t4sanity -d STR008: ✅ ... -============================= Summary ============================= +-----------+---------+---------+-------+---------+----------+-------+ | DatasetID | Version | Status | Rules | Success | Failures | Skips | +-----------+---------+---------+-------+---------+----------+-------+ diff --git a/t4_devkit/cli/sanity.py b/t4_devkit/cli/sanity.py index 71a0f06..63847c5 100644 --- a/t4_devkit/cli/sanity.py +++ b/t4_devkit/cli/sanity.py @@ -1,11 +1,10 @@ from __future__ import annotations import typer -from tabulate import tabulate from t4_devkit.common.io import save_json from t4_devkit.common.serialize import serialize_dataclass -from t4_devkit.sanity import SanityResult, sanity_check +from t4_devkit.sanity import print_sanity_result, sanity_check from .version import version_callback @@ -17,35 +16,6 @@ ) -def _print_table(result: SanityResult, *, detail: bool = False) -> str: - success = sum(1 for rp in result.reports if rp.is_success()) - failures = sum(1 for rp in result.reports if rp.is_failure()) - skips = sum(1 for rp in result.reports if rp.is_skipped()) - summary_rows = [ - [ - result.dataset_id, - result.version, - "\033[31mFAILURE\033[0m" if failures > 0 else "\033[32mSUCCESS\033[0m", - len(result.reports), - success, - failures, 
- skips, - ] - ] - - if detail: - print(result) - - print(f"\n{'=' * 40} Summary {'=' * 40}") - print( - tabulate( - summary_rows, - headers=["DatasetID", "Version", "Status", "Rules", "Success", "Failures", "Skips"], - tablefmt="pretty", - ), - ) - - @cli.command() def main( version: bool = typer.Option( @@ -67,9 +37,6 @@ def main( include_warning: bool = typer.Option( False, "-iw", "--include-warning", help="Indicates whether to report any warnings." ), - detail: bool = typer.Option( - False, "-d", "--detail", help="Indicates whether to display detailed reports." - ), ) -> None: result = sanity_check( data_root=data_root, @@ -78,7 +45,7 @@ def main( include_warning=include_warning, ) - _print_table(result, detail=detail) + print_sanity_result(result) if output: serialized = serialize_dataclass(result) diff --git a/t4_devkit/sanity/result.py b/t4_devkit/sanity/result.py index e1f6f0e..7bd7cae 100644 --- a/t4_devkit/sanity/result.py +++ b/t4_devkit/sanity/result.py @@ -4,13 +4,14 @@ from typing import TYPE_CHECKING, NewType from attrs import define, field +from tabulate import tabulate from typing_extensions import Self if TYPE_CHECKING: from .checker import RuleID, RuleName from .context import SanityContext -__all__ = ["Status", "Report", "SanityResult"] +__all__ = ["Status", "Report", "SanityResult", "print_sanity_result"] class Status(str, Enum): @@ -98,3 +99,37 @@ def __repr__(self) -> str: else: string += f"\033[32m {report.id}: ✅\033[0m\n" return string + + +def print_sanity_result(result: SanityResult) -> None: + """Print detailed and summary results of a sanity check. + + Args: + result (SanityResult): The result of a sanity check. 
+ """ + # print detailed result + print(result) + + # print summary result + success = sum(1 for rp in result.reports if rp.is_success()) + failures = sum(1 for rp in result.reports if rp.is_failure()) + skips = sum(1 for rp in result.reports if rp.is_skipped()) + summary_rows = [ + [ + result.dataset_id, + result.version, + "\033[31mFAILURE\033[0m" if failures > 0 else "\033[32mSUCCESS\033[0m", + len(result.reports), + success, + failures, + skips, + ] + ] + + print( + tabulate( + summary_rows, + headers=["DatasetID", "Version", "Status", "Rules", "Success", "Failures", "Skips"], + tablefmt="pretty", + ), + ) From ae621300445cac97284622fcc078fa3e5c16e616 Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Fri, 7 Nov 2025 04:44:08 +0900 Subject: [PATCH 20/21] style: add docstrings Signed-off-by: ktro2828 --- t4_devkit/sanity/result.py | 32 ++++++++++++++++++++++++++++++++ t4_devkit/sanity/run.py | 11 +++++++++++ 2 files changed, 43 insertions(+) diff --git a/t4_devkit/sanity/result.py b/t4_devkit/sanity/result.py index 7bd7cae..e29d0b1 100644 --- a/t4_devkit/sanity/result.py +++ b/t4_devkit/sanity/result.py @@ -15,17 +15,22 @@ class Status(str, Enum): + """Status of a report.""" + SUCCESS = "SUCCESS" FAILURE = "FAILURE" SKIPPED = "SKIPPED" def is_success(self) -> bool: + """Check if the status is success.""" return self == Status.SUCCESS def is_failure(self) -> bool: + """Check if the status is failure.""" return self == Status.FAILURE def is_skipped(self) -> bool: + """Check if the status is skipped.""" return self == Status.SKIPPED @@ -34,6 +39,16 @@ def is_skipped(self) -> bool: @define class Report: + """A report for a rule. + + Attributes: + id (RuleID): The ID of the rule. + name (RuleName): The name of the rule. + description (str): The description of the rule. + status (Status): The status of the report. + reasons (list[Reason] | None): The list of reasons for the report if the report is a failure or skipped. 
+ """ + id: RuleID name: RuleName description: str @@ -73,12 +88,29 @@ def make_failure(id: RuleID, name: RuleName, description: str, reasons: list[Rea @define class SanityResult: + """The result of a Sanity check. + + Attributes: + dataset_id (str): The ID of the dataset. + version (str | None): The version of the dataset. + reports (list[Report]): The list of reports. + """ + dataset_id: str version: str | None reports: list[Report] @classmethod def from_context(cls, context: SanityContext, reports: list[Report]) -> Self: + """Create a SanityResult from a SanityContext and a list of reports. + + Args: + context (SanityContext): The SanityContext to use. + reports (list[Report]): The list of reports to include in the result. + + Returns: + The created SanityResult. + """ return cls( dataset_id=context.dataset_id.value_or("UNKNOWN"), version=context.version.value_or(None), diff --git a/t4_devkit/sanity/run.py b/t4_devkit/sanity/run.py index 5c0f1b4..cd62ab9 100644 --- a/t4_devkit/sanity/run.py +++ b/t4_devkit/sanity/run.py @@ -17,6 +17,17 @@ def sanity_check( include_warning: bool = False, excludes: Sequence[str] | None = None, ) -> SanityResult: + """Run sanity checks on the given data root. + + Args: + data_root (str): The root directory of the data. + revision (str | None, optional): The revision to check. If None, the latest revision is used. + include_warning (bool, optional): Whether to include warning checks. + excludes (Sequence[str] | None, optional): A list of rule names or groups to exclude. + + Returns: + A SanityResult object. 
+ """ with warnings.catch_warnings(): if include_warning: warnings.simplefilter("error") From 5b054ad8db241fb223e05863a5e102cadb4c845d Mon Sep 17 00:00:00 2001 From: ktro2828 Date: Mon, 10 Nov 2025 15:58:50 +0900 Subject: [PATCH 21/21] docs: resolve the dead link Signed-off-by: ktro2828 --- docs/cli/t4sanity.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cli/t4sanity.md b/docs/cli/t4sanity.md index 98d363c..3a6a583 100644 --- a/docs/cli/t4sanity.md +++ b/docs/cli/t4sanity.md @@ -1,4 +1,4 @@ -`t4sanity` performs sanity checks on T4 datasets, reporting any issues regarding the [dataset requirements](../../schema/requirement.md). +`t4sanity` performs sanity checks on T4 datasets, reporting any issues regarding the [dataset requirements](../schema/requirement.md). ```shell $ t4sanity -h