diff --git a/docs/cli/t4sanity.md b/docs/cli/t4sanity.md index ce4bcfc..ee5209b 100644 --- a/docs/cli/t4sanity.md +++ b/docs/cli/t4sanity.md @@ -14,6 +14,7 @@ $ t4sanity -h │ --revision -rv TEXT Specify if you want to check the specific version. [default: None] │ │ --exclude -e TEXT Exclude specific rules or rule groups. [default: None] │ │ --strict -s Indicates whether warnings are treated as failures. │ +│ --fix -f Attempt to fix the issues reported by the sanity check. │ │ --install-completion Install completion for the current shell. │ │ --show-completion Show completion for the current shell, to copy it or customize the installation. │ │ --help -h Show this message and exit. │ @@ -58,11 +59,20 @@ $ t4sanity STR008: ✅ ... -+-----------+---------+--------+--------+---------+----------+ -| DatasetID | Version | Passed | Failed | Skipped | Warnings | -+-----------+---------+--------+--------+---------+----------+ -| dataset1 | 0 | 49 | 0 | 2 | 3 | -+-----------+---------+--------+--------+---------+----------+ ++-----------+---------+--------+--------+---------+----------+-------+ +| DatasetID | Version | Passed | Failed | Skipped | Warnings | Fixed | ++-----------+---------+--------+--------+---------+----------+-------+ +| dataset1 | 0 | 49 | 0 | 2 | 3 | 0 | ++-----------+---------+--------+--------+---------+----------+-------+ +``` + +### Exclude Checks + +With `-e; --exclude` option enables us to exclude specific checks by specifying the **rule IDs or groups**: + +```shell +# Exclude STR001 and all FMT-relevant rules +t4sanity -e STR001 -e FMT ``` ### Strict Mode @@ -73,28 +83,46 @@ With `-s; --strict` option enables us to treat warnings as failures: ```shell # Run strict mode -t4sanity -s +t4sanity --strict ``` -### Exclude Checks +### Fix Issues -With `-e; --excludes` option enables us to exclude specific checks by specifying the **rule IDs or groups**: +With `-f; --fix` option enables to fix issues automatically: ```shell -# Exclude STR001 and all FMT-relevant rules -t4sanity -e STR001 -e FMT +# Fix issues automatically +t4sanity --fix + +>>>Sanity checking...: 1it [00:00, 9.70it/s] + +=== DatasetID: dataset1 === + ... + REC007: --> FIXED ✅ + - All categories either must have an 'index' set or all have a 'null' index. + ... + ++-----------+---------+--------+--------+---------+----------+-------+ +| DatasetID | Version | Passed | Failed | Skipped | Warnings | Fixed | ++-----------+---------+--------+--------+---------+----------+-------+ +| dataset1 | 0 | 49 | 0 | 2 | 3 | 1 | ++-----------+---------+--------+--------+---------+----------+-------+ ``` +The generated report contains failure or warning reasons, but it's considered as passed if the fix was successful. + ### Exit Status Logic `t4sanity` CLI returns the exit code based on the following conditions: -| Condition | `--strict` | Exit Code | Notes | -| ----------------------------------------------------------------------- | ----------------- | --------- | --------------------------------------------------- | -| At least one `Severity.ERROR` rule failed | N/A | 1 | Always fails the run | -| At least one `Severity.WARNING` rule failed, no `Severity.ERROR` failed | `False` (default) | 0 | Run is considered successful, warnings are reported | -| At least one `Severity.WARNING` rule failed, no `Severity.ERROR` failed | `True` | 1 | Treat warnings as failures; exit with failure | -| All rules passed or skipped | N/A | 0 | Run is considered successful | +| Condition | `--strict` | `--fix` | Exit Code | Notes | +| ---------------------------------------------------------------------------------- | ----------------- | ------- | --------- | ------------------------------------------------------------------------- | +| At least one `Severity.ERROR` rule failed | N/A | `False` | 1 | Always fails the run | +| At least one `Severity.ERROR` rule failed, but fixed | N/A | `True` | 0 | Run is considered successful, error reasons are reported and `Fixed=true` | +| At least one `Severity.WARNING` rule failed, no `Severity.ERROR` failed | `False` (default) | N/A | 0 | Run is considered successful, warnings are reported | +| At least one `Severity.WARNING` rule failed, no `Severity.ERROR` failed | `True` | `False` | 1 | Treat warnings as failures; exit with failure | +| At least one `Severity.WARNING` rule failed, no `Severity.ERROR` failed, but fixed | `True` | `True` | 0 | Run is considered successful, warnings are reported and `Fixed=true` | +| All rules passed or skipped | N/A | N/A | 0 | Run is considered successful | ### Dump Results as JSON @@ -117,7 +145,8 @@ Then a JSON file named `result.json` will be generated as follows: "severity": "", "description": "", "status": "", - "reasons": "<[, , ...]: [str; N] | null>" // Failed or skipped reasons, null if passed + "reasons": "<[, , ...]: [str; N] | null>", + "fixed": "" }, ] } @@ -134,3 +163,4 @@ Here is the description of the JSON format: - `description`: A description of the rule. - `status`: What happened when it ran. - `reasons`: An array of reasons for failure or skipped rules, null if passed. + - `fixed`: Whether the rule was fixed. diff --git a/docs/schema/requirement.md b/docs/schema/requirement.md index c5685c8..30bccc9 100644 --- a/docs/schema/requirement.md +++ b/docs/schema/requirement.md @@ -2,99 +2,99 @@ ## Structure (`STR`) -| ID | Name | Severity | Description | -| -------- | ----------------------------- | --------- | -------------------------------------------------------------------- | -| `STR001` | `version-dir-presence` | `WARNING` | `version/` directory exists under the dataset root directory. | -| `STR002` | `annotation-dir-presence` | `ERROR` | `annotation/` directory exists under the dataset root directory. | -| `STR003` | `data-dir-presence` | `ERROR` | `data/` directory exists under the dataset root directory. | -| `STR004` | `map-dir-presence` | `WARNING` | `map/` directory exists under the dataset root directory. | -| `STR005` | `bag-dir-presence` | `WARNING` | `input_bag/` directory exists under the dataset root directory. | -| `STR006` | `status-file-presence` | `WARNING` | `status.json` file exists under the dataset root directory. | -| `STR007` | `schema-files-presence` | `ERROR` | Mandatory schema JSON files exist under the `annotation/` directory. | -| `STR008` | `lanelet-file-presence` | `WARNING` | `lanelet2_map.osm` file exists under the `map/` directory. | -| `STR009` | `pointcloud-map-dir-presence` | `WARNING` | `pointcloud_map.pcd` directory exists under the `map/` directory. | +| ID | Name | Severity | Fixable | Description | +| -------- | ----------------------------- | --------- | ------- | -------------------------------------------------------------------- | +| `STR001` | `version-dir-presence` | `WARNING` | `N/A` | `version/` directory exists under the dataset root directory. | +| `STR002` | `annotation-dir-presence` | `ERROR` | `N/A` | `annotation/` directory exists under the dataset root directory. | +| `STR003` | `data-dir-presence` | `ERROR` | `N/A` | `data/` directory exists under the dataset root directory. | +| `STR004` | `map-dir-presence` | `WARNING` | `N/A` | `map/` directory exists under the dataset root directory. | +| `STR005` | `bag-dir-presence` | `WARNING` | `N/A` | `input_bag/` directory exists under the dataset root directory. | +| `STR006` | `status-file-presence` | `WARNING` | `N/A` | `status.json` file exists under the dataset root directory. | +| `STR007` | `schema-files-presence` | `ERROR` | `N/A` | Mandatory schema JSON files exist under the `annotation/` directory. | +| `STR008` | `lanelet-file-presence` | `WARNING` | `N/A` | `lanelet2_map.osm` file exists under the `map/` directory. | +| `STR009` | `pointcloud-map-dir-presence` | `WARNING` | `N/A` | `pointcloud_map.pcd` directory exists under the `map/` directory. | ## Schema Record (`REC`) -| ID | Name | Severity | Description | -| -------- | ----------------------------- | -------- | ----------------------------------------------------------------------------------------------- | -| `REC001` | `scene-single` | `ERROR` | `Scene` record is a single. | -| `REC002` | `sample-not-empty` | `ERROR` | `Sample` record is not empty. | -| `REC003` | `sample-data-not-empty` | `ERROR` | `SampleData` record is not empty. | -| `REC004` | `ego-pose-not-empty` | `ERROR` | `EgoPose` record is not empty. | -| `REC005` | `calibrated-sensor-non-empty` | `ERROR` | `CalibratedSensor` record is not empty. | -| `REC006` | `instance-not-empty` | `ERROR` | `Instance` record is not empty if either `SampleAnnotation` or `ObjectAnnotation` is not empty. | -| `REC007` | `category-indices-consistent` | `ERROR` | `Category` records must either all have a unique `index` or all have a `null` index. | +| ID | Name | Severity | Fixable | Description | +| -------- | ----------------------------- | -------- | ------- | ----------------------------------------------------------------------------------------------- | +| `REC001` | `scene-single` | `ERROR` | `N/A` | `Scene` record is a single. | +| `REC002` | `sample-not-empty` | `ERROR` | `N/A` | `Sample` record is not empty. | +| `REC003` | `sample-data-not-empty` | `ERROR` | `N/A` | `SampleData` record is not empty. | +| `REC004` | `ego-pose-not-empty` | `ERROR` | `N/A` | `EgoPose` record is not empty. | +| `REC005` | `calibrated-sensor-non-empty` | `ERROR` | `N/A` | `CalibratedSensor` record is not empty. | +| `REC006` | `instance-not-empty` | `ERROR` | `N/A` | `Instance` record is not empty if either `SampleAnnotation` or `ObjectAnnotation` is not empty. | +| `REC007` | `category-indices-consistent` | `ERROR` | `YES` | `Category` records must either all have a unique `index` or all have a `null` index. | ## Reference (`REF`) ### Record Reference (A to B) -| ID | Name | Severity | Description | -| -------- | ------------------------------------- | -------- | ------------------------------------------------------------------------- | -| `REF001` | `scene-to-log` | `ERROR` | `Scene.log_token` refers to `Log` record. | -| `REF002` | `scene-to-first-sample` | `ERROR` | `Scene.first_sample_token` refers to `Sample` record. | -| `REF003` | `scene-to-last-sample` | `ERROR` | `Scene.last_sample_token` refers to `Sample` record. | -| `REF004` | `sample-to-scene` | `ERROR` | `Sample.scene_token` refers to `Scene` record. | -| `REF005` | `sample-data-to-sample` | `ERROR` | `SampleData.sample_token` refers to `Sample` record. | -| `REF006` | `sample-data-to-ego-pose` | `ERROR` | `SampleData.ego_pose_token` refers to `EgoPose` record. | -| `REF007` | `sample-data-to-calibrated-sensor` | `ERROR` | `SampleData.calibrated_sensor_token` refers to `CalibratedSensor` record. | -| `REF008` | `calibrated-sensor-to-sensor` | `ERROR` | `CalibratedSensor.sensor_token` refers to `Sensor` record. | -| `REF009` | `instance-to-category` | `ERROR` | `Instance.category_token` refers to `Category` record. | -| `REF010` | `instance-to-first-sample-annotation` | `ERROR` | `Instance.first_annotation_token` refers to `SampleAnnotation` record. | -| `REF011` | `instance-to-last-sample-annotation` | `ERROR` | `Instance.last_annotation_token` refers to `SampleAnnotation` record. | -| `REF012` | `lidarseg-to-sample-data` | `ERROR` | `LidarSeg.sample_data_token` refers to `SampleData` record. | +| ID | Name | Severity | Fixable | Description | +| -------- | ------------------------------------- | -------- | ------- | ------------------------------------------------------------------------- | +| `REF001` | `scene-to-log` | `ERROR` | `N/A` | `Scene.log_token` refers to `Log` record. | +| `REF002` | `scene-to-first-sample` | `ERROR` | `N/A` | `Scene.first_sample_token` refers to `Sample` record. | +| `REF003` | `scene-to-last-sample` | `ERROR` | `N/A` | `Scene.last_sample_token` refers to `Sample` record. | +| `REF004` | `sample-to-scene` | `ERROR` | `N/A` | `Sample.scene_token` refers to `Scene` record. | +| `REF005` | `sample-data-to-sample` | `ERROR` | `N/A` | `SampleData.sample_token` refers to `Sample` record. | +| `REF006` | `sample-data-to-ego-pose` | `ERROR` | `N/A` | `SampleData.ego_pose_token` refers to `EgoPose` record. | +| `REF007` | `sample-data-to-calibrated-sensor` | `ERROR` | `N/A` | `SampleData.calibrated_sensor_token` refers to `CalibratedSensor` record. | +| `REF008` | `calibrated-sensor-to-sensor` | `ERROR` | `N/A` | `CalibratedSensor.sensor_token` refers to `Sensor` record. | +| `REF009` | `instance-to-category` | `ERROR` | `N/A` | `Instance.category_token` refers to `Category` record. | +| `REF010` | `instance-to-first-sample-annotation` | `ERROR` | `N/A` | `Instance.first_annotation_token` refers to `SampleAnnotation` record. | +| `REF011` | `instance-to-last-sample-annotation` | `ERROR` | `N/A` | `Instance.last_annotation_token` refers to `SampleAnnotation` record. | +| `REF012` | `lidarseg-to-sample-data` | `ERROR` | `N/A` | `LidarSeg.sample_data_token` refers to `SampleData` record. | ### Record Reference (A to A') -| ID | Name | Severity | Description | -| -------- | ----------------------------------- | -------- | ----------------------------------------------------------------- | -| `REF101` | `sample-next-to-another` | `ERROR` | `Sample.next` refers to another one unless it is empty. | -| `REF102` | `sample-prev-to-another` | `ERROR` | `Sample.prev` refers to another one unless it is empty. | -| `REF103` | `sample-annotation-next-to-another` | `ERROR` | `SampleAnnotation.next` refers to another one unless it is empty. | -| `REF104` | `sample-annotation-prev-to-another` | `ERROR` | `SampleAnnotation.prev` refers to another one unless it is empty. | -| `REF105` | `sample-data-next-to-another` | `ERROR` | `SampleData.next` refers to another one unless it is empty. | -| `REF106` | `sample-data-prev-to-another` | `ERROR` | `SampleData.prev` refers to another one unless it is empty. | +| ID | Name | Severity | Fixable | Description | +| -------- | ----------------------------------- | -------- | ------- | ----------------------------------------------------------------- | +| `REF101` | `sample-next-to-another` | `ERROR` | `N/A` | `Sample.next` refers to another one unless it is empty. | +| `REF102` | `sample-prev-to-another` | `ERROR` | `N/A` | `Sample.prev` refers to another one unless it is empty. | +| `REF103` | `sample-annotation-next-to-another` | `ERROR` | `N/A` | `SampleAnnotation.next` refers to another one unless it is empty. | +| `REF104` | `sample-annotation-prev-to-another` | `ERROR` | `N/A` | `SampleAnnotation.prev` refers to another one unless it is empty. | +| `REF105` | `sample-data-next-to-another` | `ERROR` | `N/A` | `SampleData.next` refers to another one unless it is empty. | +| `REF106` | `sample-data-prev-to-another` | `ERROR` | `N/A` | `SampleData.prev` refers to another one unless it is empty. | ### File Reference -| ID | Name | Severity | Description | -| -------- | ------------------------------------ | -------- | ------------------------------------------------------ | -| `REF201` | `sample-data-filename-presence` | `ERROR` | `SampleData.filename` exists. | -| `REF202` | `sample-data-info-filename-presence` | `ERROR` | `SampleData.info_filename` exists if it is not `None`. | -| `REF203` | `lidarseg-filename-presence` | `ERROR` | `LidarSeg.filename` exists if `lidarseg.json` exists. | +| ID | Name | Severity | Fixable | Description | +| -------- | ------------------------------------ | -------- | ------- | ------------------------------------------------------ | +| `REF201` | `sample-data-filename-presence` | `ERROR` | `N/A` | `SampleData.filename` exists. | +| `REF202` | `sample-data-info-filename-presence` | `ERROR` | `N/A` | `SampleData.info_filename` exists if it is not `None`. | +| `REF203` | `lidarseg-filename-presence` | `ERROR` | `N/A` | `LidarSeg.filename` exists if `lidarseg.json` exists. | ### External Table Reference -| ID | Name | Severity | Description | -| -------- | --------------------------------- | -------- | ------------------------------------------ | -| `REF301` | `pointcloud-metainfo-token-check` | `ERROR` | `PointCloudMetainfo sensor tokens` exists. | +| ID | Name | Severity | Fixable | Description | +| -------- | --------------------------------- | -------- | ------- | ------------------------------------------ | +| `REF301` | `pointcloud-metainfo-token-check` | `ERROR` | `N/A` | `PointCloudMetainfo sensor tokens` exists. | ## Format (`FMT`) -| ID | Name | Severity | Description | -| -------- | ------------------------- | -------- | ------------------------------------------------- | -| `FMT001` | `attribute-field` | `ERROR` | All types of `Attribute` fields are valid. | -| `FMT002` | `calibrated-sensor-field` | `ERROR` | All types of `CalibratedSensor` fields are valid. | -| `FMT003` | `category-field` | `ERROR` | All types of `Category` fields are valid. | -| `FMT004` | `ego-pose-field` | `ERROR` | All types of `EgoPose` fields are valid. | -| `FMT005` | `instance-field` | `ERROR` | All types of `Instance` fields are valid. | -| `FMT006` | `log-field` | `ERROR` | All types of `Log` fields are valid. | -| `FMT007` | `map-field` | `ERROR` | All types of `Map` fields are valid. | -| `FMT008` | `sample-field` | `ERROR` | All types of `Sample` fields are valid. | -| `FMT009` | `sample-annotation-field` | `ERROR` | All types of `SampleAnnotation` fields are valid. | -| `FMT010` | `sample-data-field` | `ERROR` | All types of `SampleData` fields are valid. | -| `FMT011` | `scene-field` | `ERROR` | All types of `Scene` fields are valid. | -| `FMT012` | `sensor-field` | `ERROR` | All types of `Sensor` fields are valid. | -| `FMT013` | `visibility-field` | `ERROR` | All types of `Visibility` fields are valid. | -| `FMT014` | `lidarseg-field` | `ERROR` | All types of `Lidarseg` fields are valid. | -| `FMT015` | `object-ann-field` | `ERROR` | All types of `ObjectAnn` fields are valid. | -| `FMT016` | `surface-ann-field` | `ERROR` | All types of `SurfaceAnn` fields are valid. | -| `FMT017` | `keypoint-field` | `ERROR` | All types of `Keypoint` fields are valid. | -| `FMT018` | `vehicle-state-field` | `ERROR` | All types of `VehicleState` fields are valid. | +| ID | Name | Severity | Fixable | Description | +| -------- | ------------------------- | -------- | ------- | ------------------------------------------------- | +| `FMT001` | `attribute-field` | `ERROR` | `N/A` | All types of `Attribute` fields are valid. | +| `FMT002` | `calibrated-sensor-field` | `ERROR` | `N/A` | All types of `CalibratedSensor` fields are valid. | +| `FMT003` | `category-field` | `ERROR` | `N/A` | All types of `Category` fields are valid. | +| `FMT004` | `ego-pose-field` | `ERROR` | `N/A` | All types of `EgoPose` fields are valid. | +| `FMT005` | `instance-field` | `ERROR` | `N/A` | All types of `Instance` fields are valid. | +| `FMT006` | `log-field` | `ERROR` | `N/A` | All types of `Log` fields are valid. | +| `FMT007` | `map-field` | `ERROR` | `N/A` | All types of `Map` fields are valid. | +| `FMT008` | `sample-field` | `ERROR` | `N/A` | All types of `Sample` fields are valid. | +| `FMT009` | `sample-annotation-field` | `ERROR` | `N/A` | All types of `SampleAnnotation` fields are valid. | +| `FMT010` | `sample-data-field` | `ERROR` | `N/A` | All types of `SampleData` fields are valid. | +| `FMT011` | `scene-field` | `ERROR` | `N/A` | All types of `Scene` fields are valid. | +| `FMT012` | `sensor-field` | `ERROR` | `N/A` | All types of `Sensor` fields are valid. | +| `FMT013` | `visibility-field` | `ERROR` | `N/A` | All types of `Visibility` fields are valid. | +| `FMT014` | `lidarseg-field` | `ERROR` | `N/A` | All types of `Lidarseg` fields are valid. | +| `FMT015` | `object-ann-field` | `ERROR` | `N/A` | All types of `ObjectAnn` fields are valid. | +| `FMT016` | `surface-ann-field` | `ERROR` | `N/A` | All types of `SurfaceAnn` fields are valid. | +| `FMT017` | `keypoint-field` | `ERROR` | `N/A` | All types of `Keypoint` fields are valid. | +| `FMT018` | `vehicle-state-field` | `ERROR` | `N/A` | All types of `VehicleState` fields are valid. | ## Tier4 Instance (`TIV`) -| ID | Name | Severity | Description | -| -------- | ------------ | -------- | ----------------------------------------------- | -| `TIV001` | `load-tier4` | `ERROR` | Ensure `Tier4` instance is loaded successfully. | +| ID | Name | Severity | Fixable | Description | +| -------- | ------------ | -------- | ------- | ----------------------------------------------- | +| `TIV001` | `load-tier4` | `ERROR` | `N/A` | Ensure `Tier4` instance is loaded successfully. | diff --git a/docs/tutorials/sanity.md b/docs/tutorials/sanity.md index d804aa4..0005c94 100644 --- a/docs/tutorials/sanity.md +++ b/docs/tutorials/sanity.md @@ -22,12 +22,28 @@ save_json(serialize_dataclass(result), "result.json") ### How to Add New Checkers +The following diagram shows the logic of the checkers: + +```mermaid +flowchart LR + Start --> A{Can skip?} + A --> |Yes| B[Skip check and
returns skipped report] + A --> |No| C[Perform check] + C --> D{Failed and --fix=True?} + D --> |Yes| E[Fix issues] + E --> F[Return report] + D --> |No| F +``` + All checkers must follow: - Implement a class that inherits from `Checker` class. - Its ID must be unique and belong to one of `RuleGroup` enum. -- Override the `check() -> list[Reason] | None` method to perform the specific check. - Register the checker using `CHECKERS.register()` decorator. +- Override the `check(...) -> list[Reason] | None` method to perform the specific check. +- [OPTIONAL] The following methods can be overridden if needed: + - Override `can_skip(...) -> Maybe[Reason]` method to determine whether the checker can be skipped. + - Override `fix(...) -> bool` method to fix the issue if possible. ```python title="str000.py" from __future__ import annotations diff --git a/t4_devkit/cli/sanity.py b/t4_devkit/cli/sanity.py index 6c5ed79..3a06758 100644 --- a/t4_devkit/cli/sanity.py +++ b/t4_devkit/cli/sanity.py @@ -42,8 +42,11 @@ def main( "--strict", help="By default, warnings do not cause failure. If set, warnings are treated as failures.", ), + fix: bool = typer.Option( + False, "-f", "--fix", help="Attempt to fix the issues reported by the sanity check." + ), ) -> None: - result = sanity_check(data_root=data_root, revision=revision, excludes=excludes) + result = sanity_check(data_root=data_root, revision=revision, excludes=excludes, fix=fix) print_sanity_result(result, strict=strict) diff --git a/t4_devkit/sanity/checker.py b/t4_devkit/sanity/checker.py index 86365f0..e4a88d7 100644 --- a/t4_devkit/sanity/checker.py +++ b/t4_devkit/sanity/checker.py @@ -40,16 +40,36 @@ class Checker(ABC): description: str severity: Severity - def __call__(self, context: SanityContext) -> Report: + def __call__(self, context: SanityContext, fix: bool = False) -> Report: + """Run the checker and return a report. + + The issues will be fixed if the checker is fixable, `fix` is True and + the checker returns a list of failure or warning reasons (not `None`). + + Args: + context (SanityContext): The sanity context. + fix (bool, optional): Whether to attempt to fix the issue. + + Returns: + A report containing the results of the checker. + """ match self.can_skip(context): case Some(skip): return make_skipped(self.id, self.name, self.severity, self.description, skip) reasons = self.check(context) - return make_report(self.id, self.name, self.severity, self.description, reasons) + fixed = self.fix(context) if fix and reasons else False + return make_report(self.id, self.name, self.severity, self.description, reasons, fixed) + + def can_skip(self, context: SanityContext) -> Maybe[Reason]: + """Return a skip reason if the checker should be skipped. + + Args: + context (SanityContext): The sanity context. - def can_skip(self, _: SanityContext) -> Maybe[Reason]: - """Return a skip reason if the checker should be skipped.""" + Returns: + A skip reason if the checker should be skipped, or Nothing if it should not be skipped. + """ return Nothing @abstractmethod @@ -63,3 +83,14 @@ def check(self, context: SanityContext) -> list[Reason] | None: A list of reasons if the checker fails, or None if it passes. """ pass + + def fix(self, context: SanityContext) -> bool: + """Fix the issue reported by the checker. + + Args: + context (SanityContext): The sanity context. + + Returns: + True if the issue was fixed, False otherwise. + """ + return False diff --git a/t4_devkit/sanity/context.py b/t4_devkit/sanity/context.py index db6afc6..1e6459d 100644 --- a/t4_devkit/sanity/context.py +++ b/t4_devkit/sanity/context.py @@ -3,11 +3,12 @@ from pathlib import Path from attrs import define -from returns.maybe import Maybe +from returns.maybe import Maybe, Some from returns.pipeline import is_successful from typing_extensions import Self from t4_devkit import DBMetadata +from t4_devkit.common import save_json from t4_devkit.schema.name import SchemaName from .safety import load_metadata_safe @@ -66,3 +67,15 @@ def status_json(self) -> Maybe[Path]: def to_schema_file(self, schema: SchemaName) -> Maybe[Path]: """Convert schema name to file path, which is /annotation/.json.""" return self.annotation_dir.map(lambda ann: ann.joinpath(schema.filename)) + + def save_records(self, schema: SchemaName, records: list[dict]) -> bool: + """Save schema data to file.""" + match self.to_schema_file(schema): + case Some(filepath): + try: + save_json(records, filepath) + return True + except Exception as e: + print(f"Error saving schema: {e}") + return False + return False diff --git a/t4_devkit/sanity/record/rec007.py b/t4_devkit/sanity/record/rec007.py index cdb267a..61a88d2 100644 --- a/t4_devkit/sanity/record/rec007.py +++ b/t4_devkit/sanity/record/rec007.py @@ -57,3 +57,13 @@ def check(self, context: SanityContext) -> list[Reason] | None: return [Reason("Categories must have unique 'index' values.")] return None + + def fix(self, context: SanityContext) -> bool: + """Fix the 'index' values of categories.""" + filepath = context.to_schema_file(self.schema).unwrap() + records = load_json_safe(filepath).unwrap() + + for idx, record in enumerate(records): + record["index"] = idx + + return context.save_records(self.schema, records) diff --git a/t4_devkit/sanity/result.py b/t4_devkit/sanity/result.py index 3286563..fbc35b7 100644 --- a/t4_devkit/sanity/result.py +++ b/t4_devkit/sanity/result.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, NewType from attrs import define, field +from colorama import Fore from tabulate import tabulate from typing_extensions import Self @@ -36,6 +37,7 @@ class Report: description (str): The description of the rule. status (Status): The status of the report. reasons (list[Reason] | None): The list of reasons for the report if the report is a failure or skipped. + fixed (bool): Whether the report is fixed. """ id: RuleID @@ -44,6 +46,7 @@ class Report: description: str status: Status reasons: list[Reason] | None = field(default=None) + fixed: bool = False def __attrs_post_init__(self) -> None: if self.status == Status.PASSED: @@ -57,6 +60,7 @@ def is_passed(self, *, strict: bool = False) -> bool: self.status == Status.PASSED or self.is_skipped() or (not strict and self.severity.is_warning()) + or self.fixed ) def is_failed(self, *, strict: bool = False) -> bool: @@ -80,19 +84,29 @@ def to_str(self, *, strict: bool = False) -> str: """ parts = [] if not self.is_passed(strict=strict): - parts.append(f"\033[31m {self.id}:\033[0m\n") + # print failure reasons + parts.append(f"{Fore.RED} {self.id}:\n") for reason in self.reasons or []: - parts.append(f"\033[31m - {reason}\033[0m\n") + parts.append(f"{Fore.RED} - {reason}\n") elif self.is_skipped(): - parts.append(f"\033[36m {self.id}: [SKIPPED]\033[0m\n") + # print skipped reasons + parts.append(f"{Fore.CYAN} {self.id}: [SKIPPED]\n") for reason in self.reasons or []: - parts.append(f"\033[36m - {reason}\033[0m\n") + parts.append(f"{Fore.CYAN} - {reason}\n") elif self.severity.is_warning() and self.reasons: - parts.append(f"\033[33m {self.id}:\033[0m\n") + # print warning reasons + parts.append(f"{Fore.YELLOW} {self.id}:\n") for reason in self.reasons or []: - parts.append(f"\033[33m - {reason}\033[0m\n") + parts.append(f"{Fore.YELLOW} - {reason}\n") + elif self.is_passed() and self.fixed: + # print failure or warning but fixed reasons + parts.append(f"{Fore.GREEN} {self.id}: --> FIXED ✅\n") + for reason in self.reasons or []: + parts.append(f"{Fore.GREEN} - {reason}\n") else: - parts.append(f"\033[32m {self.id}: ✅\033[0m\n") + # print passed + parts.append(f"{Fore.GREEN} {self.id}: ✅\n") + parts.append(f"{Fore.RESET}") return "".join(parts) @@ -102,10 +116,11 @@ def make_report( severity: Severity, description: str, reasons: list[Reason] | None = None, + fixed: bool = False, ) -> Report: """Make a report for the given rule.""" if reasons: - return Report(id, name, severity, description, Status.FAILED, reasons) + return Report(id, name, severity, description, Status.FAILED, reasons, fixed) else: return Report(id, name, severity, description, Status.PASSED) @@ -190,12 +205,15 @@ def print_sanity_result(result: SanityResult, *, strict: bool = False) -> None: # just count the number of warnings warnings = sum(1 for rp in result.reports if rp.severity.is_warning() and rp.reasons) - summary_rows = [[result.dataset_id, result.version, passed, failed, skipped, warnings]] + # count the number of fixed issues + fixed = sum(1 for rp in result.reports if rp.fixed) + + summary_rows = [[result.dataset_id, result.version, passed, failed, skipped, warnings, fixed]] print( tabulate( summary_rows, - headers=["DatasetID", "Version", "Passed", "Failed", "Skipped", "Warnings"], + headers=["DatasetID", "Version", "Passed", "Failed", "Skipped", "Warnings", "Fixed"], tablefmt="pretty", ), ) diff --git a/t4_devkit/sanity/run.py b/t4_devkit/sanity/run.py index ae91431..be88b71 100644 --- a/t4_devkit/sanity/run.py +++ b/t4_devkit/sanity/run.py @@ -14,6 +14,7 @@ def sanity_check( revision: str | None = None, *, excludes: Sequence[str] | None = None, + fix: bool = False, ) -> SanityResult: """Run sanity checks on the given data root. @@ -21,6 +22,7 @@ def sanity_check( data_root (str): The root directory of the data. revision (str | None, optional): The revision to check. If None, the latest revision is used. excludes (Sequence[str] | None, optional): A list of rule names or groups to exclude. + fix (bool, optional): Attempt to fix the issues reported by the sanity check. Returns: A SanityResult object. @@ -28,6 +30,6 @@ def sanity_check( context = SanityContext.from_path(data_root, revision=revision) checkers = CHECKERS.build(excludes=excludes) - reports = [checker(context) for checker in checkers] + reports = [checker(context, fix=fix) for checker in checkers] return SanityResult.from_context(context, reports)