diff --git a/src/kili/adapters/kili_api_gateway/asset/mappers.py b/src/kili/adapters/kili_api_gateway/asset/mappers.py index f1ef743e2..39af4ab5b 100644 --- a/src/kili/adapters/kili_api_gateway/asset/mappers.py +++ b/src/kili/adapters/kili_api_gateway/asset/mappers.py @@ -64,4 +64,6 @@ def asset_where_mapper(filters: AssetFilters): "type": filters.issue_type, "status": filters.issue_status, }, + "stepIdIn": filters.step_id_in, + "stepStatusIn": filters.step_status_in, } diff --git a/src/kili/domain/asset/asset.py b/src/kili/domain/asset/asset.py index 32d432283..0138f06d7 100644 --- a/src/kili/domain/asset/asset.py +++ b/src/kili/domain/asset/asset.py @@ -12,10 +12,12 @@ AssetId = NewType("AssetId", str) AssetExternalId = NewType("AssetExternalId", str) - +AssetStatusInStep = NewType("AssetStatusInStep", str) AssetStatus = Literal["TODO", "ONGOING", "LABELED", "REVIEWED", "TO_REVIEW"] +StatusInStep = Literal["TO_DO", "DOING", "PARTIALLY_DONE", "REDO", "DONE", "SKIPPED"] + @dataclass class AssetFilters: @@ -48,8 +50,6 @@ class AssetFilters: assignee_in: Optional[ListOrTuple[str]] = None assignee_not_in: Optional[ListOrTuple[str]] = None metadata_where: Optional[dict] = None - skipped: Optional[bool] = None - status_in: Optional[ListOrTuple[AssetStatus]] = None updated_at_gte: Optional[str] = None updated_at_lte: Optional[str] = None label_category_search: Optional[str] = None @@ -59,3 +59,7 @@ class AssetFilters: inference_mark_lte: Optional[float] = None issue_type: Optional["IssueType"] = None issue_status: Optional["IssueStatus"] = None + skipped: Optional[bool] = None + status_in: Optional[ListOrTuple[AssetStatus]] = None + step_id_in: Optional[ListOrTuple[str]] = None + step_status_in: Optional[ListOrTuple[StatusInStep]] = None diff --git a/src/kili/domain/project.py b/src/kili/domain/project.py index ec413f8f1..ed305e5d4 100644 --- a/src/kili/domain/project.py +++ b/src/kili/domain/project.py @@ -2,7 +2,7 @@ from dataclasses import dataclass from enum import 
class ProjectStep(TypedDict, total=True):
    """Project step type.

    Describes one step of a project as a dictionary holding the keys ``id``
    and ``name``; both keys are required (``total=True``).

    No ``@dataclass`` decorator is applied on purpose: a ``TypedDict`` only
    describes the shape of a ``dict`` for type checkers, and calling the class
    returns an ordinary, mutable ``dict``. A ``frozen=True`` dataclass
    decorator therefore cannot make the values immutable and would only make
    the class misleadingly look like a dataclass.
    """

    # Identifier of the step.
    id: str
    # Name of the step, as used by the ``step_name_in`` filter.
    name: str
Literal["dict", "parsed_label"] = "dict", + skipped: Optional[bool] = None, + status_in: Optional[List[AssetStatus]] = None, + step_name_in: Optional[List[str]] = None, + step_status_in: Optional[List[StatusInStep]] = None, *, as_generator: Literal[True], ) -> Generator[Dict, None, None]: @@ -158,8 +170,6 @@ def assets( label_honeypot_mark_lt: Optional[float] = None, label_type_in: Optional[List[LabelType]] = None, metadata_where: Optional[dict] = None, - skipped: Optional[bool] = None, - status_in: Optional[List[AssetStatus]] = None, updated_at_gte: Optional[str] = None, updated_at_lte: Optional[str] = None, label_category_search: Optional[str] = None, @@ -190,6 +200,10 @@ def assets( external_id_strictly_in: Optional[List[str]] = None, external_id_in: Optional[List[str]] = None, label_output_format: Literal["dict", "parsed_label"] = "dict", + skipped: Optional[bool] = None, + status_in: Optional[List[AssetStatus]] = None, + step_name_in: Optional[List[str]] = None, + step_status_in: Optional[List[StatusInStep]] = None, *, as_generator: Literal[False] = False, ) -> List[Dict]: @@ -236,8 +250,6 @@ def assets( label_honeypot_mark_lt: Optional[float] = None, label_type_in: Optional[List[LabelType]] = None, metadata_where: Optional[dict] = None, - skipped: Optional[bool] = None, - status_in: Optional[List[AssetStatus]] = None, updated_at_gte: Optional[str] = None, updated_at_lte: Optional[str] = None, label_category_search: Optional[str] = None, @@ -268,6 +280,10 @@ def assets( external_id_strictly_in: Optional[List[str]] = None, external_id_in: Optional[List[str]] = None, label_output_format: Literal["dict", "parsed_label"] = "dict", + skipped: Optional[bool] = None, + status_in: Optional[List[AssetStatus]] = None, + step_name_in: Optional[List[str]] = None, + step_status_in: Optional[List[StatusInStep]] = None, *, as_generator: bool = False, ) -> Union[Iterable[Dict], "pd.DataFrame"]: @@ -289,8 +305,6 @@ def assets( metadata_where: Filters by the values of the 
metadata of the asset. honeypot_mark_gt: Deprecated. Use `honeypot_mark_gte` instead. honeypot_mark_lt: Deprecated. Use `honeypot_mark_lte` instead. - status_in: Returned assets should have a status that belongs to that list, if given. - Possible choices: `TODO`, `ONGOING`, `LABELED`, `TO_REVIEW` or `REVIEWED`. label_type_in: Returned assets should have a label whose type belongs to that list, if given. label_author_in: Returned assets should have a label whose author belongs to that list, if given. An author can be designated by the first name, the last name, or the first name + last name. label_consensus_mark_gt: Deprecated. Use `label_consensus_mark_gte` instead. @@ -300,7 +314,6 @@ def assets( label_created_at_lt: Deprecated. Use `label_created_at_lte` instead. label_honeypot_mark_gt: Deprecated. Use `label_honeypot_mark_gte` instead. label_honeypot_mark_lt: Deprecated. Use `label_honeypot_mark_lte` instead. - skipped: Returned assets should be skipped updated_at_gte: Returned assets should have a label whose update date is greater or equal to this date. updated_at_lte: Returned assets should have a label whose update date is lower or equal to this date. format: If equal to 'pandas', returns a pandas DataFrame @@ -335,6 +348,15 @@ def assets( external_id_in: Returned assets should have external ids that partially match the ones in the list. For example, with `external_id_in=['abc']`, any asset with an external id containing `'abc'` will be returned. label_output_format: If `parsed_label`, the labels in the assets will be parsed. More information on parsed labels in the [documentation](https://python-sdk-docs.kili-technology.com/latest/sdk/tutorials/label_parsing/). + skipped: Returned assets should be skipped + Only applicable if the project is in WorkflowV1 (legacy). + status_in: Returned assets should have a status that belongs to that list, if given. + Possible choices: `TODO`, `ONGOING`, `LABELED`, `TO_REVIEW` or `REVIEWED`. 
+ Only applicable if the project is in the WorkflowV1 (legacy). + step_name_in: Returned assets are in the step whose name belong to that list, if given. + Only applicable if the project is in WorkflowV2. + step_status_in: Returned assets have the status in their step that belongs to that list, if given. + Only applicable if the project is in WorkflowV2. !!! info "Dates format" Date strings should have format: "YYYY-MM-DD" @@ -431,6 +453,33 @@ def assets( disable_tqdm = disable_tqdm_if_as_generator(as_generator, disable_tqdm) + step_id_in = None + if ( + step_name_in is not None + or step_status_in is not None + or status_in is not None + or skipped is not None + ): + project_use_cases = ProjectUseCases(self.kili_api_gateway) + project_steps = project_use_cases.get_project_steps(project_id) + + if step_name_in is not None or step_status_in is not None or status_in is not None: + step_id_in = convert_step_in_to_step_id_in_filter( + project_steps=project_steps, + fields=fields, + asset_filter_kwargs={ + "step_name_in": step_name_in, + "step_status_in": step_status_in, + "status_in": status_in, + "skipped": skipped, + }, + ) + elif skipped is not None and len(project_steps) != 0: + warnings.warn( + "Filter skipped given : only use filter step_status_in with the SKIPPED step status instead for this project", + stacklevel=1, + ) + asset_use_cases = AssetUseCases(self.kili_api_gateway) filters = AssetFilters( project_id=ProjectId(project_id), @@ -474,6 +523,8 @@ def assets( assignee_not_in=assignee_not_in, issue_status=issue_status, issue_type=issue_type, + step_id_in=step_id_in, + step_status_in=step_status_in, ) assets_gen = asset_use_cases.list_assets( filters, diff --git a/src/kili/presentation/client/helpers/filter_conversion.py b/src/kili/presentation/client/helpers/filter_conversion.py new file mode 100644 index 000000000..7a4e318b7 --- /dev/null +++ b/src/kili/presentation/client/helpers/filter_conversion.py @@ -0,0 +1,77 @@ +"""Module for common argument 
def extract_step_ids_from_project_steps(
    # Quoted: ProjectStep is only needed by type checkers, not at import time.
    project_steps: "List[ProjectStep]",
    step_name_in: List[str],
) -> List[str]:
    """Extract step ids from project steps.

    Args:
        project_steps: Steps of the project, each holding an ``id`` and a ``name``.
        step_name_in: Names of the steps whose ids are wanted.

    Returns:
        The ids of the steps whose name is in ``step_name_in``, in the order
        the steps appear in ``project_steps``.

    Raises:
        ValueError: If at least one requested name matches no project step.
    """
    # Build the lookup once instead of once per requested name.
    known_names = {step["name"] for step in project_steps}

    unmatched_names = [name for name in step_name_in if name not in known_names]
    if unmatched_names:
        raise ValueError(f"The following step names do not match any steps: {unmatched_names}")

    requested_names = set(step_name_in)
    return [step["id"] for step in project_steps if step["name"] in requested_names]


def convert_step_in_to_step_id_in_filter(
    asset_filter_kwargs: Dict[str, object],
    project_steps: "List[ProjectStep]",
    fields: "Optional[ListOrTuple[str]]" = None,
) -> Optional[List[str]]:
    """If a stepIn filter is given, convert it to a stepIdIn and return it.

    The keys ``step_name_in``, ``step_status_in``, ``status_in`` and ``skipped``
    are read from ``asset_filter_kwargs``; the dictionary is not modified.

    Args:
        asset_filter_kwargs: Asset filters given by the caller.
        project_steps: Steps of the project. An empty value means the project
            has no steps, in which case step filters are rejected.
        fields: Asset fields requested by the caller, only used to warn when
            ``status`` is requested on a project that has steps.

    Returns:
        The step ids matching ``step_name_in``, or ``None`` when no
        ``step_name_in`` filter is given.

    Raises:
        ValueError: If step filters are used on a project without steps, if
            ``status_in`` is combined with a step filter on a project with
            steps, if ``step_name_in`` is not a list or tuple of strings, or
            if a step name matches no step of the project.
    """
    step_name_in = asset_filter_kwargs.get("step_name_in")
    step_status_in = asset_filter_kwargs.get("step_status_in")
    status_in = asset_filter_kwargs.get("status_in")
    skipped = asset_filter_kwargs.get("skipped")

    if not project_steps:
        # Project without steps: step-based filters cannot apply.
        if step_name_in is not None or step_status_in is not None:
            raise ValueError(
                "Filters step_name_in and/or step_status_in given : use filter status_in for this project."  # pylint: disable=line-too-long
            )
        return None

    if step_status_in is not None and status_in is not None:
        raise ValueError(
            "Filters step_status_in and status_in both given : only use filter step_status_in for this project."  # pylint: disable=line-too-long
        )
    if step_name_in is not None and status_in is not None:
        raise ValueError(
            "Filters step_name_in and status_in both given : use filter step_status_in instead of status_in for this project."  # pylint: disable=line-too-long
        )
    if status_in is not None:
        warnings.warn(
            "Filter status_in given : use filters step_status_in and step_name_in instead for this project.",  # pylint: disable=line-too-long
            stacklevel=1,
        )
    if skipped is not None:
        warnings.warn(
            "Filter skipped given : only use filter step_status_in with the SKIPPED step status instead for this project",  # pylint: disable=line-too-long
            stacklevel=1,
        )
    if fields and "status" in fields:
        warnings.warn(
            "Field status requested : request fields step and stepStatus instead for this project",
            stacklevel=1,
        )

    if step_name_in is None:
        return None

    # A malformed step_name_in must not be silently ignored: dropping the
    # filter would return assets from every step instead of the requested ones.
    if not isinstance(step_name_in, (list, tuple)) or not all(
        isinstance(item, str) for item in step_name_in
    ):
        raise ValueError("Filter step_name_in must be a list of step names.")

    return extract_step_ids_from_project_steps(
        project_steps=project_steps, step_name_in=list(step_name_in)
    )
@@ -1204,6 +1210,31 @@ def is_rectangle(coco_annotation, coco_image, kili_annotation): else: resolved_asset_ids = cast(List[AssetId], asset_ids) + if asset_filter_kwargs and ( + asset_filter_kwargs.get("step_name_in") is not None + or asset_filter_kwargs.get("step_status_in") is not None + or asset_filter_kwargs.get("status_in") is not None + or asset_filter_kwargs.get("skipped") is not None + ): + project_use_cases = ProjectUseCases(self.kili_api_gateway) + project_steps = project_use_cases.get_project_steps(project_id) + + step_name_in = asset_filter_kwargs.get("step_name_in") + step_status_in = asset_filter_kwargs.get("step_status_in") + status_in = asset_filter_kwargs.get("status_in") + skipped = asset_filter_kwargs.get("skipped") + if step_name_in is not None or step_status_in is not None or status_in is not None: + step_id_in = convert_step_in_to_step_id_in_filter( + project_steps=project_steps, asset_filter_kwargs=asset_filter_kwargs + ) + asset_filter_kwargs.pop("step_name_in", None) + asset_filter_kwargs["step_id_in"] = step_id_in + elif skipped is not None and len(project_steps) != 0: + warnings.warn( + "Filter skipped given : only use filter step_status_in with the SKIPPED step status instead for this project", + stacklevel=1, + ) + try: return export_labels( self, # pyright: ignore[reportGeneralTypeIssues] diff --git a/src/kili/services/export/tools.py b/src/kili/services/export/tools.py index b67157c04..cc4981f45 100644 --- a/src/kili/services/export/tools.py +++ b/src/kili/services/export/tools.py @@ -140,6 +140,8 @@ def fetch_assets( "inference_mark_gte": asset_filter_kwargs.pop("inference_mark_gte", None), "inference_mark_lte": asset_filter_kwargs.pop("inference_mark_lte", None), "metadata_where": asset_filter_kwargs.pop("metadata_where", None), + "step_id_in": asset_filter_kwargs.pop("step_id_in", None), + "step_status_in": asset_filter_kwargs.pop("step_status_in", None), } if asset_filter_kwargs: diff --git a/src/kili/use_cases/project/project.py 
def get_project_steps(
    self,
    project_id: str,
) -> List[ProjectStep]:
    """Get and return project steps.

    Args:
        project_id: Identifier of the project whose steps are fetched.

    Returns:
        The steps of the project, each with its ``id`` and ``name``. An empty
        list is returned when the project carries no steps, so callers can
        safely take ``len()`` of the result.

    Raises:
        NotFound: If no project is returned for ``project_id``.
    """
    project_filters = ProjectFilters(id=ProjectId(project_id))
    options = QueryOptions(disable_tqdm=True, first=1)
    project_gen = self.list_projects(
        project_filters=project_filters, fields=["steps.id", "steps.name"], options=options
    )

    # At most one project is requested (first=1); an empty result means the
    # project was not returned, which is reported explicitly instead of
    # surfacing as an IndexError.
    project = next(iter(project_gen), None)
    if project is None:
        raise NotFound(f"project ID: {project_id}")

    # A missing or null "steps" entry is normalised to an empty list.
    return project.get("steps") or []