diff --git a/.infra/prod/values.yaml b/.infra/prod/values.yaml index 1fa707e82..09808b83e 100644 --- a/.infra/prod/values.yaml +++ b/.infra/prod/values.yaml @@ -2,13 +2,13 @@ stack: services: apiv2: image: - tag: sha-2e0e7d0 + tag: sha-3a64fa9 initContainers: # Install cerbos policies where the cerbos sidecar can grab them. - name: install-cerbos-policies image: repository: 533267185808.dkr.ecr.us-west-2.amazonaws.com/core-platform/cryoet-data-portal-backend/apiv2/apiv2 - tag: sha-2e0e7d0 + tag: sha-3a64fa9 command: ["cp", "-r", "./cerbos/", "/var/policies/"] volumeMounts: - mountPath: /var/policies @@ -24,7 +24,7 @@ stack: - name: run-migrations image: repository: 533267185808.dkr.ecr.us-west-2.amazonaws.com/core-platform/cryoet-data-portal-backend/apiv2/apiv2 - tag: sha-2e0e7d0 + tag: sha-3a64fa9 command: ["alembic", "upgrade", "head"] resources: limits: @@ -37,7 +37,7 @@ stack: - name: gen-keypair image: repository: 533267185808.dkr.ecr.us-west-2.amazonaws.com/core-platform/cryoet-data-portal-backend/apiv2/apiv2 - tag: sha-2e0e7d0 + tag: sha-3a64fa9 command: ["bash", "./etc/gen_keys.sh", "/var/keys/"] volumeMounts: - mountPath: /var/keys diff --git a/.infra/staging/values.yaml b/.infra/staging/values.yaml index 1fa707e82..09808b83e 100644 --- a/.infra/staging/values.yaml +++ b/.infra/staging/values.yaml @@ -2,13 +2,13 @@ stack: services: apiv2: image: - tag: sha-2e0e7d0 + tag: sha-3a64fa9 initContainers: # Install cerbos policies where the cerbos sidecar can grab them. 
- name: install-cerbos-policies image: repository: 533267185808.dkr.ecr.us-west-2.amazonaws.com/core-platform/cryoet-data-portal-backend/apiv2/apiv2 - tag: sha-2e0e7d0 + tag: sha-3a64fa9 command: ["cp", "-r", "./cerbos/", "/var/policies/"] volumeMounts: - mountPath: /var/policies @@ -24,7 +24,7 @@ stack: - name: run-migrations image: repository: 533267185808.dkr.ecr.us-west-2.amazonaws.com/core-platform/cryoet-data-portal-backend/apiv2/apiv2 - tag: sha-2e0e7d0 + tag: sha-3a64fa9 command: ["alembic", "upgrade", "head"] resources: limits: @@ -37,7 +37,7 @@ stack: - name: gen-keypair image: repository: 533267185808.dkr.ecr.us-west-2.amazonaws.com/core-platform/cryoet-data-portal-backend/apiv2/apiv2 - tag: sha-2e0e7d0 + tag: sha-3a64fa9 command: ["bash", "./etc/gen_keys.sh", "/var/keys/"] volumeMounts: - mountPath: /var/keys diff --git a/.release-please-manifest.json b/.release-please-manifest.json index e4fb0915c..b6ee3e42c 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - "apiv2": "1.13.2" + "apiv2": "1.14.0" } \ No newline at end of file diff --git a/apiv2/CHANGELOG.md b/apiv2/CHANGELOG.md index c0b823b8d..baab993da 100644 --- a/apiv2/CHANGELOG.md +++ b/apiv2/CHANGELOG.md @@ -1,5 +1,17 @@ # Changelog +## [1.14.0](https://github.com/chanzuckerberg/cryoet-data-portal-backend/compare/apiv2-v1.13.2...apiv2-v1.14.0) (2025-11-26) + + +### Features + +* add ingestion config (10453) for EMPIAR 12794 - isolated synaptic vesicles from mouse brain ([#538](https://github.com/chanzuckerberg/cryoet-data-portal-backend/issues/538)) ([584fc12](https://github.com/chanzuckerberg/cryoet-data-portal-backend/commit/584fc123e1ecbac4557cefbc299231b6f646548b)) + + +### Bug Fixes + +* neuroglancer config ingestion & db import (fixes staging tomogram viewer) ([#545](https://github.com/chanzuckerberg/cryoet-data-portal-backend/issues/545)) 
([dee5844](https://github.com/chanzuckerberg/cryoet-data-portal-backend/commit/dee584432dc596e94323ea2e79f4e8b582145317)) + ## [1.13.2](https://github.com/chanzuckerberg/cryoet-data-portal-backend/compare/apiv2-v1.13.1...apiv2-v1.13.2) (2025-10-30) diff --git a/apiv2/database/migrations/versions/20251028_082407_autogenerated.py b/apiv2/database/migrations/versions/20251028_082407_autogenerated.py new file mode 100644 index 000000000..5dd43e1ff --- /dev/null +++ b/apiv2/database/migrations/versions/20251028_082407_autogenerated.py @@ -0,0 +1,31 @@ +"""autogenerated + +Create Date: 2025-10-28 14:24:13.316629 + +""" +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = '20251028_082407' +down_revision = '20250819_111511' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column('annotation_shape', 'shape_type', + existing_type=sa.VARCHAR(length=20), + type_=sa.Enum('SegmentationMask', 'OrientedPoint', 'Point', 'InstanceSegmentation', 'InstanceSegmentationMask', 'Mesh', name='annotation_file_shape_type_enum', native_enum=False), + existing_nullable=True) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.alter_column('annotation_shape', 'shape_type', + existing_type=sa.Enum('SegmentationMask', 'OrientedPoint', 'Point', 'InstanceSegmentation', 'InstanceSegmentationMask', 'Mesh', name='annotation_file_shape_type_enum', native_enum=False), + type_=sa.VARCHAR(length=20), + existing_nullable=True) + # ### end Alembic commands ### diff --git a/apiv2/db_import/common/config.py b/apiv2/db_import/common/config.py index 04424da2e..26ef01581 100644 --- a/apiv2/db_import/common/config.py +++ b/apiv2/db_import/common/config.py @@ -4,7 +4,7 @@ from datetime import datetime from functools import lru_cache from pathlib import PurePath -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Final import sqlalchemy as sa from botocore.exceptions import ClientError @@ -20,6 +20,9 @@ logger = logging.getLogger("config") +STAGING_URL: Final[str] = "https://files.cryoet.staging.si.czi.technology" +PROD_URL: Final[str] = "https://files.cryoetdataportal.cziscience.com" + class DBImportConfig: s3_client: S3Client @@ -41,7 +44,7 @@ def __init__( self.s3fs = s3fs self.bucket_name = bucket_name self.s3_prefix = f"s3://{bucket_name}" - self.https_prefix = https_prefix if https_prefix else "https://files.cryoetdataportal.cziscience.com" + self.https_prefix = https_prefix if https_prefix else PROD_URL self.session = session self.deposition_map: dict[int, models.Deposition] = {} @@ -133,11 +136,21 @@ def load_key_json(self, key: str, is_file_required: bool = True) -> dict[str, An Loads file matching the key value as json. If file does not exist, will raise error if is_file_required is True else it will return None. """ + text = self.load_key_text(key, is_file_required) + if text is None: + return None + return json.loads(text) + + def load_key_text(self, key: str, is_file_required: bool = True) -> str | None: + """ + Loads file matching the key value as text. If file does not exist, will raise error if is_file_required is True + else it will return None. 
+ """ try: if key.startswith(self.bucket_name): key = key[len(self.bucket_name) + 1 :] text = self.s3_client.get_object(Bucket=self.bucket_name, Key=key) - return json.loads(text["Body"].read()) + return text["Body"].read().decode("utf-8") except ClientError as ex: if ex.response["Error"]["Code"] == "NoSuchKey" and not is_file_required: logger.warning("NoSuchKey on bucket_name=%s key=%s", self.bucket_name, key) diff --git a/apiv2/db_import/importers/tomogram.py b/apiv2/db_import/importers/tomogram.py index 5eff3d084..ea21f4908 100644 --- a/apiv2/db_import/importers/tomogram.py +++ b/apiv2/db_import/importers/tomogram.py @@ -3,6 +3,7 @@ from typing import Any from database import models +from db_import.common.config import PROD_URL, STAGING_URL from db_import.common.finders import MetadataFileFinder from db_import.common.normalize_fields import normalize_fiducial_alignment from db_import.importers.base import IntegratedDBImporter, ItemDBImporter @@ -39,6 +40,16 @@ class TomogramItem(ItemDBImporter): "is_visualization_default": ["is_visualization_default"], } + def _update_ng_urls(self, ng_config_text: str) -> str: + """ + A simple function to find and replace all production URLs in the neuroglancer config with the staging URL when in the staging environment. + This is done to ensure that the URLs in the neuroglancer config are correct for the staging environment. + """ + if self.config.https_prefix != STAGING_URL: + return ng_config_text + + return ng_config_text.replace(PROD_URL, self.config.https_prefix) + def normalize_to_unknown_str(self, value: str) -> str: return value.replace(" ", "_") if value else "Unknown" @@ -47,9 +58,13 @@ def generate_neuroglancer_data(self, path) -> str | None: # Handle the case where there is no neuroglancer config file specified which is expected when # visualization_default is set to False. 
return None - config = self.config.load_key_json(path, is_file_required=False) + ng_config_text = self.config.load_key_text(path, is_file_required=False) + ng_config_json = None  # stays None when the config file is missing or empty + if ng_config_text: + ng_config_json = json.loads(self._update_ng_urls(ng_config_text)) + # TODO: Log warning - return json.dumps(config, separators=(",", ":")) if config else "{}" + return json.dumps(ng_config_json, separators=(",", ":")) if ng_config_json else "{}" def load_computed_fields(self): https_prefix = self.config.https_prefix diff --git a/apiv2/db_import/tests/conftest.py b/apiv2/db_import/tests/conftest.py index deaf26799..ea7421277 100644 --- a/apiv2/db_import/tests/conftest.py +++ b/apiv2/db_import/tests/conftest.py @@ -133,5 +133,5 @@ def expected_dataset(http_prefix: str) -> dict[str, Any]: "key_photo_url": f"{http_prefix}/{DATASET_ID}/KeyPhoto/snapshot.png", "key_photo_thumbnail_url": f"{http_prefix}/{DATASET_ID}/KeyPhoto/thumbnail.png", "deposition_id": 300, - "file_size": 1374354.0, + "file_size": 1374808.0, } diff --git a/apiv2/db_import/tests/test_db_annotation_import.py b/apiv2/db_import/tests/test_db_annotation_import.py index 283bf13d7..2ac4ef341 100644 --- a/apiv2/db_import/tests/test_db_annotation_import.py +++ b/apiv2/db_import/tests/test_db_annotation_import.py @@ -53,6 +53,24 @@ def expected_annotations(http_prefix: str) -> list[dict[str, Any]]: def expected_annotation_files(http_prefix: str) -> list[dict[str, Any]]: path = f"{DATASET_ID}/RUN1/Reconstructions/VoxelSpacing12.300/Annotations/" return [ + { + "tomogram_voxel_spacing_id": TOMOGRAM_VOXEL_ID1, + "s3_path": f"s3://test-public-bucket/{path}100-foo-1.0_instancesegmask.mrc", + "https_path": f"{http_prefix}/{path}100-foo-1.0_instancesegmask.mrc", + "source": "community", + "format": "mrc", + "is_visualization_default": False, + "file_size": 0, + }, + { + "tomogram_voxel_spacing_id": TOMOGRAM_VOXEL_ID1, + "s3_path": f"s3://test-public-bucket/{path}100-foo-1.0_instancesegmask.zarr", 
"https_path": f"{http_prefix}/{path}100-foo-1.0_instancesegmask.zarr", + "source": "community", + "format": "zarr", + "is_visualization_default": False, + "file_size": 0, + }, { "id": ANNOTATION_FILE_ID, "tomogram_voxel_spacing_id": TOMOGRAM_VOXEL_ID1, diff --git a/apiv2/graphql_api/schema.graphql b/apiv2/graphql_api/schema.graphql index 653aa61a6..e2271c0d0 100644 --- a/apiv2/graphql_api/schema.graphql +++ b/apiv2/graphql_api/schema.graphql @@ -1429,7 +1429,7 @@ type AnnotationShape implements EntityInterface & Node { annotationFilesAggregate(where: AnnotationFileWhereClause = null): AnnotationFileAggregate """ - The shape of the annotation (SegmentationMask, OrientedPoint, Point, InstanceSegmentation, Mesh) + The shape of the annotation (SegmentationMask, OrientedPoint, Point, InstanceSegmentation, Mesh, InstanceSegmentationMask) """ shapeType: annotation_file_shape_type_enum } @@ -1479,7 +1479,7 @@ input AnnotationShapeCreateInput { annotationId: ID = null """ - The shape of the annotation (SegmentationMask, OrientedPoint, Point, InstanceSegmentation, Mesh) + The shape of the annotation (SegmentationMask, OrientedPoint, Point, InstanceSegmentation, Mesh, InstanceSegmentationMask) """ shapeType: annotation_file_shape_type_enum = null @@ -1522,7 +1522,7 @@ input AnnotationShapeUpdateInput { annotationId: ID = null """ - The shape of the annotation (SegmentationMask, OrientedPoint, Point, InstanceSegmentation, Mesh) + The shape of the annotation (SegmentationMask, OrientedPoint, Point, InstanceSegmentation, Mesh, InstanceSegmentationMask) """ shapeType: annotation_file_shape_type_enum = null @@ -4592,7 +4592,7 @@ type PerSectionParameters implements EntityInterface & Node { """ minorDefocus: Float - """Phase shift estimated for this tilt image in degrees.""" + """Phase shift estimated for this tilt image in radians.""" phaseShift: Float """Nominal tilt angle for this tilt image reported by the microscope.""" @@ -4674,7 +4674,7 @@ input 
PerSectionParametersCreateInput { """ minorDefocus: Float = null - """Phase shift estimated for this tilt image in degrees.""" + """Phase shift estimated for this tilt image in radians.""" phaseShift: Float = null """Nominal tilt angle for this tilt image reported by the microscope.""" @@ -4774,7 +4774,7 @@ input PerSectionParametersUpdateInput { """ minorDefocus: Float = null - """Phase shift estimated for this tilt image in degrees.""" + """Phase shift estimated for this tilt image in radians.""" phaseShift: Float = null """Nominal tilt angle for this tilt image reported by the microscope.""" @@ -7098,6 +7098,7 @@ enum annotation_file_shape_type_enum { OrientedPoint Point InstanceSegmentation + InstanceSegmentationMask Mesh } diff --git a/apiv2/graphql_api/schema.json b/apiv2/graphql_api/schema.json index 3cea3dce7..128d34079 100644 --- a/apiv2/graphql_api/schema.json +++ b/apiv2/graphql_api/schema.json @@ -12321,6 +12321,9 @@ { "name": "InstanceSegmentation" }, + { + "name": "InstanceSegmentationMask" + }, { "name": "Mesh" } diff --git a/apiv2/graphql_api/types/annotation_shape.py b/apiv2/graphql_api/types/annotation_shape.py index 5f8d47536..326fc065a 100644 --- a/apiv2/graphql_api/types/annotation_shape.py +++ b/apiv2/graphql_api/types/annotation_shape.py @@ -197,7 +197,7 @@ class AnnotationShape(EntityInterface): Annotated["AnnotationFileAggregate", strawberry.lazy("graphql_api.types.annotation_file")] ] = load_annotation_file_aggregate_rows # type:ignore shape_type: Optional[annotation_file_shape_type_enum] = strawberry.field( - description="The shape of the annotation (SegmentationMask, OrientedPoint, Point, InstanceSegmentation, Mesh)", + description="The shape of the annotation (SegmentationMask, OrientedPoint, Point, InstanceSegmentation, Mesh, InstanceSegmentationMask)", default=None, ) id: int = strawberry.field(description="Numeric identifier (May change!)") @@ -312,7 +312,7 @@ class AnnotationShapeCreateInput: description="Metadata about an shapes 
for an annotation", default=None, ) shape_type: Optional[annotation_file_shape_type_enum] = strawberry.field( - description="The shape of the annotation (SegmentationMask, OrientedPoint, Point, InstanceSegmentation, Mesh)", + description="The shape of the annotation (SegmentationMask, OrientedPoint, Point, InstanceSegmentation, Mesh, InstanceSegmentationMask)", default=None, ) id: int = strawberry.field(description="Numeric identifier (May change!)") @@ -324,7 +324,7 @@ class AnnotationShapeUpdateInput: description="Metadata about an shapes for an annotation", default=None, ) shape_type: Optional[annotation_file_shape_type_enum] = strawberry.field( - description="The shape of the annotation (SegmentationMask, OrientedPoint, Point, InstanceSegmentation, Mesh)", + description="The shape of the annotation (SegmentationMask, OrientedPoint, Point, InstanceSegmentation, Mesh, InstanceSegmentationMask)", default=None, ) id: Optional[int] = strawberry.field(description="Numeric identifier (May change!)") diff --git a/apiv2/graphql_api/types/per_section_parameters.py b/apiv2/graphql_api/types/per_section_parameters.py index 04d9bf893..d18180551 100644 --- a/apiv2/graphql_api/types/per_section_parameters.py +++ b/apiv2/graphql_api/types/per_section_parameters.py @@ -207,7 +207,7 @@ class PerSectionParameters(EntityInterface): default=None, ) phase_shift: Optional[float] = strawberry.field( - description="Phase shift estimated for this tilt image in degrees.", default=None, + description="Phase shift estimated for this tilt image in radians.", default=None, ) raw_angle: float = strawberry.field( description="Nominal tilt angle for this tilt image reported by the microscope.", @@ -361,7 +361,7 @@ class PerSectionParametersCreateInput: default=None, ) phase_shift: Optional[float] = strawberry.field( - description="Phase shift estimated for this tilt image in degrees.", default=None, + description="Phase shift estimated for this tilt image in radians.", default=None, ) 
raw_angle: float = strawberry.field( description="Nominal tilt angle for this tilt image reported by the microscope.", @@ -388,7 +388,7 @@ class PerSectionParametersUpdateInput: default=None, ) phase_shift: Optional[float] = strawberry.field( - description="Phase shift estimated for this tilt image in degrees.", default=None, + description="Phase shift estimated for this tilt image in radians.", default=None, ) raw_angle: Optional[float] = strawberry.field( description="Nominal tilt angle for this tilt image reported by the microscope.", diff --git a/apiv2/schema/schema.yaml b/apiv2/schema/schema.yaml index 212bdaf79..b9302cc69 100644 --- a/apiv2/schema/schema.yaml +++ b/apiv2/schema/schema.yaml @@ -279,6 +279,9 @@ enums: InstanceSegmentation: text: InstanceSegmentation description: A volume with labels for multiple instances + InstanceSegmentationMask: + text: InstanceSegmentationMask + description: A mask with labels for multiple instances Mesh: text: Mesh description: A surface mesh volumes @@ -922,11 +925,11 @@ classes: annotations: cascade_delete: true shape_type: - description: The shape of the annotation (SegmentationMask, OrientedPoint, Point, InstanceSegmentation, Mesh) + description: The shape of the annotation (SegmentationMask, OrientedPoint, Point, InstanceSegmentation, Mesh, InstanceSegmentationMask) name: shape_type from_schema: cdp-dataset-config range: annotation_file_shape_type_enum - pattern: (^SegmentationMask$)|(^OrientedPoint$)|(^Point$)|(^InstanceSegmentation$)|(^Mesh$) + pattern: (^SegmentationMask$)|(^OrientedPoint$)|(^Point$)|(^InstanceSegmentation$)|(^Mesh$)|(^InstanceSegmentationMask$) Annotation: name: Annotation annotations: @@ -1923,7 +1926,7 @@ classes: descriptive_name: Angstrom phase_shift: name: phase_shift - description: Phase shift estimated for this tilt image in degrees. + description: Phase shift estimated for this tilt image in radians. 
exact_mappings: - per_section_phase_shift range: float diff --git a/apiv2/support/enums.py b/apiv2/support/enums.py index 25963c814..f281dcaf4 100644 --- a/apiv2/support/enums.py +++ b/apiv2/support/enums.py @@ -38,6 +38,7 @@ class annotation_file_shape_type_enum(enum.StrEnum): OrientedPoint = "OrientedPoint" Point = "Point" InstanceSegmentation = "InstanceSegmentation" + InstanceSegmentationMask = "InstanceSegmentationMask" Mesh = "Mesh" diff --git a/apiv2/test_infra/factories/annotation_shape.py b/apiv2/test_infra/factories/annotation_shape.py index c31650944..6de536ad4 100644 --- a/apiv2/test_infra/factories/annotation_shape.py +++ b/apiv2/test_infra/factories/annotation_shape.py @@ -32,7 +32,9 @@ class Meta: annotation = factory.SubFactory( AnnotationFactory, ) - shape_type = fuzzy.FuzzyChoice(["SegmentationMask", "OrientedPoint", "Point", "InstanceSegmentation", "Mesh"]) + shape_type = fuzzy.FuzzyChoice( + ["SegmentationMask", "OrientedPoint", "Point", "InstanceSegmentation", "InstanceSegmentationMask", "Mesh"], + ) # Auto increment integer identifiers starting with 1 id = factory.Sequence(lambda n: n + 1) diff --git a/ingestion_tools/Dockerfile b/ingestion_tools/Dockerfile index 39157d73e..927cad7fb 100644 --- a/ingestion_tools/Dockerfile +++ b/ingestion_tools/Dockerfile @@ -13,10 +13,10 @@ RUN update-ca-certificates # IMOD helps with MRC ingestion # bio3d.colorado.edu isn't sending an intermediate cert, which breaks openssl (no AIA support). So we're installing it here. 
-RUN wget http://crt.sectigo.com/SectigoRSAOrganizationValidationSecureServerCA.crt && \ - openssl x509 -in SectigoRSAOrganizationValidationSecureServerCA.crt -out /tmp/intermediate.pem && \ - wget --ca-certificate /tmp/intermediate.pem https://bio3d.colorado.edu/imod/AMD64-RHEL5/imod_4.11.24_RHEL7-64_CUDA10.1.sh \ - && bash imod_4.11.24_RHEL7-64_CUDA10.1.sh -yes && rm /tmp/intermediate.pem +RUN wget -O- https://sectigo.tbs-certificats.com/SectigoPublicServerAuthenticationCAOVR36.crt \ + | openssl x509 -out /tmp/intermediate.pem \ + && wget --ca-certificate /tmp/intermediate.pem https://bio3d.colorado.edu/imod/AMD64-RHEL5/imod_4.11.24_RHEL7-64_CUDA10.1.sh \ + && bash imod_4.11.24_RHEL7-64_CUDA10.1.sh -yes && rm /tmp/intermediate.pem # Install the AWS CLI RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && unzip awscliv2.zip && ./aws/install && rm -rf ./aws diff --git a/ingestion_tools/dataset_configs/10453.yaml b/ingestion_tools/dataset_configs/10453.yaml new file mode 100644 index 000000000..6a5ca5ca4 --- /dev/null +++ b/ingestion_tools/dataset_configs/10453.yaml @@ -0,0 +1,359 @@ +alignments: + - metadata: + alignment_type: GLOBAL + format: IMOD + is_portal_standard: true + method_type: fiducial_based + sources: + - source_multi_glob: + list_globs: + - Stacks/tlt_files_corrected/{run_name}.tlt + - Stacks/xf_files/{run_name}.xf +annotations: + - metadata: + annotation_method: Manual particle picking on 16x binned tomograms; 60x60x60 voxels + annotation_object: + id: GO:0033176 + name: proton-transporting V-type ATPase complex + annotation_software: manual picking + authors: &id001 + - name: Uljana Kravčenko + primary_author_status: true + corresponding_author_status: false + ORCID: 0009-0004-7176-8444 + - name: Max Ruwolt + primary_author_status: false + corresponding_author_status: false + - name: Jana Kroll + primary_author_status: false + corresponding_author_status: false + ORCID: 0000-0003-4243-4088 + - name: Artsemi 
Yushkevich + primary_author_status: false + corresponding_author_status: false + ORCID: 0000-0002-8729-9281 + - name: Martina Zenkner + primary_author_status: false + corresponding_author_status: false + - name: Julia Ruta + primary_author_status: false + corresponding_author_status: false + ORCID: 0009-0007-0524-6885 + - name: Rowaa Lotfy + primary_author_status: false + corresponding_author_status: false + - name: Erich E. Wanker + primary_author_status: false + corresponding_author_status: false + ORCID: 0000-0001-8072-1630 + - name: Christian Rosenmund + primary_author_status: false + corresponding_author_status: false + ORCID: 0000-0002-3905-2444 + - name: Fan Liu + primary_author_status: false + corresponding_author_status: false + - name: Mikhail Kudryashev + primary_author_status: false + corresponding_author_status: true + ORCID: 0000-0003-3550-6274 + email: mikhail.kudryashev@mdc-berlin.de + dates: &id002 + deposition_date: "2025-11-17" + last_modified_date: "2025-11-17" + release_date: "2025-11-17" + ground_truth_status: true + is_curator_recommended: true + method_links: &id003 + - custom_name: TomoBEAR + link: https://github.com/KudryashevLab/TomoBEAR + link_type: source_code + - custom_name: Dynamo + link: https://dynamo-em.org/ + link_type: website + - custom_name: RELION 4 + link: https://relion.readthedocs.io/ + link_type: documentation + method_type: manual + version: 1.0 + sources: + - OrientedPoint: + binning: 16 + order: xyz + file_format: relion4_star + glob_string: particle_tables/V_ATPases_dataset_1_relion_3D_refine_superres.star + is_visualization_default: true + filter_value: "{run_name}" + - metadata: + annotation_method: Manual particle picking on 16x binned tomograms; 116x116x116 voxels + annotation_object: + id: GO:0030118 + name: clathrin coat + annotation_software: manual picking + authors: *id001 + dates: *id002 + ground_truth_status: true + is_curator_recommended: true + method_links: *id003 + method_type: manual + version: 1.0 + 
sources: + - OrientedPoint: + binning: 16 + order: xyz + file_format: relion4_star + glob_string: particle_tables/CCVs_fragment_relion_dataset_1_fixed.star + is_visualization_default: true + filter_value: "{run_name}" + - metadata: + annotation_method: Final particle set of manually picked V-ATPases were used for a focused refinement on the Voa1 region (V0 domain). + annotation_object: + id: GO:0033179 + name: proton-transporting V-type ATPase, V0 domain + annotation_software: TomoBEAR, Dynamo, RELION 4, manual picking + authors: *id001 + dates: *id002 + ground_truth_status: true + is_curator_recommended: true + method_links: *id003 + method_type: hybrid + version: 1.0 + sources: + - OrientedPoint: + binning: 16 + order: xyz + file_format: relion4_star + glob_string: particle_tables/V0_Syp_focused_relion_3D_refine_superres_dataset_1.star + is_visualization_default: true + filter_value: "{run_name}" + - metadata: + annotation_method: Single clathrin triskelia were obtained by classifying CCV fragments, recentering on a clathrin hexamer with C6 symmetry, and cropping single triskelia. 
+ annotation_object: + id: GO:0071439 + name: clathrin complex + annotation_software: manual picking, Dynamo, RELION 4 + authors: *id001 + dates: *id002 + ground_truth_status: true + is_curator_recommended: true + method_links: *id003 + method_type: hybrid + version: 1.0 + sources: + - OrientedPoint: + binning: 16 + order: xyz + file_format: relion4_star + glob_string: particle_tables/single_triskelia_dataset_1.star + is_visualization_default: true + filter_value: "{run_name}" +ctfs: + - metadata: + format: Gctf + sources: + - source_glob: + list_glob: "Stacks/defocus_files/{run_name}_defocus.txt" +dataset_keyphotos: + - sources: + - literal: + value: + snapshot: null + thumbnail: null +collection_metadata: + - sources: + - source_glob: + list_glob: combined_mdocs/{mapped_tilt_series_name}.mdoc +datasets: + - metadata: + assay: + name: microscopy assay + id: EFO:0002909 + authors: *id001 + cell_component: + name: synaptic vesicle + id: GO:0008021 + cross_references: &id004 + publications: 10.1073/pnas.2407375121 + related_database_entries: EMPIAR-12794, EMD-18578, EMD-18568, EMD-18557, EMD-18572, EMD-18556 + dataset_description: > + CryoET data of synaptic vesicles (SVs) isolated from mouse brain. Includes raw tiltseries movies, alignment, defocus, and tilt angle metadata, and reconstructed tomograms capturing vesicle ultrastructure and surrounding molecular features. + Samples were plunge-frozen and imaged on a Titan Krios microscope equipped with a Falcon 4 detector and energy filter. Data processing was performed using TomoBEAR for workflow management. + Aligned frames were motion-corrected with MotionCor2, and tiltseries alignment was carried out using DynamoTSA with manual refinement in IMOD based on 10-nm or 5-nm gold fiducial markers. + Defocus values were estimated using Gctf, and CTF correction was applied with ctfphaseflip from IMOD. Final reconstructions were generated by weighted backprojection in IMOD. 
+ dataset_identifier: 10453 + dataset_title: CryoET of isolated synaptic vesicles from mouse brain + dates: *id002 + development_stage: + name: 5-week-old stage + id: MmusDv:0000150 + disease: + name: normal + id: PATO:0000461 + funding: + - funding_agency_name: German Research Foundation (DFG) + grant_id: INST 335/588-1 FUGG + - funding_agency_name: German Research Foundation (DFG) + grant_id: KU3222/3-1 + - funding_agency_name: German Research Foundation (DFG) + grant_id: "458275811" + - funding_agency_name: German Research Foundation (DFG) + grant_id: "399894546" + - funding_agency_name: German Research Foundation (DFG) + grant_id: "436260754" + - funding_agency_name: European Research Council (ERC) + grant_id: ERC-STG 949184 + - funding_agency_name: Leibniz Association + grant_id: P70/2018 + - funding_agency_name: Helmholtz Society + grant_id: null + - funding_agency_name: Chemical Industry Fund of the German Chemical Industry Association (Kekulé Fellowship) + grant_id: null + - funding_agency_name: DiGiTal program (Berliner Chancengleichheitsprogramm, BCP) + grant_id: null + grid_preparation: > + Glow-discharged UltrAuFoil R1.2/1.3 or Quantifoil R2/1 Cu/Au grids were coated with collagen/poly-D-lysine. + For vesicle samples, 4 µL of vesicle-gold fiducial mixture (10 nm or 5 nm gold, 1:10) was applied, blotted 3 s at 4 °C / 98% RH, + and plunge-frozen in liquid ethane (Vitrobot). For neuronal grids, 4 µL freezing buffer with 10 nm BSA-gold fiducials was added + before backside blotting (16 s, 37 °C, 80% RH) and plunge-freezing. + organism: + name: Mus musculus + taxonomy_id: 10090 + sample_preparation: > + Synaptic vesicles were isolated from hippocampus, cortex, and cerebellum of 40-day-old mice via differential + centrifugation (LP2 fraction) in HEPES-sucrose buffer. Three vesicle conditions were prepared: untreated, ATP-treated + (4.5 µM ATP, 5 µM MgCl₂), and bafilomycin A1-treated (90 nM). 
For neuron-on-grid samples, cells expressing + ChR2(E123T/T159C) were briefly optogenetically stimulated prior to freezing. + sample_type: organelle + tissue: + name: brain + id: UBERON:0000955 + sources: + - literal: + value: + - '10453' +depositions: + - metadata: + authors: *id001 + cross_references: *id004 + dates: *id002 + deposition_description: > + Cryo-electron tomography data of synaptic vesicles isolated from mouse brain tissue. The deposition includes raw + tiltseries movies, alignment transforms, defocus/tilt metadata, aligned stacks, and CTF-corrected tomograms. + Samples were plunge-frozen and imaged on a 300 kV Titan Krios with a Falcon 4 detector and energy filter. Processing + used TomoBEAR, with MotionCor2 for frame alignment, DynamoTSA/IMOD for tiltseries alignment, Gctf and IMOD + ctfphaseflip for CTF handling, and weighted back-projection in IMOD for reconstruction. + deposition_identifier: 10334 + deposition_title: CryoET of isolated synaptic vesicles from mouse brain + deposition_types: + - dataset + sources: + - literal: + value: + - 10334 +deposition_keyphotos: +- sources: + - literal: + value: + snapshot: cryoetportal-rawdatasets-dev/deposition_key_photos/deposition_10334_snapshot.png + thumbnail: cryoetportal-rawdatasets-dev/deposition_key_photos/deposition_10334_thumbnail.png +frames: + - metadata: + dose_rate: float {tilt_series_dose_rate} + is_gain_corrected: false + sources: + - source_multi_glob: + list_globs: + - frames_1/{mapped_tilt_series_name}*.tif + - frames_2/{mapped_tilt_series_name}*.tif +gains: + - sources: + - source_glob: + list_glob: "{gain_file}" +rawtilts: + - sources: + - source_glob: + list_glob: Stacks/rawtlt_files_corrected/{run_name}.rawtlt +runs: + - sources: + - source_glob: + list_glob: Stacks/st_files/*.st + match_regex: .* + name_regex: (.*)\.st$ + exclude: # missing frames. 
+ - tomogram_002 + - tomogram_012 + - tomogram_023 + - tomogram_043 +standardization_config: + deposition_id: 10334 + run_data_map_file: run_to_data_map.tsv + source_prefix: EMPIAR/12794/data/ +tiltseries: + - metadata: + acceleration_voltage: 300000 + binning_from_frames: 1 + camera: + manufacturer: Gatan + model: K3 + data_acquisition_software: SerialEM + is_aligned: false + microscope: + manufacturer: TFS + model: TITAN KRIOS G3i + microscope_optical_setup: + energy_filter: Gatan Bioquantum + pixel_spacing: 0.84 + spherical_aberration_constant: 2.7 + tilt_axis: float {tilt_axis} + tilt_range: + max: float {tilt_series_max_tilt} + min: float {tilt_series_min_tilt} + tilt_series_quality: int {tilt_series_quality} + tilt_step: 3.0 + tilting_scheme: dose-symmetric + total_flux: float {tilt_series_total_flux} + sources: + - source_glob: + list_glob: Stacks/st_files/{run_name}.st +tomograms: + - metadata: + affine_transformation_matrix: + - - 1 + - 0 + - 0 + - 0 + - - 0 + - 1 + - 0 + - 0 + - - 0 + - 0 + - 1 + - 0 + - - 0 + - 0 + - 0 + - 1 + authors: *id001 + ctf_corrected: true + dates: *id002 + fiducial_alignment_status: FIDUCIAL + is_visualization_default: true + offset: + x: 0 + y: 0 + z: 0 + processing: raw + reconstruction_method: WBP + reconstruction_software: IMOD + tomogram_version: 1 + voxel_spacing: 13.440 + sources: + - source_glob: + list_glob: tomograms/{run_name}_bin_16.rec +version: 1.0.0 +voxel_spacings: + - sources: + - literal: + value: + - 13.440 diff --git a/ingestion_tools/dataset_configs/10456.yaml b/ingestion_tools/dataset_configs/10456.yaml new file mode 100644 index 000000000..10797278e --- /dev/null +++ b/ingestion_tools/dataset_configs/10456.yaml @@ -0,0 +1,499 @@ +alignments: +- metadata: + alignment_type: LOCAL + format: ARETOMO3 + is_portal_standard: true + method_type: projection_matching + sources: + - source_multi_glob: + list_globs: + - '25jul29a/{run_name}.aln' +annotations: + - metadata: + annotation_method: Prediction using 
membrain-seg without rescaling and weights MemBrain_seg_v10_alpha.ckpt + annotation_object: + id: GO:0016020 + name: membrane + annotation_publications: 10.1101/2024.01.05.574336 + annotation_software: membrain-seg 0.0.1 + authors: + - ORCID: 0000-0002-8063-6951 + corresponding_author_status: true + name: Jonathan Schwartz + primary_author_status: true + - ORCID: 0000-0002-5940-3897 + corresponding_author_status: true + name: Ariana Peck + primary_author_status: true + - name: Utz Heinrich Ermel + ORCID: 0000-0003-4685-037X + corresponding_author_status: true + primary_author_status: true + - ORCID: 0000-0002-3248-9678 + corresponding_author_status: true + name: Yue Yu + primary_author_status: true + dates: + deposition_date: '2025-11-24' + last_modified_date: '2025-11-24' + release_date: '2025-11-24' + ground_truth_status: false + is_curator_recommended: true + method_links: + - custom_name: membrain-seg on GitHub + link: https://github.com/teamtomo/membrain-seg + link_type: source_code + - custom_name: membrain-seg on PyPI + link: https://pypi.org/project/membrain-seg/ + link_type: other + method_type: automated + version: 1.0 + sources: + - SemanticSegmentationMask: + file_format: mrc + glob_strings: + - 'project/ExperimentRuns/{run_name}/Segmentations/9.990_membrain-seg_final_membrane.zarr' + is_visualization_default: true + mask_label: 1 + rescale: true + threshold: 0.5 + - metadata: + annotation_method: SABER prediction + annotation_object: + id: GO:0005764 + name: lysosome + annotation_software: SABER + authors: + - ORCID: 0000-0002-8063-6951 + corresponding_author_status: false + name: Jonathan Schwartz + primary_author_status: true + - name: Dari Kimanius + ORCID: 0000-0002-2662-6373 + corresponding_author_status: true + primary_author_status: false + dates: + deposition_date: '2025-11-24' + last_modified_date: '2025-11-24' + release_date: '2025-11-24' + ground_truth_status: false + is_curator_recommended: true + method_links: + - custom_name: SABER on 
GitHub + link: https://github.com/chanzuckerberg/saber + link_type: source_code + - custom_name: SABER Documentation + link: https://chanzuckerberg.github.io/saber/ + link_type: documentation + method_type: automated + version: 1.0 + sources: + - SemanticSegmentationMask: + file_format: mrc + glob_strings: + - 'project/ExperimentRuns/{run_name}/Segmentations/9.990_saber-refined_final_lysosome.zarr' + is_visualization_default: true + mask_label: 1 + rescale: true + threshold: 0.5 + - metadata: + annotation_method: nnInteractive prediction + mcm-cryoET smoothing + annotation_object: + id: GO:0005739 + name: mitochondrion + annotation_software: nnInteractive + authors: + - ORCID: 0000-0003-4685-037X + corresponding_author_status: true + name: Utz Heinrich Ermel + primary_author_status: true + dates: + deposition_date: '2025-11-24' + last_modified_date: '2025-11-24' + release_date: '2025-11-24' + ground_truth_status: false + is_curator_recommended: true + method_links: + - custom_name: nnInteractive on GitHub + link: https://github.com/MIC-DKFZ/nnInteractive + link_type: source_code + - custom_name: mcm-cryoET on GitHub + link: https://github.com/FrangakisLab/mcm-cryoet + link_type: source_code + method_type: automated + version: 1.0 + sources: + - SemanticSegmentationMask: + file_format: mrc + glob_strings: + - 'project/ExperimentRuns/{run_name}/Segmentations/14.985_nninteractive_final_mitochondrion.zarr' + is_visualization_default: true + mask_label: 1 + rescale: true + threshold: 0.5 + - metadata: + annotation_method: octopi prediction + 2D/3D refinement in RELION 5 + 3D classification in RELION 5 + annotation_object: + id: GO:0022626 + name: cytosolic ribosome + annotation_software: octopi 1.2.0 + authors: + - ORCID: 0009-0003-2008-9583 + corresponding_author_status: false + name: Daniel Ji + primary_author_status: true + - name: Utz Heinrich Ermel + ORCID: 0000-0003-4685-037X + corresponding_author_status: true + primary_author_status: false + dates: + 
deposition_date: '2025-11-24' + last_modified_date: '2025-11-24' + release_date: '2025-11-24' + ground_truth_status: false + is_curator_recommended: true + method_links: + - custom_name: Octopi on GitHub + link: https://github.com/chanzuckerberg/octopi + link_type: source_code + - custom_name: Octopi documentation + link: https://chanzuckerberg.github.io/octopi/ + link_type: documentation + - custom_name: RELION on GitHub + link: https://github.com/3dem/relion + link_type: source_code + method_type: automated + version: 1.0 + sources: + - OrientedPoint: + binning: 14.985 + file_format: copick + glob_strings: + - 'project/ExperimentRuns/{run_name}/Picks/pyrelion_refine3d-job004-orientations-0-01-cutoff_ribosome.json' + is_visualization_default: true + mesh_source_path: 'project/mesh/ribosome.glb' + - metadata: + annotation_method: nnInteractive prediction + copick-utils spline fit + 3D refinement in TOM + annotation_object: + id: GO:0005874 + name: microtubule + annotation_software: nnInteractive + authors: + - ORCID: 0000-0001-8560-7407 + corresponding_author_status: true + name: Julia Peukes + primary_author_status: true + - name: Utz Heinrich Ermel + ORCID: 0000-0003-4685-037X + corresponding_author_status: true + primary_author_status: true + dates: + deposition_date: '2025-11-24' + last_modified_date: '2025-11-24' + release_date: '2025-11-24' + ground_truth_status: false + is_curator_recommended: true + method_links: + - custom_name: nnInteractive on GitHub + link: https://github.com/MIC-DKFZ/nnInteractive + link_type: source_code + - custom_name: copick-utils on GitHub + link: https://github.com/copick/copick-utils/ + link_type: source_code + - custom_name: TOM toolbox + link: https://www.biochem.mpg.de/6348566/tom_e + link_type: website + method_type: hybrid + version: 1.0 + sources: + - OrientedPoint: + binning: 14.985 + file_format: copick + glob_strings: + - 'project/ExperimentRuns/{run_name}/Picks/points_combined_mt.json' + is_visualization_default: true 
+ mesh_source_path: 'project/mesh/microtubule.glb' +collection_metadata: +- sources: + - source_glob: + list_glob: '25jul29a/{run_name}.mdoc' +ctfs: +- metadata: + format: CTFFIND + sources: + - source_glob: + list_glob: '25jul29a/{run_name}_CTFFIND_format_CTF.txt' +depositions: + - metadata: + authors: &id001 + - name: Hannah Siems + primary_author_status: true + ORCID: 0009-0002-6674-7601 + - name: Garrett Greenan + primary_author_status: true + ORCID: 0000-0002-9045-7666 + - name: Nikki Jean + primary_author_status: false + corresponding_author_status: false + ORCID: 0009-0007-9777-9856 + - name: Daniel Ji + ORCID: 0009-0003-2008-9583 + primary_author_status: false + - name: Utz Heinrich Ermel + ORCID: 0000-0003-4685-037X + primary_author_status: false + - name: Julia Peukes + ORCID: 0000-0001-8560-7407 + primary_author_status: false + - name: Jonathan Schwartz + ORCID: 0000-0002-8063-6951 + primary_author_status: false + - name: Ariana Peck + ORCID: 0000-0002-5940-3897 + primary_author_status: false + - name: Yue Yu + ORCID: 0000-0002-3248-9678 + primary_author_status: false + - name: Elizabeth Montabana + primary_author_status: false + corresponding_author_status: false + ORCID: 0000-0002-6731-5854 + - name: Dari Kimanius + ORCID: 0000-0002-2662-6373 + corresponding_author_status: false + primary_author_status: false + - name: Matthias Haury + primary_author_status: false + corresponding_author_status: false + ORCID: 0000-0002-1796-1479 + - name: David Agard + primary_author_status: false + corresponding_author_status: false + ORCID: 0000-0003-3512-695X + - name: Bridget Carragher + primary_author_status: false + corresponding_author_status: false + ORCID: 0000-0002-0624-5020 + - name: Daniel Serwas + primary_author_status: false + corresponding_author_status: true + ORCID: 0000-0001-9010-7298 + dates: &id002 + deposition_date: '2025-11-24' + last_modified_date: '2025-11-24' + release_date: '2025-11-24' + deposition_description: + Human HEK293T cells lacking 
the lysosomal protein NPC1, which normally facilitates cholesterol transport from + the lysosome lumen to other cell compartments. Loss of NPC1 causes Niemann-Pick Type C, an ultra-rare and fatal + disease with no current treatment. These datasets capture organelle interactions and membrane dynamics in + disease-relevant cellular environments, providing a resource for studying lysosomal storage disorders at + nanometer resolution. + deposition_identifier: 10337 + deposition_title: CryoET of NPC1-Deficient Human HEK293T Cells + deposition_types: + - dataset + - annotation + sources: + - literal: + value: + - 10337 +deposition_keyphotos: + - sources: + - literal: + value: + snapshot: cryoetportal-rawdatasets-dev/deposition_key_photos/deposition_10337_snapshot.png + thumbnail: cryoetportal-rawdatasets-dev/deposition_key_photos/deposition_10337_thumbnail.png +datasets: + - metadata: + authors: *id001 + dataset_description: + Human HEK293T cells lacking the lysosomal cholesterol transporter NPC1 were cultured and plunge-frozen. This + dataset contains tomograms with comprehensive annotations of cellular features including lysosome clusters + within larger membrane compartments, abnormal mitochondrial morphology, microtubules, and ribosomes throughout + the cytoplasm. Membranes are annotated to reveal the complex architecture of lysosomal compartments and their + spatial relationships with other organelles. + dataset_identifier: 10456 + dataset_title: CryoET of NPC1-Deficient Human HEK293T Cells with Annotations for Multiple Cellular Features + dates: + deposition_date: '2025-11-24' + last_modified_date: '2025-11-24' + release_date: '2025-11-24' + grid_preparation: | + Prior to cell seeding Quantifoil (Au 200, SiO2 R1/4) grids were glow discharged and 4 grids were placed onto a + 20 mm glass-bottom Matek dish. Cells were seeded at a density of 100,000 cells per dish. Cells were incubated + overnight at 37°C and 5% CO2. 
Prior to plunge-freezing, cells were treated with LysoTracker™ Deep Red for 15-30 + mins. Prior to blotting, excess media was removed with Whatman 1 filter paper. 3 µl of PBS was added to the + grids before blotting and plunging into liquid ethane. Leica GP2 was used for grid preparation. Thin lamella + were milled using LysoTracker™ fluorescence for targeting using a Arctis CryoFib-SEM. + sample_preparation: | + NPC1-deletion HEK293T cells (DOI: 10.1126/science.aag1417) were grown overnight on Quantifoil (Au 200, SiO2 + R1/4) grids at 37ºC and 5% CO2. Cells were labeled with LysoTracker™ Deep Red 15-30 min before plunge freezing + to facilitate targeted lamella preparation. + funding: + - funding_agency_name: Chan Zuckerberg Initiative + grant_id: "CZII-2023\u2013327779" + sample_type: cell_line + organism: + name: Homo sapiens + taxonomy_id: 9606 + cell_strain: + name: HEK293T + id: CVCL_0063 + cell_type: + name: "kidney epithelial cell" + id: CL:0002518 + tissue: + name: "kidney epithelial cell" + id: CL:0002518 + assay: + name: "microscopy assay" + id: EFO:0002909 + development_stage: + name: unknown + id: unknown + disease: + name: "Niemann-Pick disease, type C1" + id: MONDO:0009757 + sources: + - literal: + value: + - '10456' +dataset_keyphotos: + - sources: + - literal: + value: + snapshot: null + thumbnail: null +frames: +- metadata: + dose_rate: 0.7704 + is_gain_corrected: false + sources: + - source_glob: + list_glob: '25jul29a/{run_name}/*.eer' +gains: +- sources: + - source_glob: + list_glob: '25jul29a/{gain_file}' +rawtilts: +- sources: + - source_glob: + list_glob: '25jul29a/{run_name}_st.rawtlt' +runs: + - sources: + - source_glob: + list_glob: '25jul29a/*.aln' + match_regex: .*\.aln$ + name_regex: (.*)\.aln$ +standardization_config: + deposition_id: 10337 + source_prefix: CZII/20251124_NPC1_example/ + run_data_map_file: 25jul29a/run_to_data_map_updated_quality_updated.tsv +tiltseries: +- metadata: + acceleration_voltage: 300000 + 
binning_from_frames: 1 + camera: + manufacturer: TFS + model: FALCON 4i + data_acquisition_software: Tomo5 + is_aligned: false + microscope: + manufacturer: TFS + model: TITAN KRIOS G4 + microscope_optical_setup: + energy_filter: Selectris X + phase_plate: Volta phase plate + pixel_spacing: 3.7 + spherical_aberration_constant: 2.7 + tilt_axis: -96 + tilt_range: + max: float {tilt_series_max_tilt} + min: float {tilt_series_min_tilt} + tilt_series_quality: int {tilt_series_quality} + tilt_step: 3.0 + tilting_scheme: dose-symmetric + total_flux: float {tilt_series_total_flux} + sources: + - source_glob: + list_glob: '25jul29a/{run_name}_TS.mrc' + match_regex: .*\.mrc$ +tomograms: + - metadata: + affine_transformation_matrix: + - - 1 + - 0 + - 0 + - 0 + - - 0 + - 1 + - 0 + - 0 + - - 0 + - 0 + - 1 + - 0 + - - 0 + - 0 + - 0 + - 1 + authors: *id001 + ctf_corrected: true + dates: *id002 + fiducial_alignment_status: NON_FIDUCIAL + is_visualization_default: true + offset: + x: 0 + y: 0 + z: 0 + processing: filtered + processing_software: Aretomo3 v2.2.2 + reconstruction_method: WBP + reconstruction_software: Aretomo3 v2.2.2 + tomogram_version: 1 + voxel_spacing: 14.985 + sources: + - source_glob: + list_glob: '25jul29a/{run_name}_dctf_Vol.mrc' + - metadata: + affine_transformation_matrix: + - - 1 + - 0 + - 0 + - 0 + - - 0 + - 1 + - 0 + - 0 + - - 0 + - 0 + - 1 + - 0 + - - 0 + - 0 + - 0 + - 1 + authors: *id001 + ctf_corrected: true + dates: *id002 + fiducial_alignment_status: NON_FIDUCIAL + is_visualization_default: true + offset: + x: 0 + y: 0 + z: 0 + processing: denoised + processing_software: denoisET + reconstruction_method: WBP + reconstruction_software: Aretomo3 v2.2.2 + tomogram_version: 1 + voxel_spacing: 14.985 + sources: + - source_glob: + list_glob: '25jul29a/{run_name}_Vol.mrc' +version: 1.1.0 +voxel_spacings: + - sources: + - literal: + value: + - 14.985 diff --git a/ingestion_tools/dataset_configs/template.yaml b/ingestion_tools/dataset_configs/template.yaml 
index b2d77c7b4..d751a31f4 100644 --- a/ingestion_tools/dataset_configs/template.yaml +++ b/ingestion_tools/dataset_configs/template.yaml @@ -93,6 +93,15 @@ annotations: OPTIONAL delimiter: OPTIONAL, STRING (DEFAULT ',') parent_filters: see InstanceSegmentation.parent_filters exclude: SEE InstanceSegmentation.exclude + - InstanceSegmentationMask: + file_format: see InstanceSegmentation.file_format + glob_string: see InstanceSegmentation.glob_string + glob_strings: see InstanceSegmentation.glob_strings + is_visualization_default: see InstanceSegmentation.is_visualization_default + is_portal_standard: OPTIONAL, BOOLEAN (DEFAULT FALSE) + rescale: OPTIONAL, BOOLEAN (DEFAULT FALSE) + parent_filters: see InstanceSegmentation.parent_filters + exclude: SEE InstanceSegmentation.exclude - SegmentationMask: file_format: see InstanceSegmentation.file_format glob_string: see InstanceSegmentation.glob_string diff --git a/ingestion_tools/poetry.lock b/ingestion_tools/poetry.lock index 35e16f762..f245751fd 100644 --- a/ingestion_tools/poetry.lock +++ b/ingestion_tools/poetry.lock @@ -1807,7 +1807,7 @@ tqdm = ">=4.67,<5.0" [[package]] name = "cryoet-data-portal-neuroglancer" -version = "1.6.1" +version = "1.7.1" description = "Utility package for working with Neuroglancer data in the CZI Cryo-ET Data Portal" optional = false python-versions = "^3.11" @@ -1831,8 +1831,8 @@ zarr = "^2.18.2" [package.source] type = "git" url = "https://github.com/chanzuckerberg/cryoet-data-portal-neuroglancer.git" -reference = "v1.6.1" -resolved_reference = "e4cf29c84b3e8cb395fd1860894b83a6b1230e6f" +reference = "v1.7.1" +resolved_reference = "75b3d8c6427b6924a3e7cb96c40584eb15f36199" [[package]] name = "cycler" @@ -3413,6 +3413,7 @@ files = [ {file = "igneous_pipeline-4.30.0-py3-none-any.whl", hash = "sha256:88bd833613ef185d25e5f59366663a30373d50ede17e7aca90b42757865bfa29"}, {file = "igneous_pipeline-4.30.0.tar.gz", hash = "sha256:1a00440c19838edf4a92aa5315d63f72d076ab70630ded2a9dafc8a558f677bd"}, ] 
+develop = false [package.dependencies] click = ">=6.7" @@ -8249,4 +8250,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "b2f02857cf5111f7820579f0d89b8359402f6365b8fe0efd55f58d343d69155d" +content-hash = "78fcf6309c1173ed9e06908fe85a26974eb689b6cdecfbf646115fcba174258f" diff --git a/ingestion_tools/pyproject.toml b/ingestion_tools/pyproject.toml index d8af7ed8d..3ef1da367 100644 --- a/ingestion_tools/pyproject.toml +++ b/ingestion_tools/pyproject.toml @@ -53,7 +53,7 @@ imageio = "^2.33.1" pytest = "^8.3.2" boto3-stubs = {extras = ["s3"], version = "^1.34.34"} mypy = "^1.8.0" -cryoet-data-portal-neuroglancer = { git = "https://github.com/chanzuckerberg/cryoet-data-portal-neuroglancer.git", tag = "v1.6.1" } +cryoet-data-portal-neuroglancer = { git = "https://github.com/chanzuckerberg/cryoet-data-portal-neuroglancer.git", tag = "v1.7.1" } distinctipy = "^1.3.4" imodmodel = "0.0.12" mdocfile = "^0.1.2" diff --git a/ingestion_tools/scripts/common/config.py b/ingestion_tools/scripts/common/config.py index a9fd8962d..d46d7bf18 100644 --- a/ingestion_tools/scripts/common/config.py +++ b/ingestion_tools/scripts/common/config.py @@ -4,7 +4,7 @@ import os.path import re from copy import deepcopy -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Final import yaml @@ -15,6 +15,8 @@ else: BaseImporter = "BaseImporter" +STAGING_URL: Final[str] = "https://files.cryoet.staging.si.czi.technology" +PROD_URL: Final[str] = "https://files.cryoetdataportal.cziscience.com" class RunOverride: run_regex: re.Pattern[str] @@ -28,7 +30,7 @@ def __init__(self, run_regex: re.Pattern[str], tiltseries: dict[str, Any] | None class DepositionImportConfig: - https_prefix = os.getenv("DOMAIN_NAME", "https://files.cryoetdataportal.cziscience.com") + https_prefix: str # Core metadata source_prefix: str @@ -60,8 +62,10 @@ def __init__( output_prefix: str, input_bucket: str, object_classes: list[BaseImporter], + 
https_prefix: str | None = None, ): self.output_prefix = output_prefix + self.https_prefix = https_prefix if https_prefix else PROD_URL self.fs = fs self.run_to_tomo_map = {} self.run_data_map = {} diff --git a/ingestion_tools/scripts/common/ctf_converter.py b/ingestion_tools/scripts/common/ctf_converter.py index 918a2f200..7c4b6517b 100644 --- a/ingestion_tools/scripts/common/ctf_converter.py +++ b/ingestion_tools/scripts/common/ctf_converter.py @@ -12,8 +12,16 @@ class CTFInfo: cross_correlation (float): Cross correlation value. max_resolution (float): Maximum resolution (A). """ + def __init__( - self, section: int, defocus_1: float, defocus_2: float, azimuth: float, phase_shift: float, cross_correlation: float, max_resolution: float, + self, + section: int, + defocus_1: float, + defocus_2: float, + azimuth: float, + phase_shift: float, + cross_correlation: float, + max_resolution: float, ): self.section = section self.defocus_1 = defocus_1 @@ -35,7 +43,6 @@ def get_ctf_info(cls) -> list[CTFInfo]: class AreTomo3CTF(BaseCTFConverter): - def get_ctf_info(self) -> list[CTFInfo]: local_path = self.config.fs.localreadable(self.path) with open(local_path, "r") as f: @@ -57,8 +64,56 @@ def from_str(cls, line: str): max_resolution=float(parts[6]), ) + +class GctfCTF(BaseCTFConverter): + def get_ctf_info(self) -> list[CTFInfo]: + local_path = self.config.fs.localreadable(self.path) + infos: list[CTFInfo] = [] + with open(local_path, "r") as f: + for raw in f: + line = raw.strip() + if not line or line.startswith(("#", ";", "%")): + continue + infos.append(self.from_str(line)) + return infos + + @classmethod + def from_str(cls, line: str) -> CTFInfo: + parts = line.split() + if len(parts) not in (6, 7): + raise ValueError(f"Gctf summary row must have 6 or 7 columns (got {len(parts)}): {line}") + + section = int(round(float(parts[0]))) + defocus_1 = float(parts[1]) + defocus_2 = float(parts[2]) + azimuth = float(parts[3]) + + if len(parts) == 7: + # 7-col: idx, defU, defV, 
angle, phaseShift, CC, resolution + phase_shift = float(parts[4]) + cross_correlation = float(parts[5]) + max_resolution = float(parts[6]) + else: + # 6-col: idx, defU, defV, angle, CC, resolution (no phase shift provided) + phase_shift = 0.0 + cross_correlation = float(parts[4]) + max_resolution = float(parts[5]) + + return CTFInfo( + section=section, + defocus_1=defocus_1, + defocus_2=defocus_2, + azimuth=azimuth, + phase_shift=phase_shift, + cross_correlation=cross_correlation, + max_resolution=max_resolution, + ) + + def ctf_converter_factory(metadata: dict, config: DepositionImportConfig, path: str) -> BaseCTFConverter: - ctf_format = metadata.get("format") + ctf_format = (metadata.get("format") or "").upper() + if ctf_format == "GCTF": + return GctfCTF(config, path) if ctf_format == "CTFFIND": return AreTomo3CTF(config, path) return BaseCTFConverter(config, path) diff --git a/ingestion_tools/scripts/common/image.py b/ingestion_tools/scripts/common/image.py index 656392c62..ae3f46305 100644 --- a/ingestion_tools/scripts/common/image.py +++ b/ingestion_tools/scripts/common/image.py @@ -15,7 +15,8 @@ from mrcfile.mrcfile import MrcFile from ome_zarr.io import ZarrLocation from ome_zarr.reader import Reader as Reader -from skimage.transform import downscale_local_mean, resize_local_mean +from skimage.measure import block_reduce +from skimage.transform import resize_local_mean from common.config import DepositionImportConfig from common.fs import FileSystemApi, S3Filesystem @@ -53,12 +54,24 @@ class ZarrReader: def __init__(self, fs, zarrdir): self.fs = fs self.zarrdir = zarrdir + self._loc = None def get_data(self): - loc = ome_zarr.io.ZarrLocation(self.fs.destformat(self.zarrdir)) + loc = self._load_zarr_loc() data = loc.load("0") return data + @property + def attrs(self): + loc = self._load_zarr_loc() + group = zarr.group(loc.store) + return group.attrs + + def _load_zarr_loc(self): + if self._loc is None: + self._loc = 
ome_zarr.io.ZarrLocation(self.fs.destformat(self.zarrdir)) + return self._loc + class ZarrWriter: def __init__(self, fs: FileSystemApi, zarrdir: str): @@ -97,6 +110,7 @@ def write_data( voxel_spacing: List[Tuple[float, float, float]], chunk_size: Tuple[int, int, int] = (256, 256, 256), scale_z_axis: bool = True, + store_labels_metadata: bool = False, ): pyramid = [] scales = [] @@ -110,14 +124,59 @@ def write_data( pyramid.append(d) scales.append(self.ome_zarr_transforms(vs)) - # Write the pyramid to the zarr store - return ome_zarr.writer.write_multiscale( - pyramid, - group=self.root_group, - axes=self.ome_zarr_axes(), - coordinate_transformations=scales, - storage_options=dict(chunks=chunk_size, overwrite=True), - compute=True, + # Store the labels contained in the data if the flag is activated + metadata = {} + if store_labels_metadata: + arr = data[0] + labels = [int(label) for label in np.unique(arr) if label > 0] + label_values = [{"label-value": label} for label in labels] + metadata["image-label"] = {"version": "0.4", "colors": label_values} + + # TODO: Currently not being used because of memory spikes for large volumes (100GB+ memory spikes for 10GB+ data) + # Fixed in latest version of ome_zarr (0.12.2), but can't upgrade because Zarr V3 is required and we don't support Zarr V3 yet. 
+ # # Write the pyramid to the zarr store + # return ome_zarr.writer.write_multiscale( + # pyramid, + # group=self.root_group, + # axes=self.ome_zarr_axes(), + # coordinate_transformations=scales, + # storage_options=dict(chunks=chunk_size, overwrite=True), + # compute=True, + # metadata=metadata, + # ) + # TODO: Remove this temporary workaround + datasets_meta = [] + + for i, (arr, vs) in enumerate(zip(data, voxel_spacing)): + path = str(i) + zds = self.root_group.create_dataset( + path, + shape=arr.shape, + dtype="float32", + chunks=chunk_size, + overwrite=True, + ) + + z, y, x = map(int, arr.shape) + zc, yc, xc = map(int, chunk_size) + + for z0 in range(0, z, zc): + z1 = min(z0 + zc, z) + for y0 in range(0, y, yc): + y1 = min(y0 + yc, y) + for x0 in range(0, x, xc): + x1 = min(x0 + xc, x) + zds[z0:z1, y0:y1, x0:x1] = np.asarray(arr[z0:z1, y0:y1, x0:x1], dtype=np.float32, order="C") + + datasets_meta.append( + { + "path": path, + "coordinateTransformations": self.ome_zarr_transforms(vs), + }, + ) + + ome_zarr.writer.write_multiscales_metadata( + group=self.root_group, axes=self.ome_zarr_axes(), datasets=datasets_meta, name="/", metadata=metadata, ) @@ -279,12 +338,15 @@ def get_contrast_limits(self, method: Literal["gmm", "cdf"] = "gmm"): return compute_contrast_limits(self.volume_reader.data, method=method) + def get_downscale_interpolation_func(self): + return np.mean + # Make an array of an original size image, plus `max_layers` half-scaled images def make_pyramid( self, max_layers: int = 2, scale_z_axis: bool = True, - voxel_spacing: float = None, + voxel_spacing: float | None = None, ) -> Tuple[List[np.ndarray], List[Tuple[float, float, float]]]: # Voxel size for unbinned if not voxel_spacing: @@ -297,8 +359,11 @@ def make_pyramid( pyramid_voxel_spacing = [(voxel_spacing, voxel_spacing, voxel_spacing)] z_scale = 2 if scale_z_axis else 1 # Then make a pyramid of 100/50/25 percent scale volumes + downscale_method = self.get_downscale_interpolation_func() for i 
in range(max_layers): - downscaled_data = self.scaled_data_transformation(downscale_local_mean(pyramid[i], (z_scale, 2, 2))) + downscaled_data = self.scaled_data_transformation( + block_reduce(pyramid[i], block_size=(z_scale, 2, 2), func=downscale_method), + ) pyramid.append(downscaled_data) pyramid_voxel_spacing.append( ( @@ -344,12 +409,19 @@ def pyramid_to_omezarr( zarrdir: str, write: bool = True, pyramid_voxel_spacing: List[Tuple[float, float, float]] = None, + chunk_size: Tuple[int, int, int] = (256, 256, 256), + store_labels_metadata: bool = False, ) -> str: destination_zarrdir = fs.destformat(zarrdir) # Write zarr data as 256^3 voxel chunks if write: writer = ZarrWriter(fs, destination_zarrdir) - writer.write_data(pyramid, voxel_spacing=pyramid_voxel_spacing, chunk_size=(256, 256, 256)) + writer.write_data( + pyramid, + voxel_spacing=pyramid_voxel_spacing, + chunk_size=chunk_size, + store_labels_metadata=store_labels_metadata, + ) else: print(f"skipping remote push for {destination_zarrdir}") return os.path.basename(zarrdir) @@ -446,6 +518,45 @@ def has_label(self) -> bool: return bool(np.any(self.volume_reader.get_pyramid_base_data() == self.label)) +class MultiLabelMaskConverter(TomoConverter): + def __init__( + self, + fs: FileSystemApi, + filename: str, + header_only: bool = False, + scale_0_dims: tuple[int, int, int] | None = None, + ): + super().__init__(fs=fs, filename=filename, header_only=header_only, scale_0_dims=scale_0_dims) + + def get_pyramid_base_data(self) -> np.ndarray: + data = self.volume_reader.get_pyramid_base_data() + + if not self.scale_0_dims: + return self.scaled_data_transformation(data) + + from scipy.ndimage import zoom + + x, y, z = data.shape + nx, ny, nz = self.scale_0_dims + zoom_factor = (nx / x, ny / y, nz / z) + if zoom_factor == (1.0, 1.0, 1.0): + return self.scaled_data_transformation(data) + + rescaled = zoom(data, zoom=zoom_factor, order=0) + + return self.scaled_data_transformation(rescaled) + + @classmethod + def 
scaled_data_transformation(cls, data: np.ndarray) -> np.ndarray: + # For instance segmentation masks we have multiple labels, so we want an uint 16 output. + # We used uint16 and not uint32 as it seems MRC format doesn't handle well int > 16. + # zoom will return float array even for bool input with non-binary values + return data.astype(np.uint16) + + def get_downscale_interpolation_func(self): + return np.max + + def get_volume_metadata(config: DepositionImportConfig, output_prefix: str) -> dict[str, Any]: # Generates metadata related to volume files. scales = [] @@ -494,7 +605,10 @@ def get_converter( label: int | None = None, scale_0_dims: tuple[int, int, int] | None = None, threshold: float | None = None, + multilabels: bool = False, ) -> TomoConverter | MaskConverter: + if multilabels: + return MultiLabelMaskConverter(fs, tomo_filename, scale_0_dims=scale_0_dims) if label is not None: return MaskConverter(fs, tomo_filename, label, scale_0_dims=scale_0_dims, threshold=threshold) return TomoConverter(fs, tomo_filename, scale_0_dims=scale_0_dims) @@ -508,12 +622,14 @@ def make_pyramids( write_mrc: bool = True, write_zarr: bool = True, header_mapper: Callable[[np.array], None] = None, - voxel_spacing=None, + voxel_spacing: float | None = None, label: int = None, - scale_0_dims=None, + scale_0_dims = None, threshold: float | None = None, + chunk_size: Tuple[int, int, int] = (256, 256, 256), + multilabels: bool = False, ): - tc = get_converter(fs, tomo_filename, label, scale_0_dims, threshold) + tc = get_converter(fs, tomo_filename, label, scale_0_dims, threshold, multilabels=multilabels) pyramid, pyramid_voxel_spacing = tc.make_pyramid(scale_z_axis=scale_z_axis, voxel_spacing=voxel_spacing) _ = tc.pyramid_to_omezarr( fs, @@ -521,6 +637,8 @@ def make_pyramids( f"{output_prefix}.zarr", write_zarr, pyramid_voxel_spacing=pyramid_voxel_spacing, + chunk_size=chunk_size, + store_labels_metadata=multilabels, ) _ = tc.pyramid_to_mrc(fs, pyramid, f"{output_prefix}.mrc", 
write_mrc, header_mapper, voxel_spacing) diff --git a/ingestion_tools/scripts/data_validation/shared/helper/mdoc_helper.py b/ingestion_tools/scripts/data_validation/shared/helper/mdoc_helper.py index 956dc9242..570327807 100644 --- a/ingestion_tools/scripts/data_validation/shared/helper/mdoc_helper.py +++ b/ingestion_tools/scripts/data_validation/shared/helper/mdoc_helper.py @@ -9,7 +9,6 @@ class MdocTestHelper: - @pytest.fixture def mdoc_sub_frame_path(self, mdoc_data: pd.DataFrame) -> list[str]: return [ @@ -24,14 +23,13 @@ def test_frames_mdoc_range(self, mdoc_data: pd.DataFrame): assert mdoc_data["TiltAngle"].min() >= -90, "Minimum tilt angle is less than -90" assert mdoc_data["TiltAngle"].max() <= 90, "Maximum tilt angle is greater than 90" - @allure.title("Mdoc: number of mdoc sections, equal number of frames files, equals number of items in frames metadata.") - def test_mdoc_frames(self, mdoc_data: pd.DataFrame, frames_files: list[str], frame_metadata: dict[str, dict]): + @allure.title("Mdoc: number of mdoc sections equal number of frames files") + def test_mdoc_frames(self, mdoc_data: pd.DataFrame, frames_files: list[str]): frames_len = len(frames_files) if frames_len == 0: pytest.skip("No frame files to compare") - frames_metadata_len = len(frame_metadata["frames"]) mdoc_len = len(mdoc_data) - assert mdoc_len == frames_len == frames_metadata_len, f"Number of mdoc sections {mdoc_len} mismatches number of frames: {frames_len} or frames metadata: {frames_metadata_len}" + assert mdoc_len == frames_len, f"Number of mdoc sections {mdoc_len} mismatches number of frames: {frames_len}" @allure.title("Mdoc: Every mdoc filename has an entry for SubFramePath.") def test_mdoc_sub_frame_paths(self, mdoc_data: pd.DataFrame): diff --git a/ingestion_tools/scripts/data_validation/shared/helper/tiltseries_helper.py b/ingestion_tools/scripts/data_validation/shared/helper/tiltseries_helper.py index 1a9602960..747a42f12 100644 --- 
a/ingestion_tools/scripts/data_validation/shared/helper/tiltseries_helper.py +++ b/ingestion_tools/scripts/data_validation/shared/helper/tiltseries_helper.py @@ -9,19 +9,15 @@ TILT_AXIS_ANGLE_REGEX = re.compile(r".*tilt\s*axis\s*angle\s*=\s*([-+]?(?:\d*\.*\d+))") - - -@pytest.fixture -def mdoc_tilt_axis_angle(mdoc_data: pd.DataFrame) -> float: - # To convert the data from the mdoc into a data frame, all the global records are added to each section's data - titles = mdoc_data["titles"][0] - for title in titles: - if result := re.match(TILT_AXIS_ANGLE_REGEX, title.lower()): - return float(result[1]) - pytest.fail("No Tilt axis angle found") - - class TiltSeriesHelper(HelperTestMRCZarrHeader): + @pytest.fixture + def mdoc_tilt_axis_angle(self, mdoc_data: pd.DataFrame) -> float: + # To convert the data from the mdoc into a data frame, all the global records are added to each section's data + titles = mdoc_data["titles"][0] + for title in titles: + if result := re.match(TILT_AXIS_ANGLE_REGEX, title.lower()): + return float(result[1]) + pytest.fail("No Tilt axis angle found") @pytest.fixture(autouse=True) def set_space_group(self): @@ -39,6 +35,6 @@ def tiltseries_metadata_range(self, tiltseries_metadata: dict) -> list[float]: @allure.title("Tiltseries: tilt axis angle in mdoc file matches that in tilt series metadata (+/- 10 deg).") def test_tilt_axis_angle(self, mdoc_tilt_axis_angle: float, tiltseries_metadata: dict[str, Any]): metadata_tilt_axis = tiltseries_metadata["tilt_axis"] - assert (abs( - metadata_tilt_axis - mdoc_tilt_axis_angle) <= 10 - ), f"Tilt axis angle mismatch: MDOC: {mdoc_tilt_axis_angle} vs Metadata: {metadata_tilt_axis}" + assert abs(metadata_tilt_axis - mdoc_tilt_axis_angle) <= 10, ( + f"Tilt axis angle mismatch: MDOC: {mdoc_tilt_axis_angle} vs Metadata: {metadata_tilt_axis}" + ) diff --git a/ingestion_tools/scripts/data_validation/source/README.md b/ingestion_tools/scripts/data_validation/source/README.md index 038db5ca7..eab9f2853 100644 --- 
a/ingestion_tools/scripts/data_validation/source/README.md +++ b/ingestion_tools/scripts/data_validation/source/README.md @@ -11,11 +11,11 @@ If new `helper_*` files are added, make sure to update the `__init__.py` file an To run (from this directory): ``` -pytest --input-bucket [BUCKET_NAME] --ingestion-config [CONFIG_FILE] --run-filter-name [regex value] --frame-filter-name [regex-value] +pytest --input-bucket [BUCKET_NAME] --ingestion-config [CONFIG_FILE] --filter-run-name [regex value] input-bucket: The S3 bucket where the data is stored. No default value available. ingestion-config: The path to the ingestion config file -run-filter-name: Similar to the ingestion, the tests can be filtered by any entities name. +filter-run-name: Similar to the ingestion, the tests can be filtered by any entities name. ``` Custom Marks: diff --git a/ingestion_tools/scripts/data_validation/standardized/fixtures/path.py b/ingestion_tools/scripts/data_validation/standardized/fixtures/path.py index 9660ae9ea..09ed4e895 100644 --- a/ingestion_tools/scripts/data_validation/standardized/fixtures/path.py +++ b/ingestion_tools/scripts/data_validation/standardized/fixtures/path.py @@ -414,6 +414,7 @@ def get_annotation_files_to_metadata_files( """[Dataset]/[ExperimentRun]/Reconstructions/VoxelSpacing[voxel_spacing]/Annotations/[annotation_name].* Helper function for retrieving annotation files and their corresponding metadata files. Fails the test if the annotation file is not found for a given metadata file OR if there are any remaining annotation files. + Skips the test if no annotation files are found. Returns a dictionary of annotation files, annotation_filename -> metadata_filename. 
""" diff --git a/ingestion_tools/scripts/data_validation/standardized/tests/annotation/test_segmentationmask_annotation.py b/ingestion_tools/scripts/data_validation/standardized/tests/annotation/test_segmentationmask_annotation.py index 98d9440f7..b7d769363 100644 --- a/ingestion_tools/scripts/data_validation/standardized/tests/annotation/test_segmentationmask_annotation.py +++ b/ingestion_tools/scripts/data_validation/standardized/tests/annotation/test_segmentationmask_annotation.py @@ -1,4 +1,4 @@ -from typing import Dict, List +from typing import Dict import allure import numpy as np @@ -18,7 +18,10 @@ def set_helper_test_mrc_zarr_header_class_variables( seg_mask_annotation_mrc_headers: Dict[str, MrcInterpreter], seg_mask_annotation_zarr_headers: Dict[str, Dict[str, Dict]], voxel_spacing: str, + seg_mask_annotation_files_to_metadata: Dict, ): + # unused for now, kept since this fixture determines whether these tests should be skipped or ran + del seg_mask_annotation_files_to_metadata self.spacegroup = 1 # single 3D volume self.mrc_headers = seg_mask_annotation_mrc_headers self.zarr_headers = seg_mask_annotation_zarr_headers @@ -27,11 +30,7 @@ def set_helper_test_mrc_zarr_header_class_variables( ### BEGIN Tomogram-consistency tests ### @allure.title("Segmentation mask: volumes are contained within the tomogram dimensions.") - def test_contained_in_tomo(self, - seg_mask_annotation_mrc_files: List, - seg_mask_annotation_files_to_metadata: Dict, - all_vs_tomogram_metadata: Dict): - + def test_contained_in_tomo(self, seg_mask_annotation_files_to_metadata: Dict, all_vs_tomogram_metadata: Dict): tomo_metadata = {} for filename, metadata in seg_mask_annotation_files_to_metadata.items(): for tomo_data in all_vs_tomogram_metadata: diff --git a/ingestion_tools/scripts/data_validation/standardized/tests/test_alignments.py b/ingestion_tools/scripts/data_validation/standardized/tests/test_alignments.py index 9e1c0fb10..38a2e6c6c 100644 --- 
a/ingestion_tools/scripts/data_validation/standardized/tests/test_alignments.py +++ b/ingestion_tools/scripts/data_validation/standardized/tests/test_alignments.py @@ -1,3 +1,4 @@ +import re from typing import Dict, List import allure @@ -5,6 +6,7 @@ import pandas as pd import pytest from data_validation.shared.helper.angles_helper import helper_angles_injection_errors +from data_validation.shared.helper.tiltseries_helper import TILT_AXIS_ANGLE_REGEX def matrix_to_angle(matrix: list[list[float]]) -> float: @@ -58,6 +60,15 @@ def alignment_tiltseries_metadata_range(self, alignment_tiltseries_metadata: Dic alignment_tiltseries_metadata["tilt_step"], ).tolist() + @pytest.fixture + def mdoc_tilt_axis_angle(self, mdoc_data: pd.DataFrame) -> float: + # To convert the data from the mdoc into a data frame, all the global records are added to each section's data + titles = mdoc_data["titles"][0] + for title in titles: + if result := re.match(TILT_AXIS_ANGLE_REGEX, title.lower()): + return float(result[1]) + pytest.fail("No Tilt axis angle found") + ### BEGIN Tilt .tlt tests ### @allure.title("Alignment: angles exist.") def test_tilt_count(self, alignment_tilt: pd.DataFrame): @@ -109,23 +120,15 @@ def test_tilt_tiltseries_range( "tilt file", "tiltseries metadata tilt_range", ) - assert len(errors) == 0, ( - "\n".join(errors) - + f"\nRange: {alignment_tiltseries_metadata['tilt_range']['min']} to {alignment_tiltseries_metadata['tilt_range']['max']}, with step {alignment_tiltseries_metadata['tilt_step']}" - ) + assert len(errors) == 0, "\n".join(errors) + f"\nRange: {alignment_tiltseries_metadata['tilt_range']['min']} to {alignment_tiltseries_metadata['tilt_range']['max']}, with step {alignment_tiltseries_metadata['tilt_step']}" @allure.title("Alignment: tilt angle in mdoc file matches that in the alignment metadata [per_section_alignment_parameters.in_plane_rotation] (+/- 10 deg)") def test_mdoc_tilt_axis_angle_in_alignment_per_section_alignment_parameters(self, 
mdoc_tilt_axis_angle: float, alignment_metadata: dict[str, dict]): per_section_alignment_parameters = alignment_metadata.get("per_section_alignment_parameters") if not per_section_alignment_parameters: pytest.skip("Alignment metadata missing per_section_alignment_parameters.") - # convert all in_plane_rotation angles to degrees and sort them in ascending order + # convert all in_plane_rotation matrices to angles and check against mdoc_tilt_axis_angle in_plane_rotations = [matrix_to_angle(psap["in_plane_rotation"]) for psap in per_section_alignment_parameters] - # check that all in_plane_rotation angles are equal - assert len(set(in_plane_rotations)) == 1, "in_plane_rotation angles are not all equal." - # check that in_plane_roation against mdoc_tilt_axis_angle - in_plane_rotation = in_plane_rotations[0] - assert in_plane_rotation == pytest.approx(mdoc_tilt_axis_angle, rel=10), f"Mdoc tilt axis angle {mdoc_tilt_axis_angle} does not match alignment metadata['per_section_alignment_parameters'][*]['in_plane_rotation']: {in_plane_rotation}" - + assert all(in_plane_rotation == pytest.approx(mdoc_tilt_axis_angle, abs=10) for in_plane_rotation in in_plane_rotations), f"Mdoc tilt axis angle {mdoc_tilt_axis_angle} does not match all alignment metadata in_plane_rotation angles within +/- 10 degrees: {in_plane_rotations}" ### END Tiltseries consistency tests ### diff --git a/ingestion_tools/scripts/data_validation/standardized/tests/test_frame_acquisition_file.py b/ingestion_tools/scripts/data_validation/standardized/tests/test_frame_acquisition_file.py index 97636b9af..3ddcd53cd 100644 --- a/ingestion_tools/scripts/data_validation/standardized/tests/test_frame_acquisition_file.py +++ b/ingestion_tools/scripts/data_validation/standardized/tests/test_frame_acquisition_file.py @@ -1,3 +1,5 @@ +import allure +import pandas as pd import pytest from data_validation.shared.helper.mdoc_helper import MdocTestHelper @@ -6,4 +8,11 @@ @pytest.mark.parametrize("dataset, run_name", 
pytest.cryoet.dataset_run_combinations, scope="session") class TestFrameAcquisitionFile(MdocTestHelper): - pass + @allure.title("Mdoc: number of mdoc sections, equal number of frames files, equals number of items in frames metadata.") + def test_mdoc_frames(self, mdoc_data: pd.DataFrame, frames_files: list[str], frame_metadata: dict[str, dict]): + frames_len = len(frames_files) + if frames_len == 0: + pytest.skip("No frame files to compare") + frames_metadata_len = len(frame_metadata["frames"]) + mdoc_len = len(mdoc_data) + assert mdoc_len == frames_len == frames_metadata_len, f"Number of mdoc sections {mdoc_len} mismatches number of frames: {frames_len} or frames metadata: {frames_metadata_len}" diff --git a/ingestion_tools/scripts/data_validation/standardized/tests/test_tiltseries.py b/ingestion_tools/scripts/data_validation/standardized/tests/test_tiltseries.py index 5e759140e..d282d2394 100644 --- a/ingestion_tools/scripts/data_validation/standardized/tests/test_tiltseries.py +++ b/ingestion_tools/scripts/data_validation/standardized/tests/test_tiltseries.py @@ -189,14 +189,14 @@ def test_max_resolution(self, tiltseries_metadata: dict[str, Any]): if len(errors) > 0: raise AssertionError("\n".join(errors)) - @allure.title("PerSectionParameters: rawAngle matches mdoc TiltAngle (+-10^-3 deg).") + @allure.title("PerSectionParameters: All raw tilt angles match to a mdoc TiltAngle (+-10^-2 deg).") def test_raw_angle(self, tiltseries_metadata: dict[str, Any], mdoc_data: pd.DataFrame): errors = helper_angles_injection_errors( - mdoc_data["TiltAngle"].to_list(), [psp["raw_angle"] for psp in tiltseries_metadata["per_section_parameter"]], - "mdoc file", + mdoc_data["TiltAngle"].to_list(), "tiltseries metadata per_section_parameter raw_angle", - angle_tolerance=10 ** -3, + "mdoc file", + angle_tolerance=10 ** -2, ) if errors: raise AssertionError("\n".join(errors)) diff --git a/ingestion_tools/scripts/enqueue_runs.py b/ingestion_tools/scripts/enqueue_runs.py index 
7332d32c8..a6a4fdbbc 100644 --- a/ingestion_tools/scripts/enqueue_runs.py +++ b/ingestion_tools/scripts/enqueue_runs.py @@ -22,7 +22,7 @@ from importers.utils import IMPORTERS from standardize_dirs import common_options as ingest_common_options -from common.config import DepositionImportConfig +from common.config import PROD_URL, DepositionImportConfig from common.fs import FileSystemApi logger = logging.getLogger("db_import") @@ -131,6 +131,13 @@ def run_job( input=json.dumps(sfn_input_json), ) +def get_default_https_prefix(): + """ + For the purposes of ingestion, we are okay with the staging environment containing production URLs, + since the URLs in metadata files are not expected to change between environments. + """ + return PROD_URL + def get_aws_env(environment): # Learn more about our AWS environment @@ -267,9 +274,7 @@ def db_import( if env == "prod": s3_bucket = "cryoet-data-portal-public" if not https_prefix: - https_prefix = "https://files.cryoet.staging.si.czi.technology" - if env == "prod": - https_prefix = "https://files.cryoetdataportal.cziscience.com" + https_prefix = get_default_https_prefix() # Default to using a lot less memory than the ingestion job. 
if not ctx.obj.get("memory"): @@ -331,7 +336,7 @@ def db_import( @click.argument("config_file", required=True, type=str) @click.argument("input_bucket", required=True, type=str) @click.argument("output_path", required=True, type=str) -@click.option("--import-everything", is_flag=True, default=False) +@click.option("--https-prefix", required=False, type=str, help="protocol + domain for where to fetch files via HTTP") @click.option( "--write-mrc/--no-write-mrc", default=True, @@ -365,7 +370,9 @@ def queue( config_file: str, input_bucket: str, output_path: str, + https_prefix: str, import_everything: bool, + import_all_metadata: bool, write_mrc: bool, write_zarr: bool, force_overwrite: bool, @@ -377,7 +384,9 @@ def queue( fs_mode = "s3" fs = FileSystemApi.get_fs_api(mode=fs_mode, force_overwrite=force_overwrite) - config = DepositionImportConfig(fs, config_file, output_path, input_bucket, IMPORTERS) + if not https_prefix: + https_prefix = get_default_https_prefix() + config = DepositionImportConfig(fs, config_file, output_path, input_bucket, IMPORTERS, https_prefix=https_prefix) config.write_mrc = write_mrc config.write_zarr = write_zarr config.load_map_files() @@ -431,7 +440,9 @@ def queue( break per_run_args[k] = v new_args = to_args( + https_prefix=https_prefix, import_everything=import_everything, + import_all_metadata=import_all_metadata, write_mrc=write_mrc, write_zarr=write_zarr, force_overwrite=force_overwrite, diff --git a/ingestion_tools/scripts/importers/annotation.py b/ingestion_tools/scripts/importers/annotation.py index 5d2feb69d..3ad9faa65 100644 --- a/ingestion_tools/scripts/importers/annotation.py +++ b/ingestion_tools/scripts/importers/annotation.py @@ -107,6 +107,8 @@ def _instantiate( anno = PointAnnotation(**instance_args) if shape == "InstanceSegmentation": anno = InstanceSegmentationAnnotation(**instance_args) + if shape == "InstanceSegmentationMask": + anno = InstanceSegmentationMaskAnnotation(**instance_args) if shape == "TriangularMesh": anno 
= TriangularMeshAnnotation(**instance_args) if shape == "TriangularMeshGroup": @@ -316,6 +318,37 @@ def convert(self, output_prefix: str): ) +class InstanceSegmentationMaskAnnotation(VolumeAnnotationSource): + shape = "InstanceSegmentationMask" + rescale: bool = False + is_portal_standard: bool + + def __init__( + self, + rescale: bool = False, + is_portal_standard: bool = False, + *args, + **kwargs, + ) -> None: + super().__init__(*args, **kwargs) + self.rescale = rescale + self.is_portal_standard = is_portal_standard + + def convert(self, output_prefix: str): + output_dims = self.get_output_dim() if self.rescale else None + + return make_pyramids( + self.config.fs, + self.get_output_filename(output_prefix), + self.path, + write_mrc=self.config.write_mrc, + write_zarr=self.config.write_zarr, + voxel_spacing=self.get_voxel_spacing().as_float(), + scale_0_dims=output_dims, + multilabels=True, + ) + + class SemanticSegmentationMaskAnnotation(VolumeAnnotationSource): shape = "SegmentationMask" # Don't expose SemanticSegmentationMask to the public portal. mask_label: int diff --git a/ingestion_tools/scripts/importers/base_importer.py b/ingestion_tools/scripts/importers/base_importer.py index 728f4ab11..edfa147c7 100644 --- a/ingestion_tools/scripts/importers/base_importer.py +++ b/ingestion_tools/scripts/importers/base_importer.py @@ -267,6 +267,7 @@ def scale_mrcfile( write_zarr: bool = True, write_mrc: bool = True, voxel_spacing: float | None = None, + chunk_size: tuple[int, int, int] = (256, 256, 256), ) -> dict[str, Any]: """ Scales the MRC file into a pyramid volume and optionally writes it to MRC and Zarr formats. @@ -277,6 +278,7 @@ def scale_mrcfile( write_zarr (bool): Whether to write the zarr file. write_mrc (bool): Whether to write the mrc file. voxel_spacing (float): The voxel spacing of the volume. + chunk_size (tuple[int, int, int]): The chunk size of the output volume. 
""" output_prefix = self.get_output_path() return make_pyramids( @@ -288,6 +290,7 @@ def scale_mrcfile( write_zarr=write_zarr, header_mapper=self.mrc_header_mapper, voxel_spacing=voxel_spacing, + chunk_size=chunk_size, ) def get_output_path(self) -> str: diff --git a/ingestion_tools/scripts/importers/gain.py b/ingestion_tools/scripts/importers/gain.py index 0e0847caa..95b3cd565 100644 --- a/ingestion_tools/scripts/importers/gain.py +++ b/ingestion_tools/scripts/importers/gain.py @@ -24,7 +24,8 @@ def import_item(self) -> None: dest_file_name = os.path.splitext(source_file_name)[0] + ".mrc" local_input = fs.localreadable(item) local_output = fs.localwritable(os.path.join(output_dir, dest_file_name)) - subprocess.check_output(["/usr/local/IMOD/bin/dm2mrc", local_input, local_output]) + d2mrc_path = "/usr/local/IMOD/bin/dm2mrc" if os.path.exists("/usr/local/IMOD/bin/dm2mrc") else "dm2mrc" + subprocess.check_output([d2mrc_path, local_input, local_output]) fs.push(local_output) else: dest_file_path = os.path.join(output_dir, source_file_name) diff --git a/ingestion_tools/scripts/importers/tiltseries.py b/ingestion_tools/scripts/importers/tiltseries.py index c3b2c221a..509c921a3 100644 --- a/ingestion_tools/scripts/importers/tiltseries.py +++ b/ingestion_tools/scripts/importers/tiltseries.py @@ -80,6 +80,8 @@ def import_item(self) -> None: write_mrc=self.config.write_mrc, write_zarr=self.config.write_zarr, voxel_spacing=self.get_pixel_spacing(), + # since tiltseries is 2D stack of images, set z slice chunk size to 1 + chunk_size=(1, 1024, 1024), ) def get_frames_count(self) -> int: diff --git a/ingestion_tools/scripts/importers/visualization_config.py b/ingestion_tools/scripts/importers/visualization_config.py index 767fe0a89..a82ee7823 100644 --- a/ingestion_tools/scripts/importers/visualization_config.py +++ b/ingestion_tools/scripts/importers/visualization_config.py @@ -9,7 +9,7 @@ from common import colors from common.colors import generate_hash, to_base_hash_input 
from common.finders import DefaultImporterFactory -from common.image import VolumeInfo +from common.image import VolumeInfo, ZarrReader from common.metadata import NeuroglancerMetadata from importers.annotation import OrientedPointAnnotation from importers.base_importer import BaseImporter @@ -83,17 +83,19 @@ def _to_segmentation_mask_layer( color: str, resolution: tuple[float, float, float], output_resolution: tuple[float, float, float] | None = None, + visible_segments: tuple[int, ...] = (1,), **kwargs, ) -> dict[str, Any]: output_resolution = output_resolution or resolution return state_generator.generate_segmentation_mask_layer( source=source_path, - name=f"{name_prefix} segmentation", + name=f"{name_prefix} {'segmentation' if len(visible_segments) == 1 else 'instancesegmentation' }", url=self.config.https_prefix, color=color, scale=resolution, output_scale=output_resolution, is_visible=file_metadata.get("is_visualization_default"), + visible_segments=visible_segments, ) def _to_point_layer( @@ -207,6 +209,7 @@ def get_annotation_layer_info(self, alignment_metadata_path: str) -> dict[str, A "InstanceSegmentation", "TriangularMesh", "TriangularMeshGroup", + "InstanceSegmentationMask", }: print(f"Skipping file with unknown shape {shape}") continue @@ -223,8 +226,15 @@ def get_annotation_layer_info(self, alignment_metadata_path: str) -> dict[str, A print(f"Skipping file with unsupported format {file.get('format')}") continue + nb_colors = 1 + visible_segments = None + if shape == "InstanceSegmentationMask": + # We load the ome zarr file and get the unique non zero labels and then set of those as visible + visible_segments = self._get_labels(file.get("path")) + nb_colors = len(visible_segments) + color_seed = generate_hash({**annotation_hash_input, **{"shape": shape}}) - hex_colors, float_colors = colors.get_hex_colors(1, exclude=colors_used, seed=color_seed) + hex_colors, float_colors = colors.get_hex_colors(nb_colors, exclude=colors_used, seed=color_seed) result 
= self._find_annotation_metadata(output_annotation_path, shape) if result is None: @@ -233,21 +243,23 @@ def get_annotation_layer_info(self, alignment_metadata_path: str) -> dict[str, A file_path, voxel_spacing, ratio = result path = self.config.to_formatted_path(file_path) - is_instance_seg = shape == "InstanceSegmentation" + is_instance_seg = shape == "InstanceSegmentation" or shape == "InstanceSegmentationMask" - annotation_layer_info[file.get("path")] = { + args = { + "source_path": path, + "file_metadata": file, + "name_prefix": name_prefix, + "color": hex_colors[0], "shape": shape, - "voxel_spacing_ratio": ratio, - "args": { - "source_path": path, - "file_metadata": file, - "name_prefix": name_prefix, - "color": hex_colors[0], - "shape": shape, - "resolution": (voxel_spacing * 1e-10,) * 3, - }, + "resolution": (voxel_spacing * 1e-10,) * 3, } + if shape == "InstanceSegmentationMask": + args["visible_segments"] = visible_segments + args["color"] = dict(zip(visible_segments, hex_colors)) + + annotation_layer_info[file.get("path")] = {"shape": shape, "voxel_spacing_ratio": ratio, "args": args} + if not is_instance_seg: colors_used.append(float_colors[0]) @@ -280,6 +292,23 @@ def _has_oriented_mesh(self, path: str): mesh_folder_path = os.path.join(self.config.output_prefix, oriented_mesh_filename) return fs.exists(mesh_folder_path) + def _get_labels(self, path: str): + segmentation_filename = os.path.join(self.config.output_prefix, path) + + reader = ZarrReader(self.config.fs, segmentation_filename) + try: + labels_info = reader.attrs["multiscales"][0]["metadata"]["image-label"]["colors"] + labels = [label["label-value"] for label in labels_info] + except Exception: + # Get labels iterating by chunks over the tab + # We lazy import dask and numpy + import dask.array as da + + arr = reader.get_data() + labels = da.unique(arr).compute() + labels = labels[labels > 0].astype(int) + return tuple(labels) + def _create_config(self, alignment_metadata_path: str) -> 
dict[str, Any]: tomogram = self.get_tomogram() volume_info = tomogram.get_output_volume_info() @@ -300,7 +329,7 @@ def _create_config(self, alignment_metadata_path: str) -> dict[str, Any]: for _, info in annotation_layer_info.items(): shape = info["shape"] args = {**info["args"], "output_resolution": resolution} - if shape == "SegmentationMask": + if shape == "SegmentationMask" or shape == "InstanceSegmentationMask": layers.append(self._to_segmentation_mask_layer(**args)) elif shape in {"Point", "OrientedPoint", "InstanceSegmentation"}: if shape == "OrientedPoint": diff --git a/ingestion_tools/scripts/importers/visualization_precompute.py b/ingestion_tools/scripts/importers/visualization_precompute.py index 9816cae64..08bdcd9ea 100644 --- a/ingestion_tools/scripts/importers/visualization_precompute.py +++ b/ingestion_tools/scripts/importers/visualization_precompute.py @@ -10,6 +10,7 @@ AbstractTriangularMeshAnnotation, BaseAnnotationSource, InstanceSegmentationAnnotation, + InstanceSegmentationMaskAnnotation, OrientedPointAnnotation, VolumeAnnotationSource, ) @@ -75,8 +76,10 @@ def load(cls, annotation: BaseAnnotationSource, config: DepositionImportConfig) return OrientedPointAnnotationPrecompute(**params) elif shape == "InstanceSegmentation": return InstanceSegmentationAnnotationPrecompute(**params) - elif shape == "SegmentationMask" or shape == "SemanticSegmentationMask": + elif shape in ["SegmentationMask", "SemanticSegmentationMask"]: return SegmentationMaskAnnotationPrecompute(**params) + elif shape == "InstanceSegmentationMask": + return InstanceSegmentationMaskAnnotationPrecompute(**params) elif shape == "TriangularMesh": return MeshAnnotatationPrecompute(**params) @@ -156,6 +159,7 @@ def neuroglancer_precompute(self, output_prefix: str, voxel_spacing: float) -> N max_lod=2, max_faces_for_first_lod=10e6, decimation_aggressiveness=5.5, + data_scale_in_nm=voxel_spacing * 0.1, # Convert from angstrom ) # Dump the precomputed version on the output folder @@ 
-217,6 +221,33 @@ def neuroglancer_precompute(self, output_prefix: str, voxel_spacing: float) -> N fs.push(tmp_path) +class InstanceSegmentationMaskAnnotationPrecompute(BaseAnnotationPrecompute): + annotation: InstanceSegmentationMaskAnnotation + + def _get_shape(self) -> str: + return "InstanceSegmentationMask" + + def neuroglancer_precompute(self, output_prefix: str, voxel_spacing: float) -> None: + fs = self.config.fs + annotation_path = self.annotation.get_output_path() + precompute_path = self._get_neuroglancer_precompute_path(annotation_path, output_prefix) + tmp_path = fs.localwritable(precompute_path) + zarr_file_path = fs.destformat(self.annotation.get_output_filename(annotation_path, "zarr")) + # Importing this at runtime instead of compile time since zfpy (a dependency of this + # module) cannot be imported successfully on darwin/ARM machines. + from cryoet_data_portal_neuroglancer.precompute import segmentation_mask + + resolution_in_nm = voxel_spacing * 0.1 # original in angstrom + segmentation_mask.encode_segmentation( + zarr_file_path, + Path(tmp_path), + resolution=(resolution_in_nm,) * 3, + delete_existing=True, + include_mesh=True, + ) + fs.push(tmp_path) + + class MeshAnnotatationPrecompute(BaseAnnotationPrecompute): annotation: AbstractTriangularMeshAnnotation diff --git a/ingestion_tools/scripts/standardize_dirs.py b/ingestion_tools/scripts/standardize_dirs.py index 5deb71801..d23b0a5f0 100644 --- a/ingestion_tools/scripts/standardize_dirs.py +++ b/ingestion_tools/scripts/standardize_dirs.py @@ -16,6 +16,8 @@ def cli(ctx): def common_options(func): options = [] + options.append(click.option("--import-everything", is_flag=True, default=False)) + options.append(click.option("--import-all-metadata", is_flag=True, default=False)) for cls in IMPORTERS: plural_key = cls.plural_key.replace("_", "-") importer_key = cls.type_key.replace("_", "-") @@ -80,7 +82,7 @@ def do_import(config, tree, to_import, metadata_import, to_iterate, kwargs, pare 
@click.argument("config_file", required=True, type=str) @click.argument("input_bucket", required=True, type=str) @click.argument("output_path", required=True, type=str) -@click.option("--import-everything", is_flag=True, default=False) +@click.option("--https-prefix", required=False, type=str, help="protocol + domain for where to fetch files via HTTP") @click.option("--write-mrc/--no-write-mrc", default=True) @click.option("--write-zarr/--no-write-zarr", default=True) @click.option("--force-overwrite", is_flag=True, default=False) @@ -92,7 +94,9 @@ def convert( config_file: str, input_bucket: str, output_path: str, + https_prefix: str, import_everything: bool, + import_all_metadata: bool, write_mrc: bool, write_zarr: bool, force_overwrite: bool, @@ -105,7 +109,7 @@ def convert( fs = FileSystemApi.get_fs_api(mode=fs_mode, force_overwrite=force_overwrite) - config = DepositionImportConfig(fs, config_file, output_path, input_bucket, IMPORTERS) + config = DepositionImportConfig(fs, config_file, output_path, input_bucket, IMPORTERS, https_prefix=https_prefix) config.write_mrc = write_mrc config.write_zarr = write_zarr config.load_map_files() @@ -115,6 +119,10 @@ def convert( to_import = set(IMPORTERS) metadata_import = set(IMPORTERS) to_iterate = set(IMPORTERS) + elif import_all_metadata: + to_import = set() + metadata_import = set(IMPORTERS) + to_iterate = set(IMPORTERS) else: to_import = {k for k in IMPORTERS if kwargs.get(f"import_{k.plural_key}")} metadata_import = {k for k in IMPORTERS if kwargs.get(f"import_{k.type_key}_metadata")} diff --git a/ingestion_tools/scripts/tests/s3_import/test_annotations.py b/ingestion_tools/scripts/tests/s3_import/test_annotations.py index da19624d5..ea2c3da60 100644 --- a/ingestion_tools/scripts/tests/s3_import/test_annotations.py +++ b/ingestion_tools/scripts/tests/s3_import/test_annotations.py @@ -9,6 +9,7 @@ import trimesh from importers.annotation import ( InstanceSegmentationAnnotation, + InstanceSegmentationMaskAnnotation, 
OrientedPointAnnotation, PointAnnotation, SegmentationMaskAnnotation, @@ -1637,3 +1638,142 @@ def test_ingest_triangular_mesh_hff( expected_hash = trimesh.comparison.identifier_hash(trimesh.comparison.identifier_simple(expected_mesh)) assert actual_hash == expected_hash + + +ingest_instancemask_test_cases = [ + # Mask with 3 labels (1,2,3) and background 0 + { + "case": "InstanceSegmentationMaskAnnotation, MRC", + "source_cfg": { + "InstanceSegmentationMask": { + "file_format": "mrc", + "is_visualization_default": True, + "glob_string": "annotations/semantic_mask.mrc", + }, + }, + "out_data": [ + { + "volume": [ + [[3, 3, 3, 3], [0, 2, 0, 0], [0, 2, 0, 0], [0, 0, 0, 0]], + [[0, 0, 0, 0], [0, 2, 0, 0], [0, 2, 0, 0], [0, 0, 0, 0]], + [[0, 0, 0, 1], [0, 0, 0, 1], [0, 0, 0, 1], [0, 0, 0, 1]], + [[0, 0, 0, 1], [0, 0, 0, 1], [0, 0, 0, 1], [0, 0, 0, 1]], + ], + "shape": (4, 4, 4), + }, + ], + }, + { + "case": "InstanceSegmentationMaskAnnotation, rescale=True, MRC", + "source_cfg": { + "InstanceSegmentationMask": { + "file_format": "mrc", + "is_visualization_default": True, + "rescale": True, + "glob_string": "annotations/semantic_mask.mrc", + }, + }, + "out_data": [ + { + "volume": [ + [[3, 3, 3, 3], [0, 2, 0, 0], [0, 2, 0, 0], [0, 0, 0, 0]], + [[0, 0, 0, 0], [0, 2, 0, 0], [0, 2, 0, 0], [0, 0, 0, 0]], + [[0, 0, 0, 1], [0, 0, 0, 1], [0, 0, 0, 1], [0, 0, 0, 1]], + [[0, 0, 0, 1], [0, 0, 0, 1], [0, 0, 0, 1], [0, 0, 0, 1]], + ], + "shape": (4, 4, 4), + }, + ], + }, + { + "case": "InstanceSegmentationMaskAnnotation, small mask, rescale=True, MRC", + "source_cfg": { + "InstanceSegmentationMask": { + "file_format": "mrc", + "is_visualization_default": True, + "rescale": True, + "glob_string": "annotations/small_semantic_mask.mrc", + }, + }, + "out_data": [ + { + "volume": [ + [[2, 2, 2, 2], [2, 2, 2, 2], [0, 0, 0, 0], [0, 0, 0, 0]], + [[2, 2, 2, 2], [2, 2, 2, 2], [0, 0, 0, 0], [0, 0, 0, 0]], + [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], + [[0, 0, 0, 0], [0, 0, 0, 
0], [0, 0, 0, 0], [0, 0, 0, 0]], + ], + "shape": (4, 4, 4), + }, + ], + }, + { + "case": "InstanceSegmentationMaskAnnotation, small mask, rescale=False, MRC", + "source_cfg": { + "InstanceSegmentationMask": { + "file_format": "mrc", + "is_visualization_default": True, + "rescale": False, + "glob_string": "annotations/small_semantic_mask.mrc", + }, + }, + "out_data": [ + { + "volume": [ + [[2, 2], [0, 0]], + [[0, 0], [0, 0]], + ], + "shape": (2, 2, 2), + }, + ], + }, +] + + +@pytest.mark.parametrize("case", ingest_instancemask_test_cases) +def test_ingest_instancesegmentationmask( + s3_fs: FileSystemApi, + test_output_bucket: str, + voxel_spacing_importer_s3, + deposition_config_s3: DepositionImportConfig, + s3_client: S3Client, + case: Dict[str, Any], +): + # loop through test cases + anno_config = { + "metadata": default_anno_metadata, + "sources": [ + case["source_cfg"], + ], + } + deposition_config_s3._set_object_configs("annotation", [anno_config]) + + args = dict( + config=deposition_config_s3, + metadata=default_anno_metadata, + path="test-public-bucket/input_bucket/20002/" + + case["source_cfg"]["InstanceSegmentationMask"].get("glob_string"), + parents={"voxel_spacing": voxel_spacing_importer_s3, **voxel_spacing_importer_s3.parents}, + identifier=100, + alignment_metadata_path="foo", + **case["source_cfg"]["InstanceSegmentationMask"], + ) + + anno = InstanceSegmentationMaskAnnotation(**args) + anno.import_item() + + # Strip the bucket name and annotation name from the annotation's output path. 
+ anno_file = anno.get_output_path() + "_instancesegmentationmask.mrc" + + # Sanity check the mrc file + with s3_fs.open(anno_file, "rb") as fh: + mrc = MrcInterpreter(fh) + data = mrc.data + + exp_data = case["out_data"][0]["volume"] + shape = case["out_data"][0]["shape"] + + # Mask shape + assert data.shape == shape, f"Incorrect shape for {case['case']}" + # Mask data + assert np.all(data == np.array(exp_data, dtype=int)), f"Incorrect data for {case['case']}" diff --git a/ingestion_tools/scripts/tests/s3_import/test_visualization_config.py b/ingestion_tools/scripts/tests/s3_import/test_visualization_config.py index df5951cb0..dfd78214c 100644 --- a/ingestion_tools/scripts/tests/s3_import/test_visualization_config.py +++ b/ingestion_tools/scripts/tests/s3_import/test_visualization_config.py @@ -54,9 +54,8 @@ def expected_url() -> str: @pytest.fixture def config(s3_fs: FileSystemApi, test_output_bucket: str, expected_url: str) -> DepositionImportConfig: - config = create_config(s3_fs, test_output_bucket) + config = create_config(s3_fs, test_output_bucket, https_prefix=expected_url) config.write_zarr = True - config.https_prefix = expected_url return config @@ -146,6 +145,7 @@ def test_viz_config_with_only_tomogram( ("OrientedPointMesh", "ndjson"), ("InstanceSegmentation", "ndjson"), ("SegmentationMask", "zarr"), + ("InstanceSegmentationMask", "zarr"), ("SegmentationMask", "mrc"), ("Mesh", "glb"), ], @@ -196,6 +196,13 @@ def annotation_usecases( elif shape == "SegmentationMask" and format == "zarr": generator_method = "generate_segmentation_mask_layer" input_args["name"] += "segmentation" + input_args["visible_segments"] = (1,) + return_value = {"key": generator_method, "random": "value"} + elif shape == "InstanceSegmentationMask" and format == "zarr": + generator_method = "generate_segmentation_mask_layer" + input_args["name"] += "instancesegmentation" + input_args["visible_segments"] = (1, 2, 3) + input_args["color"] = {1: "#51508b", 2: "#ffff00", 3: "#00ffff"} 
return_value = {"key": generator_method, "random": "value"} args = { @@ -277,6 +284,7 @@ def test_viz_config_with_tomogram_and_annotation( config: DepositionImportConfig, validate_config: Callable[[str, dict[str, BaseImporter], list[dict]], None], mock_state_generator: MagicMock, + monkeypatch: pytest.MonkeyPatch, annotation_usecases: dict[str, Any], ) -> None: parents = get_parents(config) @@ -313,6 +321,12 @@ def test_viz_config_with_tomogram_and_annotation( anno_layers.append(mock_state_generator.generate_oriented_point_mesh_layer.return_value) anno_layers[0], anno_layers[1] = anno_layers[1], anno_layers[0] # Ensure mesh layer is first + if annotation_usecases["shape"] == "InstanceSegmentationMask": + monkeypatch.setattr( + "importers.visualization_config.VisualizationConfigImporter._get_labels", + MagicMock(return_value=(1, 2, 3)), + ) + viz_config = list(VisualizationConfigImporter.finder(config, **parents)) for item in viz_config: item.import_item() diff --git a/ingestion_tools/scripts/tests/s3_import/util.py b/ingestion_tools/scripts/tests/s3_import/util.py index 9fd89b6ec..849c829ef 100644 --- a/ingestion_tools/scripts/tests/s3_import/util.py +++ b/ingestion_tools/scripts/tests/s3_import/util.py @@ -21,13 +21,13 @@ def list_dir(s3_client: S3Client, bucket: str, prefix: str, assert_non_zero_size return [item["Key"] for item in files["Contents"]] if "Contents" in files else [] -def create_config(s3_fs: FileSystemApi, test_output_bucket: str, config_path: str = None) -> DepositionImportConfig: +def create_config(s3_fs: FileSystemApi, test_output_bucket: str, config_path: str = None, https_prefix: str = None) -> DepositionImportConfig: output_path = f"{test_output_bucket}/output" input_bucket = "test-public-bucket" if config_path is None: config_path = "dataset1.yaml" import_config = f"tests/fixtures/{config_path}" - return DepositionImportConfig(s3_fs, import_config, output_path, input_bucket, IMPORTERS) + return DepositionImportConfig(s3_fs, import_config, 
output_path, input_bucket, IMPORTERS, https_prefix=https_prefix) def get_run_and_parents( diff --git a/schema/core/v1.1.0/codegen/metadata_models.py b/schema/core/v1.1.0/codegen/metadata_models.py index 6f58e800f..29d4a9459 100644 --- a/schema/core/v1.1.0/codegen/metadata_models.py +++ b/schema/core/v1.1.0/codegen/metadata_models.py @@ -1,11 +1,11 @@ from __future__ import annotations -from datetime import datetime, date -from decimal import Decimal -from enum import Enum + import re -import sys -from typing import Any, ClassVar, List, Literal, Dict, Optional, Union -from pydantic import BaseModel, ConfigDict, Field, RootModel, field_validator, conlist +from datetime import date +from enum import Enum +from typing import Any, ClassVar, Dict, List, Optional, Union + +from pydantic import BaseModel, ConfigDict, Field, RootModel, conlist, field_validator metamodel_version = "None" version = "1.1.0" @@ -88,38 +88,38 @@ def __contains__(self, key: str) -> bool: }, "EMDB_ID": { "base": "str", - "description": "An Electron Microscopy Data Bank " "identifier", + "description": "An Electron Microscopy Data Bank identifier", "from_schema": "metadata", "name": "EMDB_ID", "pattern": "^EMD-[0-9]{4,5}$", }, "EMPIAR_EMDB_DOI_PDB_LIST": { "base": "str", - "description": "A list of EMPIAR, " "EMDB, DOI, and PDB " "identifiers", + "description": "A list of EMPIAR, EMDB, DOI, and PDB identifiers", "from_schema": "metadata", "name": "EMPIAR_EMDB_DOI_PDB_LIST", "pattern": "^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|PDB-[0-9a-zA-Z]{4,8})(\\s*,\\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|PDB-[0-9a-zA-Z]{4,8}))*$", }, "EMPIAR_EMDB_PDB_LIST": { "base": "str", - "description": "A list of EMPIAR, EMDB, " "and PDB identifiers", + "description": "A list of EMPIAR, EMDB, and PDB identifiers", "from_schema": "metadata", "name": "EMPIAR_EMDB_PDB_LIST", "pattern": 
"^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8})(\\s*,\\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8}))*$", }, "EMPIAR_ID": { "base": "str", - "description": "An Electron Microscopy Public Image " "Archive identifier", + "description": "An Electron Microscopy Public Image Archive identifier", "from_schema": "metadata", "name": "EMPIAR_ID", "pattern": "^EMPIAR-[0-9]+$", }, "FloatFormattedString": { "base": "str", - "description": "A formatted string that " "represents a floating " "point number.", + "description": "A formatted string that represents a floating point number.", "from_schema": "metadata", "name": "FloatFormattedString", - "pattern": "^float[ " "]*\\{[a-zA-Z0-9_-]+\\}[ ]*$", + "pattern": "^float[ ]*\\{[a-zA-Z0-9_-]+\\}[ ]*$", }, "GO_ID": { "base": "str", @@ -130,10 +130,10 @@ def __contains__(self, key: str) -> bool: }, "IntegerFormattedString": { "base": "str", - "description": "A formatted string that " "represents an integer.", + "description": "A formatted string that represents an integer.", "from_schema": "metadata", "name": "IntegerFormattedString", - "pattern": "^int[ " "]*\\{[a-zA-Z0-9_-]+\\}[ ]*$", + "pattern": "^int[ ]*\\{[a-zA-Z0-9_-]+\\}[ ]*$", }, "ONTOLOGY_ID": { "base": "str", @@ -144,7 +144,7 @@ def __contains__(self, key: str) -> bool: }, "ORCID": { "base": "str", - "description": "A unique, persistent identifier for " "researchers, provided by ORCID.", + "description": "A unique, persistent identifier for researchers, provided by ORCID.", "from_schema": "metadata", "name": "ORCID", "pattern": "[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$", @@ -158,10 +158,10 @@ def __contains__(self, key: str) -> bool: }, "StringFormattedString": { "base": "str", - "description": "A formatted string " "(variable) that " "represents a string.", + "description": "A formatted string (variable) that represents a string.", "from_schema": "metadata", "name": "StringFormattedString", - "pattern": "^[ ]*\\{[a-zA-Z0-9_-]+\\}[ " "]*$", + "pattern": 
"^[ ]*\\{[a-zA-Z0-9_-]+\\}[ ]*$", }, "UNIPROT_ID": { "base": "str", @@ -179,7 +179,7 @@ def __contains__(self, key: str) -> bool: }, "VersionString": { "base": "float", - "description": "A version number (only major, " "minor versions)", + "description": "A version number (only major, minor versions)", "from_schema": "metadata", "minimum_value": 0, "name": "VersionString", @@ -200,7 +200,7 @@ def __contains__(self, key: str) -> bool: "notes": [ "If you are authoring schemas in LinkML YAML, " "the type is referenced with the lower case " - '"boolean".' + '"boolean".', ], "repr": "bool", "uri": "xsd:boolean", @@ -208,8 +208,8 @@ def __contains__(self, key: str) -> bool: "curie": { "base": "Curie", "comments": [ - "in RDF serializations this MUST be expanded " "to a URI", - "in non-RDF serializations MAY be serialized " "as the compact representation", + "in RDF serializations this MUST be expanded to a URI", + "in non-RDF serializations MAY be serialized as the compact representation", ], "conforms_to": "https://www.w3.org/TR/curie/", "description": "a compact URI", @@ -218,19 +218,19 @@ def __contains__(self, key: str) -> bool: "notes": [ "If you are authoring schemas in LinkML YAML, " "the type is referenced with the lower case " - '"curie".' 
+ '"curie".', ], "repr": "str", "uri": "xsd:string", }, "date": { "base": "XSDDate", - "description": "a date (year, month and day) in an " "idealized calendar", + "description": "a date (year, month and day) in an idealized calendar", "exact_mappings": ["schema:Date"], "from_schema": "metadata", "name": "date", "notes": [ - "URI is dateTime because OWL reasoners don't " "work with straight date or time", + "URI is dateTime because OWL reasoners don't work with straight date or time", "If you are authoring schemas in LinkML YAML, " "the type is referenced with the lower case " '"date".', @@ -247,7 +247,7 @@ def __contains__(self, key: str) -> bool: "If you are authoring schemas in " "LinkML YAML, the type is referenced " "with the lower case " - '"date_or_datetime".' + '"date_or_datetime".', ], "repr": "str", "uri": "linkml:DateOrDatetime", @@ -261,7 +261,7 @@ def __contains__(self, key: str) -> bool: "notes": [ "If you are authoring schemas in LinkML " "YAML, the type is referenced with the lower " - 'case "datetime".' + 'case "datetime".', ], "repr": "str", "uri": "xsd:dateTime", @@ -277,33 +277,33 @@ def __contains__(self, key: str) -> bool: "notes": [ "If you are authoring schemas in LinkML YAML, " "the type is referenced with the lower case " - '"decimal".' + '"decimal".', ], "uri": "xsd:decimal", }, "double": { "base": "float", "close_mappings": ["schema:Float"], - "description": "A real number that conforms to the " "xsd:double specification", + "description": "A real number that conforms to the xsd:double specification", "from_schema": "metadata", "name": "double", "notes": [ "If you are authoring schemas in LinkML YAML, " "the type is referenced with the lower case " - '"double".' 
+ '"double".', ], "uri": "xsd:double", }, "float": { "base": "float", - "description": "A real number that conforms to the " "xsd:float specification", + "description": "A real number that conforms to the xsd:float specification", "exact_mappings": ["schema:Float"], "from_schema": "metadata", "name": "float", "notes": [ "If you are authoring schemas in LinkML YAML, " "the type is referenced with the lower case " - '"float".' + '"float".', ], "uri": "xsd:float", }, @@ -316,7 +316,7 @@ def __contains__(self, key: str) -> bool: "notes": [ "If you are authoring schemas in LinkML YAML, " "the type is referenced with the lower case " - '"integer".' + '"integer".', ], "uri": "xsd:integer", }, @@ -334,7 +334,7 @@ def __contains__(self, key: str) -> bool: "notes": [ "If you are authoring schemas in LinkML " "YAML, the type is referenced with the lower " - 'case "jsonpath".' + 'case "jsonpath".', ], "repr": "str", "uri": "xsd:string", @@ -353,7 +353,7 @@ def __contains__(self, key: str) -> bool: "notes": [ "If you are authoring schemas in LinkML " "YAML, the type is referenced with the " - 'lower case "jsonpointer".' + 'lower case "jsonpointer".', ], "repr": "str", "uri": "xsd:string", @@ -366,36 +366,36 @@ def __contains__(self, key: str) -> bool: "notes": [ "If you are authoring schemas in LinkML YAML, " "the type is referenced with the lower case " - '"ncname".' + '"ncname".', ], "repr": "str", "uri": "xsd:string", }, "nodeidentifier": { "base": "NodeIdentifier", - "description": "A URI, CURIE or BNODE that " "represents a node in a model.", + "description": "A URI, CURIE or BNODE that represents a node in a model.", "from_schema": "metadata", "name": "nodeidentifier", "notes": [ "If you are authoring schemas in " "LinkML YAML, the type is referenced " "with the lower case " - '"nodeidentifier".' 
+ '"nodeidentifier".', ], "repr": "str", "uri": "shex:nonLiteral", }, "objectidentifier": { "base": "ElementIdentifier", - "comments": ["Used for inheritance and type " "checking"], - "description": "A URI or CURIE that represents " "an object in the model.", + "comments": ["Used for inheritance and type checking"], + "description": "A URI or CURIE that represents an object in the model.", "from_schema": "metadata", "name": "objectidentifier", "notes": [ "If you are authoring schemas in " "LinkML YAML, the type is referenced " "with the lower case " - '"objectidentifier".' + '"objectidentifier".', ], "repr": "str", "uri": "shex:iri", @@ -414,7 +414,7 @@ def __contains__(self, key: str) -> bool: "notes": [ "If you are authoring schemas in LinkML " "YAML, the type is referenced with the " - 'lower case "sparqlpath".' + 'lower case "sparqlpath".', ], "repr": "str", "uri": "xsd:string", @@ -430,18 +430,18 @@ def __contains__(self, key: str) -> bool: "string is treated as a literal or type " "xsd:string. If you are authoring schemas in " "LinkML YAML, the type is referenced with the " - 'lower case "string".' 
+ 'lower case "string".', ], "uri": "xsd:string", }, "time": { "base": "XSDTime", - "description": "A time object represents a (local) time of " "day, independent of any particular day", + "description": "A time object represents a (local) time of day, independent of any particular day", "exact_mappings": ["schema:Time"], "from_schema": "metadata", "name": "time", "notes": [ - "URI is dateTime because OWL reasoners do not " "work with straight date or time", + "URI is dateTime because OWL reasoners do not work with straight date or time", "If you are authoring schemas in LinkML YAML, " "the type is referenced with the lower case " '"time".', @@ -457,14 +457,14 @@ def __contains__(self, key: str) -> bool: "uri is treated as a literal or type " "xsd:anyURI unless it is an identifier or a " "reference to an identifier, in which case it " - "is translated directly to a node" + "is translated directly to a node", ], "conforms_to": "https://www.ietf.org/rfc/rfc3987.txt", "description": "a complete URI", "from_schema": "metadata", "name": "uri", "notes": [ - "If you are authoring schemas in LinkML YAML, the " 'type is referenced with the lower case "uri".' + "If you are authoring schemas in LinkML YAML, the " 'type is referenced with the lower case "uri".', ], "repr": "str", "uri": "xsd:anyURI", @@ -477,13 +477,13 @@ def __contains__(self, key: str) -> bool: "notes": [ "If you are authoring schemas in LinkML " "YAML, the type is referenced with the " - 'lower case "uriorcurie".' 
+ 'lower case "uriorcurie".', ], "repr": "str", "uri": "xsd:anyURI", }, }, - } + }, ) @@ -722,7 +722,7 @@ class PicturePath(ConfiguredBaseModel): "domain_of": ["PicturePath"], "exact_mappings": ["cdp-common:snapshot"], "recommended": True, - } + }, }, ) thumbnail: Optional[str] = Field( @@ -734,7 +734,7 @@ class PicturePath(ConfiguredBaseModel): "domain_of": ["PicturePath"], "exact_mappings": ["cdp-common:thumbnail"], "recommended": True, - } + }, }, ) @@ -779,7 +779,7 @@ class FundingDetails(ConfiguredBaseModel): "domain_of": ["FundingDetails"], "exact_mappings": ["cdp-common:funding_agency_name"], "recommended": True, - } + }, }, ) grant_id: Optional[str] = Field( @@ -791,7 +791,7 @@ class FundingDetails(ConfiguredBaseModel): "domain_of": ["FundingDetails"], "exact_mappings": ["cdp-common:funding_grant_id"], "recommended": True, - } + }, }, ) @@ -810,7 +810,7 @@ class DateStampedEntity(ConfiguredBaseModel): "linkml_meta": { "alias": "dates", "domain_of": ["DateStampedEntity", "Tomogram", "Dataset", "Deposition", "Annotation"], - } + }, }, ) @@ -831,7 +831,7 @@ class AuthoredEntity(ConfiguredBaseModel): "alias": "authors", "domain_of": ["AuthoredEntity", "Dataset", "Deposition", "Tomogram", "Annotation"], "list_elements_ordered": True, - } + }, }, ) @@ -852,7 +852,7 @@ class FundedEntity(ConfiguredBaseModel): "domain_of": ["FundedEntity", "Dataset"], "list_elements_ordered": True, "recommended": True, - } + }, }, ) @@ -871,7 +871,7 @@ class CrossReferencedEntity(ConfiguredBaseModel): "linkml_meta": { "alias": "cross_references", "domain_of": ["CrossReferencedEntity", "Tomogram", "Dataset", "Deposition"], - } + }, }, ) @@ -915,7 +915,7 @@ class OrganismDetails(ConfiguredBaseModel): "Author", ], "exact_mappings": ["cdp-common:organism_name"], - } + }, }, ) taxonomy_id: Optional[int] = Field( @@ -928,7 +928,7 @@ class OrganismDetails(ConfiguredBaseModel): "domain_of": ["OrganismDetails"], "exact_mappings": ["cdp-common:organism_taxid"], "recommended": True, - } + }, 
}, ) @@ -958,7 +958,7 @@ class TissueDetails(ConfiguredBaseModel): "Author", ], "exact_mappings": ["cdp-common:tissue_name"], - } + }, }, ) id: Optional[str] = Field( @@ -970,7 +970,7 @@ class TissueDetails(ConfiguredBaseModel): "domain_of": ["TissueDetails", "CellType", "CellStrain", "CellComponent", "AnnotationObject"], "exact_mappings": ["cdp-common:tissue_id"], "recommended": True, - } + }, }, ) @@ -1012,7 +1012,7 @@ class CellType(ConfiguredBaseModel): "Author", ], "exact_mappings": ["cdp-common:cell_name"], - } + }, }, ) id: Optional[str] = Field( @@ -1024,7 +1024,7 @@ class CellType(ConfiguredBaseModel): "domain_of": ["TissueDetails", "CellType", "CellStrain", "CellComponent", "AnnotationObject"], "exact_mappings": ["cdp-common:cell_type_id"], "recommended": True, - } + }, }, ) @@ -1066,7 +1066,7 @@ class CellStrain(ConfiguredBaseModel): "Author", ], "exact_mappings": ["cdp-common:cell_strain_name"], - } + }, }, ) id: Optional[str] = Field( @@ -1079,7 +1079,7 @@ class CellStrain(ConfiguredBaseModel): "domain_of": ["TissueDetails", "CellType", "CellStrain", "CellComponent", "AnnotationObject"], "exact_mappings": ["cdp-common:cell_strain_id"], "recommended": True, - } + }, }, ) @@ -1121,7 +1121,7 @@ class CellComponent(ConfiguredBaseModel): "Author", ], "exact_mappings": ["cdp-common:cell_component_name"], - } + }, }, ) id: Optional[str] = Field( @@ -1133,7 +1133,7 @@ class CellComponent(ConfiguredBaseModel): "domain_of": ["TissueDetails", "CellType", "CellStrain", "CellComponent", "AnnotationObject"], "exact_mappings": ["cdp-common:cell_component_id"], "recommended": True, - } + }, }, ) @@ -1165,7 +1165,7 @@ class ExperimentMetadata(ConfiguredBaseModel): "alias": "sample_type", "domain_of": ["ExperimentMetadata", "Dataset"], "exact_mappings": ["cdp-common:preparation_sample_type"], - } + }, }, ) sample_preparation: Optional[str] = Field( @@ -1177,7 +1177,7 @@ class ExperimentMetadata(ConfiguredBaseModel): "domain_of": ["ExperimentMetadata", "Dataset"], 
"exact_mappings": ["cdp-common:sample_preparation"], "recommended": True, - } + }, }, ) grid_preparation: Optional[str] = Field( @@ -1189,7 +1189,7 @@ class ExperimentMetadata(ConfiguredBaseModel): "domain_of": ["ExperimentMetadata", "Dataset"], "exact_mappings": ["cdp-common:grid_preparation"], "recommended": True, - } + }, }, ) other_setup: Optional[str] = Field( @@ -1201,7 +1201,7 @@ class ExperimentMetadata(ConfiguredBaseModel): "domain_of": ["ExperimentMetadata", "Dataset"], "exact_mappings": ["cdp-common:preparation_other_setup"], "recommended": True, - } + }, }, ) organism: Optional[OrganismDetails] = Field( @@ -1233,7 +1233,7 @@ class ExperimentMetadata(ConfiguredBaseModel): @field_validator("sample_type") def pattern_sample_type(cls, v): pattern = re.compile( - r"(^cell$)|(^tissue$)|(^organism$)|(^organelle$)|(^virus$)|(^in_vitro$)|(^in_silico$)|(^other$)" + r"(^cell$)|(^tissue$)|(^organism$)|(^organelle$)|(^virus$)|(^in_vitro$)|(^in_silico$)|(^other$)", ) if isinstance(v, list): for element in v: @@ -1260,7 +1260,7 @@ class Dataset(ExperimentMetadata, CrossReferencedEntity, FundedEntity, AuthoredE "CrossReferencedEntity", "ExperimentMetadata", ], - } + }, ) dataset_identifier: int = Field( @@ -1271,7 +1271,7 @@ class Dataset(ExperimentMetadata, CrossReferencedEntity, FundedEntity, AuthoredE "alias": "dataset_identifier", "domain_of": ["Dataset"], "exact_mappings": ["cdp-common:dataset_identifier"], - } + }, }, ) dataset_title: str = Field( @@ -1282,7 +1282,7 @@ class Dataset(ExperimentMetadata, CrossReferencedEntity, FundedEntity, AuthoredE "alias": "dataset_title", "domain_of": ["Dataset"], "exact_mappings": ["cdp-common:dataset_title"], - } + }, }, ) dataset_description: str = Field( @@ -1293,7 +1293,7 @@ class Dataset(ExperimentMetadata, CrossReferencedEntity, FundedEntity, AuthoredE "alias": "dataset_description", "domain_of": ["Dataset"], "exact_mappings": ["cdp-common:dataset_description"], - } + }, }, ) dates: DateStamp = Field( @@ -1303,7 +1303,7 
@@ class Dataset(ExperimentMetadata, CrossReferencedEntity, FundedEntity, AuthoredE "linkml_meta": { "alias": "dates", "domain_of": ["DateStampedEntity", "Tomogram", "Dataset", "Deposition", "Annotation"], - } + }, }, ) authors: List[Author] = Field( @@ -1315,7 +1315,7 @@ class Dataset(ExperimentMetadata, CrossReferencedEntity, FundedEntity, AuthoredE "alias": "authors", "domain_of": ["AuthoredEntity", "Dataset", "Deposition", "Tomogram", "Annotation"], "list_elements_ordered": True, - } + }, }, ) funding: Optional[List[FundingDetails]] = Field( @@ -1327,7 +1327,7 @@ class Dataset(ExperimentMetadata, CrossReferencedEntity, FundedEntity, AuthoredE "domain_of": ["FundedEntity", "Dataset"], "list_elements_ordered": True, "recommended": True, - } + }, }, ) cross_references: Optional[CrossReferences] = Field( @@ -1337,7 +1337,7 @@ class Dataset(ExperimentMetadata, CrossReferencedEntity, FundedEntity, AuthoredE "linkml_meta": { "alias": "cross_references", "domain_of": ["CrossReferencedEntity", "Tomogram", "Dataset", "Deposition"], - } + }, }, ) sample_type: SampleTypeEnum = Field( @@ -1348,7 +1348,7 @@ class Dataset(ExperimentMetadata, CrossReferencedEntity, FundedEntity, AuthoredE "alias": "sample_type", "domain_of": ["ExperimentMetadata", "Dataset"], "exact_mappings": ["cdp-common:preparation_sample_type"], - } + }, }, ) sample_preparation: Optional[str] = Field( @@ -1360,7 +1360,7 @@ class Dataset(ExperimentMetadata, CrossReferencedEntity, FundedEntity, AuthoredE "domain_of": ["ExperimentMetadata", "Dataset"], "exact_mappings": ["cdp-common:sample_preparation"], "recommended": True, - } + }, }, ) grid_preparation: Optional[str] = Field( @@ -1372,7 +1372,7 @@ class Dataset(ExperimentMetadata, CrossReferencedEntity, FundedEntity, AuthoredE "domain_of": ["ExperimentMetadata", "Dataset"], "exact_mappings": ["cdp-common:grid_preparation"], "recommended": True, - } + }, }, ) other_setup: Optional[str] = Field( @@ -1384,7 +1384,7 @@ class Dataset(ExperimentMetadata, 
CrossReferencedEntity, FundedEntity, AuthoredE "domain_of": ["ExperimentMetadata", "Dataset"], "exact_mappings": ["cdp-common:preparation_other_setup"], "recommended": True, - } + }, }, ) organism: Optional[OrganismDetails] = Field( @@ -1416,7 +1416,7 @@ class Dataset(ExperimentMetadata, CrossReferencedEntity, FundedEntity, AuthoredE @field_validator("sample_type") def pattern_sample_type(cls, v): pattern = re.compile( - r"(^cell$)|(^tissue$)|(^organism$)|(^organelle$)|(^virus$)|(^in_vitro$)|(^in_silico$)|(^other$)" + r"(^cell$)|(^tissue$)|(^organism$)|(^organelle$)|(^virus$)|(^in_vitro$)|(^in_silico$)|(^other$)", ) if isinstance(v, list): for element in v: @@ -1434,7 +1434,7 @@ class Deposition(CrossReferencedEntity, AuthoredEntity, DateStampedEntity): """ linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( - {"from_schema": "metadata", "mixins": ["DateStampedEntity", "AuthoredEntity", "CrossReferencedEntity"]} + {"from_schema": "metadata", "mixins": ["DateStampedEntity", "AuthoredEntity", "CrossReferencedEntity"]}, ) deposition_description: str = Field( @@ -1445,7 +1445,7 @@ class Deposition(CrossReferencedEntity, AuthoredEntity, DateStampedEntity): "alias": "deposition_description", "domain_of": ["Deposition"], "exact_mappings": ["cdp-common:deposition_description"], - } + }, }, ) deposition_identifier: int = Field( @@ -1456,7 +1456,7 @@ class Deposition(CrossReferencedEntity, AuthoredEntity, DateStampedEntity): "alias": "deposition_identifier", "domain_of": ["Deposition"], "exact_mappings": ["cdp-common:deposition_identifier"], - } + }, }, ) deposition_title: str = Field( @@ -1467,7 +1467,7 @@ class Deposition(CrossReferencedEntity, AuthoredEntity, DateStampedEntity): "alias": "deposition_title", "domain_of": ["Deposition"], "exact_mappings": ["cdp-common:deposition_title"], - } + }, }, ) deposition_types: List[DepositionTypesEnum] = Field( @@ -1479,7 +1479,7 @@ class Deposition(CrossReferencedEntity, AuthoredEntity, DateStampedEntity): "alias": "deposition_types", 
"domain_of": ["Deposition"], "exact_mappings": ["cdp-common:deposition_types"], - } + }, }, ) dates: DateStamp = Field( @@ -1489,7 +1489,7 @@ class Deposition(CrossReferencedEntity, AuthoredEntity, DateStampedEntity): "linkml_meta": { "alias": "dates", "domain_of": ["DateStampedEntity", "Tomogram", "Dataset", "Deposition", "Annotation"], - } + }, }, ) authors: List[Author] = Field( @@ -1501,7 +1501,7 @@ class Deposition(CrossReferencedEntity, AuthoredEntity, DateStampedEntity): "alias": "authors", "domain_of": ["AuthoredEntity", "Dataset", "Deposition", "Tomogram", "Annotation"], "list_elements_ordered": True, - } + }, }, ) cross_references: Optional[CrossReferences] = Field( @@ -1511,7 +1511,7 @@ class Deposition(CrossReferencedEntity, AuthoredEntity, DateStampedEntity): "linkml_meta": { "alias": "cross_references", "domain_of": ["CrossReferencedEntity", "Tomogram", "Dataset", "Deposition"], - } + }, }, ) @@ -1544,7 +1544,7 @@ class CameraDetails(ConfiguredBaseModel): "any_of": [{"range": "StringFormattedString"}, {"range": "tiltseries_camera_acquire_mode_enum"}], "domain_of": ["CameraDetails"], "exact_mappings": ["cdp-common:tiltseries_camera_acquire_mode"], - } + }, }, ) manufacturer: TiltseriesCameraManufacturerEnum = Field( @@ -1555,7 +1555,7 @@ class CameraDetails(ConfiguredBaseModel): "alias": "manufacturer", "domain_of": ["CameraDetails", "MicroscopeDetails"], "exact_mappings": ["cdp-common:tiltseries_camera_manufacturer"], - } + }, }, ) model: str = Field( @@ -1566,7 +1566,7 @@ class CameraDetails(ConfiguredBaseModel): "alias": "model", "domain_of": ["CameraDetails", "MicroscopeDetails"], "exact_mappings": ["cdp-common:tiltseries_camera_model"], - } + }, }, ) @@ -1610,7 +1610,7 @@ class MicroscopeDetails(ConfiguredBaseModel): "alias": "additional_info", "domain_of": ["MicroscopeDetails"], "exact_mappings": ["cdp-common:tiltseries_microscope_additional_info"], - } + }, }, ) manufacturer: Union[TiltseriesMicroscopeManufacturerEnum, str] = Field( @@ -1629,7 
+1629,7 @@ class MicroscopeDetails(ConfiguredBaseModel): {"range": "StringFormattedString"}, ], "domain_of": ["CameraDetails", "MicroscopeDetails"], - } + }, }, ) model: str = Field( @@ -1640,7 +1640,7 @@ class MicroscopeDetails(ConfiguredBaseModel): "alias": "model", "domain_of": ["CameraDetails", "MicroscopeDetails"], "exact_mappings": ["cdp-common:tiltseries_microscope_model"], - } + }, }, ) @@ -1672,7 +1672,7 @@ class MicroscopeOpticalSetup(ConfiguredBaseModel): "alias": "energy_filter", "domain_of": ["MicroscopeOpticalSetup"], "exact_mappings": ["cdp-common:tiltseries_microscope_energy_filter"], - } + }, }, ) phase_plate: Optional[str] = Field( @@ -1683,7 +1683,7 @@ class MicroscopeOpticalSetup(ConfiguredBaseModel): "alias": "phase_plate", "domain_of": ["MicroscopeOpticalSetup"], "exact_mappings": ["cdp-common:tiltseries_microscope_phase_plate"], - } + }, }, ) image_corrector: Optional[str] = Field( @@ -1694,7 +1694,7 @@ class MicroscopeOpticalSetup(ConfiguredBaseModel): "alias": "image_corrector", "domain_of": ["MicroscopeOpticalSetup"], "exact_mappings": ["cdp-common:tiltseries_microscope_image_corrector"], - } + }, }, ) @@ -1728,7 +1728,7 @@ class TiltRange(ConfiguredBaseModel): ], "domain_of": ["TiltRange"], "unit": {"descriptive_name": "degrees", "symbol": "°"}, - } + }, }, ) max: Union[float, str] = Field( @@ -1753,7 +1753,7 @@ class TiltRange(ConfiguredBaseModel): ], "domain_of": ["TiltRange"], "unit": {"descriptive_name": "degrees", "symbol": "°"}, - } + }, }, ) @@ -1799,7 +1799,7 @@ class TiltSeries(ConfiguredBaseModel): "domain_of": ["TiltSeries"], "exact_mappings": ["cdp-common:tiltseries_acceleration_voltage"], "unit": {"descriptive_name": "volts", "symbol": "V"}, - } + }, }, ) aligned_tiltseries_binning: Optional[Union[float, str]] = Field( @@ -1820,7 +1820,7 @@ class TiltSeries(ConfiguredBaseModel): ], "domain_of": ["TiltSeries"], "ifabsent": "float(1)", - } + }, }, ) binning_from_frames: Optional[Union[float, str]] = Field( @@ -1832,7 +1832,7 @@ 
class TiltSeries(ConfiguredBaseModel): "alias": "binning_from_frames", "any_of": [ { - "description": "Describes the binning factor from frames to tilt " "series file", + "description": "Describes the binning factor from frames to tilt series file", "exact_mappings": ["cdp-common:tiltseries_binning_from_frames"], "minimum_value": 0, "range": "float", @@ -1841,7 +1841,7 @@ class TiltSeries(ConfiguredBaseModel): ], "domain_of": ["TiltSeries"], "ifabsent": "float(1)", - } + }, }, ) camera: CameraDetails = Field( @@ -1857,7 +1857,7 @@ class TiltSeries(ConfiguredBaseModel): "alias": "data_acquisition_software", "domain_of": ["TiltSeries"], "exact_mappings": ["cdp-common:tiltseries_data_acquisition_software"], - } + }, }, ) frames_count: Optional[int] = Field( @@ -1868,7 +1868,7 @@ class TiltSeries(ConfiguredBaseModel): "alias": "frames_count", "domain_of": ["TiltSeries"], "exact_mappings": ["cdp-common:tiltseries_frames_count"], - } + }, }, ) is_aligned: bool = Field( @@ -1879,7 +1879,7 @@ class TiltSeries(ConfiguredBaseModel): "alias": "is_aligned", "domain_of": ["TiltSeries"], "exact_mappings": ["cdp-common:tiltseries_is_aligned"], - } + }, }, ) microscope: MicroscopeDetails = Field( @@ -1900,7 +1900,7 @@ class TiltSeries(ConfiguredBaseModel): "alias": "related_empiar_entry", "domain_of": ["TiltSeries"], "exact_mappings": ["cdp-common:tiltseries_related_empiar_entry"], - } + }, }, ) spherical_aberration_constant: Union[float, str] = Field( @@ -1912,7 +1912,7 @@ class TiltSeries(ConfiguredBaseModel): "alias": "spherical_aberration_constant", "any_of": [ { - "description": "Spherical Aberration Constant of the objective " "lens in millimeters", + "description": "Spherical Aberration Constant of the objective lens in millimeters", "exact_mappings": ["cdp-common:tiltseries_spherical_aberration_constant"], "minimum_value": 0, "range": "float", @@ -1923,7 +1923,7 @@ class TiltSeries(ConfiguredBaseModel): ], "domain_of": ["TiltSeries"], "unit": {"descriptive_name": 
"millimeters", "symbol": "mm"}, - } + }, }, ) tilt_alignment_software: Optional[str] = Field( @@ -1934,7 +1934,7 @@ class TiltSeries(ConfiguredBaseModel): "alias": "tilt_alignment_software", "domain_of": ["TiltSeries"], "exact_mappings": ["cdp-common:tiltseries_tilt_alignment_software"], - } + }, }, ) tilt_axis: Union[float, str] = Field( @@ -1959,7 +1959,7 @@ class TiltSeries(ConfiguredBaseModel): ], "domain_of": ["TiltSeries"], "unit": {"descriptive_name": "degrees", "symbol": "°"}, - } + }, }, ) tilt_range: TiltRange = Field( @@ -1988,7 +1988,7 @@ class TiltSeries(ConfiguredBaseModel): {"range": "IntegerFormattedString"}, ], "domain_of": ["TiltSeries"], - } + }, }, ) tilt_step: Union[float, str] = Field( @@ -2013,7 +2013,7 @@ class TiltSeries(ConfiguredBaseModel): ], "domain_of": ["TiltSeries"], "unit": {"descriptive_name": "degrees", "symbol": "°"}, - } + }, }, ) tilting_scheme: str = Field( @@ -2024,7 +2024,7 @@ class TiltSeries(ConfiguredBaseModel): "alias": "tilting_scheme", "domain_of": ["TiltSeries"], "exact_mappings": ["cdp-common:tiltseries_tilting_scheme"], - } + }, }, ) total_flux: Union[float, str] = Field( @@ -2048,7 +2048,7 @@ class TiltSeries(ConfiguredBaseModel): ], "domain_of": ["TiltSeries"], "unit": {"descriptive_name": "electrons per square Angstrom", "symbol": "e^-/Å^2"}, - } + }, }, ) pixel_spacing: Union[float, str] = Field( @@ -2071,7 +2071,7 @@ class TiltSeries(ConfiguredBaseModel): ], "domain_of": ["TiltSeries"], "unit": {"descriptive_name": "Angstroms per pixel", "symbol": "Å/px"}, - } + }, }, ) @@ -2200,7 +2200,7 @@ class TomogramSize(ConfiguredBaseModel): "alias": "x", "domain_of": ["TomogramSize", "TomogramOffset", "AlignmentSize", "AlignmentOffset"], "unit": {"descriptive_name": "pixels", "symbol": "px"}, - } + }, }, ) y: int = Field( @@ -2212,7 +2212,7 @@ class TomogramSize(ConfiguredBaseModel): "alias": "y", "domain_of": ["TomogramSize", "TomogramOffset", "AlignmentSize", "AlignmentOffset"], "unit": {"descriptive_name": "pixels", 
"symbol": "px"}, - } + }, }, ) z: int = Field( @@ -2224,7 +2224,7 @@ class TomogramSize(ConfiguredBaseModel): "alias": "z", "domain_of": ["TomogramSize", "TomogramOffset", "AlignmentSize", "AlignmentOffset"], "unit": {"descriptive_name": "pixels", "symbol": "px"}, - } + }, }, ) @@ -2245,7 +2245,7 @@ class TomogramOffset(ConfiguredBaseModel): "any_of": [{"range": "integer"}, {"range": "IntegerFormattedString"}], "domain_of": ["TomogramSize", "TomogramOffset", "AlignmentSize", "AlignmentOffset"], "unit": {"descriptive_name": "pixels", "symbol": "px"}, - } + }, }, ) y: Union[int, str] = Field( @@ -2257,7 +2257,7 @@ class TomogramOffset(ConfiguredBaseModel): "any_of": [{"range": "integer"}, {"range": "IntegerFormattedString"}], "domain_of": ["TomogramSize", "TomogramOffset", "AlignmentSize", "AlignmentOffset"], "unit": {"descriptive_name": "pixels", "symbol": "px"}, - } + }, }, ) z: Union[int, str] = Field( @@ -2269,7 +2269,7 @@ class TomogramOffset(ConfiguredBaseModel): "any_of": [{"range": "integer"}, {"range": "IntegerFormattedString"}], "domain_of": ["TomogramSize", "TomogramOffset", "AlignmentSize", "AlignmentOffset"], "unit": {"descriptive_name": "pixels", "symbol": "px"}, - } + }, }, ) @@ -2326,7 +2326,7 @@ class Tomogram(AuthoredEntity): "alias": "voxel_spacing", "any_of": [ { - "description": "Voxel spacing equal in all three axes in " "angstroms", + "description": "Voxel spacing equal in all three axes in angstroms", "exact_mappings": ["cdp-common:tomogram_voxel_spacing"], "minimum_value": 0.001, "range": "float", @@ -2337,7 +2337,7 @@ class Tomogram(AuthoredEntity): ], "domain_of": ["Tomogram"], "unit": {"descriptive_name": "Angstroms per voxel", "symbol": "Å/voxel"}, - } + }, }, ) fiducial_alignment_status: Union[FiducialAlignmentStatusEnum, str] = Field( @@ -2348,7 +2348,7 @@ class Tomogram(AuthoredEntity): "alias": "fiducial_alignment_status", "any_of": [ { - "description": "Whether the tomographic alignment was computed " "based on fiducial markers.", + 
"description": "Whether the tomographic alignment was computed based on fiducial markers.", "exact_mappings": ["cdp-common:tomogram_fiducial_alignment_status"], "range": "fiducial_alignment_status_enum", "required": True, @@ -2356,7 +2356,7 @@ class Tomogram(AuthoredEntity): {"range": "StringFormattedString"}, ], "domain_of": ["Tomogram"], - } + }, }, ) ctf_corrected: Optional[bool] = Field( @@ -2368,7 +2368,7 @@ class Tomogram(AuthoredEntity): "domain_of": ["Tomogram"], "exact_mappings": ["cdp-common:tomogram_ctf_corrected"], "recommended": True, - } + }, }, ) align_software: Optional[str] = Field( @@ -2379,7 +2379,7 @@ class Tomogram(AuthoredEntity): "alias": "align_software", "domain_of": ["Tomogram"], "exact_mappings": ["cdp-common:tomogram_align_software"], - } + }, }, ) reconstruction_method: Union[TomogramReconstructionMethodEnum, str] = Field( @@ -2398,7 +2398,7 @@ class Tomogram(AuthoredEntity): {"range": "StringFormattedString"}, ], "domain_of": ["Tomogram"], - } + }, }, ) reconstruction_software: str = Field( @@ -2409,7 +2409,7 @@ class Tomogram(AuthoredEntity): "alias": "reconstruction_software", "domain_of": ["Tomogram"], "exact_mappings": ["cdp-common:tomogram_reconstruction_software"], - } + }, }, ) processing: TomogramProcessingEnum = Field( @@ -2420,7 +2420,7 @@ class Tomogram(AuthoredEntity): "alias": "processing", "domain_of": ["Tomogram"], "exact_mappings": ["cdp-common:tomogram_processing"], - } + }, }, ) processing_software: Optional[str] = Field( @@ -2432,7 +2432,7 @@ class Tomogram(AuthoredEntity): "domain_of": ["Tomogram"], "exact_mappings": ["cdp-common:tomogram_processing_software"], "recommended": True, - } + }, }, ) tomogram_version: float = Field( @@ -2443,7 +2443,7 @@ class Tomogram(AuthoredEntity): "alias": "tomogram_version", "domain_of": ["Tomogram"], "exact_mappings": ["cdp-common:tomogram_version"], - } + }, }, ) affine_transformation_matrix: Optional[ @@ -2459,7 +2459,7 @@ class Tomogram(AuthoredEntity): 
"exact_number_dimensions": 2, }, "domain_of": ["Tomogram", "Alignment"], - } + }, }, ) size: Optional[TomogramSize] = Field( @@ -2490,7 +2490,7 @@ class Tomogram(AuthoredEntity): "AnnotationTriangularMeshGroupFile", ], "ifabsent": "True", - } + }, }, ) cross_references: Optional[CrossReferences] = Field( @@ -2500,7 +2500,7 @@ class Tomogram(AuthoredEntity): "linkml_meta": { "alias": "cross_references", "domain_of": ["CrossReferencedEntity", "Tomogram", "Dataset", "Deposition"], - } + }, }, ) dates: DateStamp = Field( @@ -2510,7 +2510,7 @@ class Tomogram(AuthoredEntity): "linkml_meta": { "alias": "dates", "domain_of": ["DateStampedEntity", "Tomogram", "Dataset", "Deposition", "Annotation"], - } + }, }, ) authors: List[Author] = Field( @@ -2522,7 +2522,7 @@ class Tomogram(AuthoredEntity): "alias": "authors", "domain_of": ["AuthoredEntity", "Dataset", "Deposition", "Tomogram", "Annotation"], "list_elements_ordered": True, - } + }, }, ) @@ -2593,7 +2593,7 @@ class AnnotationConfidence(ConfiguredBaseModel): "domain_of": ["AnnotationConfidence"], "exact_mappings": ["cdp-common:annotation_confidence_precision"], "unit": {"descriptive_name": "percentage", "symbol": "%"}, - } + }, }, ) recall: Optional[float] = Field( @@ -2607,7 +2607,7 @@ class AnnotationConfidence(ConfiguredBaseModel): "domain_of": ["AnnotationConfidence"], "exact_mappings": ["cdp-common:annotation_confidence_recall"], "unit": {"descriptive_name": "percentage", "symbol": "%"}, - } + }, }, ) ground_truth_used: Optional[str] = Field( @@ -2618,7 +2618,7 @@ class AnnotationConfidence(ConfiguredBaseModel): "alias": "ground_truth_used", "domain_of": ["AnnotationConfidence"], "exact_mappings": ["cdp-common:annotation_ground_truth_used"], - } + }, }, ) @@ -2639,7 +2639,7 @@ class AnnotationObject(ConfiguredBaseModel): "any_of": [{"range": "GO_ID"}, {"range": "UNIPROT_ID"}], "domain_of": ["TissueDetails", "CellType", "CellStrain", "CellComponent", "AnnotationObject"], "exact_mappings": 
["cdp-common:annotation_object_id"], - } + }, }, ) name: str = Field( @@ -2660,7 +2660,7 @@ class AnnotationObject(ConfiguredBaseModel): "Author", ], "exact_mappings": ["cdp-common:annotation_object_name"], - } + }, }, ) description: Optional[str] = Field( @@ -2671,7 +2671,7 @@ class AnnotationObject(ConfiguredBaseModel): "alias": "description", "domain_of": ["AnnotationObject"], "exact_mappings": ["cdp-common:annotation_object_description"], - } + }, }, ) state: Optional[str] = Field( @@ -2682,14 +2682,14 @@ class AnnotationObject(ConfiguredBaseModel): "alias": "state", "domain_of": ["AnnotationObject"], "exact_mappings": ["cdp-common:annotation_object_state"], - } + }, }, ) @field_validator("id") def pattern_id(cls, v): pattern = re.compile( - r"(^GO:[0-9]{7}$)|(^UniProtKB:[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}$)" + r"(^GO:[0-9]{7}$)|(^UniProtKB:[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}$)", ) if isinstance(v, list): for element in v: @@ -2716,7 +2716,7 @@ class AnnotationMethodLinks(ConfiguredBaseModel): "alias": "link", "domain_of": ["AnnotationMethodLinks"], "exact_mappings": ["cdp-common:annotation_method_link"], - } + }, }, ) link_type: AnnotationMethodLinkTypeEnum = Field( @@ -2727,7 +2727,7 @@ class AnnotationMethodLinks(ConfiguredBaseModel): "alias": "link_type", "domain_of": ["AnnotationMethodLinks"], "exact_mappings": ["cdp-common:annotation_method_link_type"], - } + }, }, ) custom_name: Optional[str] = Field( @@ -2739,7 +2739,7 @@ class AnnotationMethodLinks(ConfiguredBaseModel): "domain_of": ["AnnotationMethodLinks"], "exact_mappings": ["cdp-common:annotation_method_link_custom_name"], "recommended": True, - } + }, }, ) @@ -2780,7 +2780,7 @@ class AnnotationSourceFile(ConfiguredBaseModel): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_format"], - } + }, }, ) glob_string: Optional[str] = Field( @@ -2800,7 +2800,7 @@ class 
AnnotationSourceFile(ConfiguredBaseModel): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_glob_string"], - } + }, }, ) glob_strings: Optional[List[str]] = Field( @@ -2820,7 +2820,7 @@ class AnnotationSourceFile(ConfiguredBaseModel): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_glob_strings"], - } + }, }, ) is_visualization_default: Optional[bool] = Field( @@ -2842,7 +2842,7 @@ class AnnotationSourceFile(ConfiguredBaseModel): ], "exact_mappings": ["cdp-common:annotation_source_file_is_visualization_default"], "ifabsent": "False", - } + }, }, ) is_portal_standard: Optional[bool] = Field( @@ -2864,7 +2864,7 @@ class AnnotationSourceFile(ConfiguredBaseModel): ], "exact_mappings": ["cdp-common:annotation_source_file_is_portal_standard"], "ifabsent": "False", - } + }, }, ) @@ -2890,7 +2890,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): ], "exact_mappings": ["cdp-common:annotation_source_file_binning"], "ifabsent": "float(1)", - } + }, }, ) filter_value: Optional[str] = Field( @@ -2905,7 +2905,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): "AnnotationInstanceSegmentationFile", ], "exact_mappings": ["cdp-common:annotation_source_file_filter_value"], - } + }, }, ) order: Optional[str] = Field( @@ -2917,7 +2917,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): "domain_of": ["AnnotationOrientedPointFile", "AnnotationInstanceSegmentationFile"], "exact_mappings": ["cdp-common:annotation_source_file_order"], "ifabsent": "string(xyz)", - } + }, }, ) file_format: str = Field( @@ -2937,7 +2937,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_format"], - } + }, }, ) glob_string: Optional[str] = Field( @@ -2957,7 +2957,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": 
["cdp-common:annotation_source_file_glob_string"], - } + }, }, ) glob_strings: Optional[List[str]] = Field( @@ -2977,7 +2977,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_glob_strings"], - } + }, }, ) is_visualization_default: Optional[bool] = Field( @@ -2999,7 +2999,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): ], "exact_mappings": ["cdp-common:annotation_source_file_is_visualization_default"], "ifabsent": "False", - } + }, }, ) is_portal_standard: Optional[bool] = Field( @@ -3021,7 +3021,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): ], "exact_mappings": ["cdp-common:annotation_source_file_is_portal_standard"], "ifabsent": "False", - } + }, }, ) @@ -3047,7 +3047,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): ], "exact_mappings": ["cdp-common:annotation_source_file_binning"], "ifabsent": "float(1)", - } + }, }, ) filter_value: Optional[str] = Field( @@ -3062,7 +3062,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): "AnnotationInstanceSegmentationFile", ], "exact_mappings": ["cdp-common:annotation_source_file_filter_value"], - } + }, }, ) order: Optional[str] = Field( @@ -3074,7 +3074,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): "domain_of": ["AnnotationOrientedPointFile", "AnnotationInstanceSegmentationFile"], "exact_mappings": ["cdp-common:annotation_source_file_order"], "ifabsent": "string(xyz)", - } + }, }, ) file_format: str = Field( @@ -3094,7 +3094,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_format"], - } + }, }, ) glob_string: Optional[str] = Field( @@ -3114,7 +3114,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": 
["cdp-common:annotation_source_file_glob_string"], - } + }, }, ) glob_strings: Optional[List[str]] = Field( @@ -3134,7 +3134,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_glob_strings"], - } + }, }, ) is_visualization_default: Optional[bool] = Field( @@ -3156,7 +3156,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): ], "exact_mappings": ["cdp-common:annotation_source_file_is_visualization_default"], "ifabsent": "False", - } + }, }, ) is_portal_standard: Optional[bool] = Field( @@ -3178,7 +3178,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): ], "exact_mappings": ["cdp-common:annotation_source_file_is_portal_standard"], "ifabsent": "False", - } + }, }, ) @@ -3204,7 +3204,7 @@ class AnnotationPointFile(AnnotationSourceFile): ], "exact_mappings": ["cdp-common:annotation_source_file_binning"], "ifabsent": "float(1)", - } + }, }, ) columns: Optional[str] = Field( @@ -3216,7 +3216,7 @@ class AnnotationPointFile(AnnotationSourceFile): "domain_of": ["AnnotationPointFile"], "exact_mappings": ["cdp-common:annotation_source_file_columns"], "ifabsent": "string(xyz)", - } + }, }, ) delimiter: Optional[str] = Field( @@ -3228,7 +3228,7 @@ class AnnotationPointFile(AnnotationSourceFile): "domain_of": ["AnnotationPointFile"], "exact_mappings": ["cdp-common:annotation_source_file_delimiter"], "ifabsent": "string(,)", - } + }, }, ) filter_value: Optional[str] = Field( @@ -3243,7 +3243,7 @@ class AnnotationPointFile(AnnotationSourceFile): "AnnotationInstanceSegmentationFile", ], "exact_mappings": ["cdp-common:annotation_source_file_filter_value"], - } + }, }, ) file_format: str = Field( @@ -3263,7 +3263,7 @@ class AnnotationPointFile(AnnotationSourceFile): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_format"], - } + }, }, ) glob_string: Optional[str] = Field( @@ 
-3283,7 +3283,7 @@ class AnnotationPointFile(AnnotationSourceFile): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_glob_string"], - } + }, }, ) glob_strings: Optional[List[str]] = Field( @@ -3303,7 +3303,7 @@ class AnnotationPointFile(AnnotationSourceFile): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_glob_strings"], - } + }, }, ) is_visualization_default: Optional[bool] = Field( @@ -3325,7 +3325,7 @@ class AnnotationPointFile(AnnotationSourceFile): ], "exact_mappings": ["cdp-common:annotation_source_file_is_visualization_default"], "ifabsent": "False", - } + }, }, ) is_portal_standard: Optional[bool] = Field( @@ -3347,7 +3347,7 @@ class AnnotationPointFile(AnnotationSourceFile): ], "exact_mappings": ["cdp-common:annotation_source_file_is_portal_standard"], "ifabsent": "False", - } + }, }, ) @@ -3376,7 +3376,7 @@ class AnnotationSegmentationMaskFile(AnnotationSourceFile): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_format"], - } + }, }, ) glob_string: Optional[str] = Field( @@ -3396,7 +3396,7 @@ class AnnotationSegmentationMaskFile(AnnotationSourceFile): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_glob_string"], - } + }, }, ) glob_strings: Optional[List[str]] = Field( @@ -3416,7 +3416,7 @@ class AnnotationSegmentationMaskFile(AnnotationSourceFile): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_glob_strings"], - } + }, }, ) is_visualization_default: Optional[bool] = Field( @@ -3438,7 +3438,7 @@ class AnnotationSegmentationMaskFile(AnnotationSourceFile): ], "exact_mappings": ["cdp-common:annotation_source_file_is_visualization_default"], "ifabsent": "False", - } + }, }, ) is_portal_standard: Optional[bool] = Field( @@ -3460,7 +3460,7 @@ class AnnotationSegmentationMaskFile(AnnotationSourceFile): ], "exact_mappings": 
["cdp-common:annotation_source_file_is_portal_standard"], "ifabsent": "False", - } + }, }, ) @@ -3481,7 +3481,7 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): "domain_of": ["AnnotationSemanticSegmentationMaskFile"], "exact_mappings": ["cdp-common:annotation_source_file_mask_label"], "ifabsent": "int(1)", - } + }, }, ) rescale: Optional[bool] = Field( @@ -3493,7 +3493,7 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): "domain_of": ["AnnotationSemanticSegmentationMaskFile"], "exact_mappings": ["cdp-common:annotation_source_file_rescale"], "ifabsent": "False", - } + }, }, ) threshold: Optional[float] = Field( @@ -3504,7 +3504,7 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): "alias": "threshold", "domain_of": ["AnnotationSemanticSegmentationMaskFile"], "exact_mappings": ["cdp-common:annotation_source_file_threshold"], - } + }, }, ) file_format: str = Field( @@ -3524,7 +3524,7 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_format"], - } + }, }, ) glob_string: Optional[str] = Field( @@ -3544,7 +3544,7 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_glob_string"], - } + }, }, ) glob_strings: Optional[List[str]] = Field( @@ -3564,7 +3564,7 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_glob_strings"], - } + }, }, ) is_visualization_default: Optional[bool] = Field( @@ -3586,7 +3586,7 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): ], "exact_mappings": ["cdp-common:annotation_source_file_is_visualization_default"], "ifabsent": "False", - } + }, }, ) is_portal_standard: Optional[bool] = Field( @@ -3608,7 +3608,7 @@ class 
AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): ], "exact_mappings": ["cdp-common:annotation_source_file_is_portal_standard"], "ifabsent": "False", - } + }, }, ) @@ -3630,7 +3630,7 @@ class AnnotationTriangularMeshFile(AnnotationSourceFile): "domain_of": ["AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile"], "exact_mappings": ["cdp-common:annotation_source_file_scale_factor"], "ifabsent": "float(1)", - } + }, }, ) file_format: str = Field( @@ -3650,7 +3650,7 @@ class AnnotationTriangularMeshFile(AnnotationSourceFile): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_format"], - } + }, }, ) glob_string: Optional[str] = Field( @@ -3670,7 +3670,7 @@ class AnnotationTriangularMeshFile(AnnotationSourceFile): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_glob_string"], - } + }, }, ) glob_strings: Optional[List[str]] = Field( @@ -3690,7 +3690,7 @@ class AnnotationTriangularMeshFile(AnnotationSourceFile): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_glob_strings"], - } + }, }, ) is_visualization_default: Optional[bool] = Field( @@ -3712,7 +3712,7 @@ class AnnotationTriangularMeshFile(AnnotationSourceFile): ], "exact_mappings": ["cdp-common:annotation_source_file_is_visualization_default"], "ifabsent": "False", - } + }, }, ) is_portal_standard: Optional[bool] = Field( @@ -3734,7 +3734,7 @@ class AnnotationTriangularMeshFile(AnnotationSourceFile): ], "exact_mappings": ["cdp-common:annotation_source_file_is_portal_standard"], "ifabsent": "False", - } + }, }, ) @@ -3756,7 +3756,7 @@ class AnnotationTriangularMeshGroupFile(AnnotationSourceFile): "domain_of": ["AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile"], "exact_mappings": ["cdp-common:annotation_source_file_scale_factor"], "ifabsent": "float(1)", - } + }, }, ) name: Optional[str] = Field( @@ -3777,7 +3777,7 @@ class 
AnnotationTriangularMeshGroupFile(AnnotationSourceFile): "Author", ], "exact_mappings": ["cdp-common:annotation_source_file_mesh_name"], - } + }, }, ) file_format: str = Field( @@ -3797,7 +3797,7 @@ class AnnotationTriangularMeshGroupFile(AnnotationSourceFile): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_format"], - } + }, }, ) glob_string: Optional[str] = Field( @@ -3817,7 +3817,7 @@ class AnnotationTriangularMeshGroupFile(AnnotationSourceFile): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_glob_string"], - } + }, }, ) glob_strings: Optional[List[str]] = Field( @@ -3837,7 +3837,7 @@ class AnnotationTriangularMeshGroupFile(AnnotationSourceFile): "AnnotationTriangularMeshGroupFile", ], "exact_mappings": ["cdp-common:annotation_source_file_glob_strings"], - } + }, }, ) is_visualization_default: Optional[bool] = Field( @@ -3859,7 +3859,7 @@ class AnnotationTriangularMeshGroupFile(AnnotationSourceFile): ], "exact_mappings": ["cdp-common:annotation_source_file_is_visualization_default"], "ifabsent": "False", - } + }, }, ) is_portal_standard: Optional[bool] = Field( @@ -3881,7 +3881,7 @@ class AnnotationTriangularMeshGroupFile(AnnotationSourceFile): ], "exact_mappings": ["cdp-common:annotation_source_file_is_portal_standard"], "ifabsent": "False", - } + }, }, ) @@ -3892,7 +3892,7 @@ class Annotation(AuthoredEntity, DateStampedEntity): """ linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta( - {"from_schema": "metadata", "mixins": ["DateStampedEntity", "AuthoredEntity"]} + {"from_schema": "metadata", "mixins": ["DateStampedEntity", "AuthoredEntity"]}, ) annotation_method: str = Field( @@ -3903,7 +3903,7 @@ class Annotation(AuthoredEntity, DateStampedEntity): "alias": "annotation_method", "domain_of": ["Annotation"], "exact_mappings": ["cdp-common:annotation_method"], - } + }, }, ) annotation_object: AnnotationObject = Field( @@ -3919,7 +3919,7 @@ class Annotation(AuthoredEntity, 
DateStampedEntity): "alias": "annotation_publications", "domain_of": ["Annotation"], "exact_mappings": ["cdp-common:annotation_publications"], - } + }, }, ) annotation_software: Optional[str] = Field( @@ -3931,7 +3931,7 @@ class Annotation(AuthoredEntity, DateStampedEntity): "domain_of": ["Annotation"], "exact_mappings": ["cdp-common:annotation_software"], "recommended": True, - } + }, }, ) confidence: Optional[AnnotationConfidence] = Field( @@ -3943,7 +3943,7 @@ class Annotation(AuthoredEntity, DateStampedEntity): None, description="""File and sourcing data for an annotation. Represents an entry in annotation.sources.""", json_schema_extra={ - "linkml_meta": {"alias": "files", "domain_of": ["Annotation"], "list_elements_ordered": True} + "linkml_meta": {"alias": "files", "domain_of": ["Annotation"], "list_elements_ordered": True}, }, ) ground_truth_status: Optional[bool] = Field( @@ -3956,7 +3956,7 @@ class Annotation(AuthoredEntity, DateStampedEntity): "exact_mappings": ["cdp-common:annotation_ground_truth_status"], "ifabsent": "False", "recommended": True, - } + }, }, ) is_curator_recommended: Optional[bool] = Field( @@ -3968,7 +3968,7 @@ class Annotation(AuthoredEntity, DateStampedEntity): "domain_of": ["Annotation"], "exact_mappings": ["cdp-common:annotation_is_curator_recommended"], "ifabsent": "False", - } + }, }, ) method_type: AnnotationMethodTypeEnum = Field( @@ -3979,7 +3979,7 @@ class Annotation(AuthoredEntity, DateStampedEntity): "alias": "method_type", "domain_of": ["Annotation", "Alignment"], "exact_mappings": ["cdp-common:annotation_method_type"], - } + }, }, ) method_links: Optional[List[AnnotationMethodLinks]] = Field( @@ -3995,7 +3995,7 @@ class Annotation(AuthoredEntity, DateStampedEntity): "alias": "object_count", "domain_of": ["Annotation"], "exact_mappings": ["cdp-common:annotation_object_count"], - } + }, }, ) version: Optional[float] = Field( @@ -4006,7 +4006,7 @@ class Annotation(AuthoredEntity, DateStampedEntity): "alias": "version", 
"domain_of": ["Annotation"], "exact_mappings": ["cdp-common:annotation_version"], - } + }, }, ) dates: DateStamp = Field( @@ -4016,7 +4016,7 @@ class Annotation(AuthoredEntity, DateStampedEntity): "linkml_meta": { "alias": "dates", "domain_of": ["DateStampedEntity", "Tomogram", "Dataset", "Deposition", "Annotation"], - } + }, }, ) authors: List[Author] = Field( @@ -4028,14 +4028,14 @@ class Annotation(AuthoredEntity, DateStampedEntity): "alias": "authors", "domain_of": ["AuthoredEntity", "Dataset", "Deposition", "Tomogram", "Annotation"], "list_elements_ordered": True, - } + }, }, ) @field_validator("annotation_publications") def pattern_annotation_publications(cls, v): pattern = re.compile( - r"^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|PDB-[0-9a-zA-Z]{4,8}))*$" + r"^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|PDB-[0-9a-zA-Z]{4,8}))*$", ) if isinstance(v, list): for element in v: @@ -4075,7 +4075,7 @@ class AlignmentSize(ConfiguredBaseModel): "any_of": [{"range": "integer"}, {"range": "IntegerFormattedString"}], "domain_of": ["TomogramSize", "TomogramOffset", "AlignmentSize", "AlignmentOffset"], "unit": {"descriptive_name": "pixels", "symbol": "px"}, - } + }, }, ) y: Union[int, str] = Field( @@ -4087,7 +4087,7 @@ class AlignmentSize(ConfiguredBaseModel): "any_of": [{"range": "integer"}, {"range": "IntegerFormattedString"}], "domain_of": ["TomogramSize", "TomogramOffset", "AlignmentSize", "AlignmentOffset"], "unit": {"descriptive_name": "pixels", "symbol": "px"}, - } + }, }, ) z: Union[int, str] = Field( @@ -4099,7 +4099,7 @@ class AlignmentSize(ConfiguredBaseModel): "any_of": [{"range": "integer"}, {"range": "IntegerFormattedString"}], "domain_of": ["TomogramSize", "TomogramOffset", 
"AlignmentSize", "AlignmentOffset"], "unit": {"descriptive_name": "pixels", "symbol": "px"}, - } + }, }, ) @@ -4157,7 +4157,7 @@ class AlignmentOffset(ConfiguredBaseModel): "domain_of": ["TomogramSize", "TomogramOffset", "AlignmentSize", "AlignmentOffset"], "ifabsent": "int(0)", "unit": {"descriptive_name": "pixels", "symbol": "px"}, - } + }, }, ) y: Union[int, str] = Field( @@ -4170,7 +4170,7 @@ class AlignmentOffset(ConfiguredBaseModel): "domain_of": ["TomogramSize", "TomogramOffset", "AlignmentSize", "AlignmentOffset"], "ifabsent": "int(0)", "unit": {"descriptive_name": "pixels", "symbol": "px"}, - } + }, }, ) z: Union[int, str] = Field( @@ -4183,7 +4183,7 @@ class AlignmentOffset(ConfiguredBaseModel): "domain_of": ["TomogramSize", "TomogramOffset", "AlignmentSize", "AlignmentOffset"], "ifabsent": "int(0)", "unit": {"descriptive_name": "pixels", "symbol": "px"}, - } + }, }, ) @@ -4251,14 +4251,14 @@ class Alignment(ConfiguredBaseModel): "any_of": [{"range": "integer"}, {"range": "IntegerFormattedString"}], "domain_of": ["Alignment"], "ifabsent": "int(0)", - } + }, }, ) tilt_offset: Optional[float] = Field( 0.0, description="""The tilt offset relative to the tomogram.""", json_schema_extra={ - "linkml_meta": {"alias": "tilt_offset", "domain_of": ["Alignment"], "ifabsent": "float(0.0)"} + "linkml_meta": {"alias": "tilt_offset", "domain_of": ["Alignment"], "ifabsent": "float(0.0)"}, }, ) affine_transformation_matrix: Optional[ @@ -4274,7 +4274,7 @@ class Alignment(ConfiguredBaseModel): "exact_number_dimensions": 2, }, "domain_of": ["Tomogram", "Alignment"], - } + }, }, ) is_portal_standard: Optional[bool] = Field( @@ -4295,7 +4295,7 @@ class Alignment(ConfiguredBaseModel): "AnnotationTriangularMeshGroupFile", ], "ifabsent": "False", - } + }, }, ) format: AlignmentFormatEnum = Field( @@ -4366,7 +4366,7 @@ class Frame(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"from_schema": "metadata"}) dose: float = Field( - ..., description="""The 
dose.""", json_schema_extra={"linkml_meta": {"alias": "dose", "domain_of": ["Frame"]}} + ..., description="""The dose.""", json_schema_extra={"linkml_meta": {"alias": "dose", "domain_of": ["Frame"]}}, ) defocus: float = Field( ..., @@ -4400,7 +4400,7 @@ class DateStampedEntityMixin(ConfiguredBaseModel): "alias": "deposition_date", "domain_of": ["DateStampedEntityMixin", "DateStamp"], "exact_mappings": ["cdp-common:deposition_date"], - } + }, }, ) release_date: date = Field( @@ -4411,7 +4411,7 @@ class DateStampedEntityMixin(ConfiguredBaseModel): "alias": "release_date", "domain_of": ["DateStampedEntityMixin", "DateStamp"], "exact_mappings": ["cdp-common:release_date"], - } + }, }, ) last_modified_date: date = Field( @@ -4422,7 +4422,7 @@ class DateStampedEntityMixin(ConfiguredBaseModel): "alias": "last_modified_date", "domain_of": ["DateStampedEntityMixin", "DateStamp"], "exact_mappings": ["cdp-common:last_modified_date"], - } + }, }, ) @@ -4442,7 +4442,7 @@ class DateStamp(DateStampedEntityMixin): "alias": "deposition_date", "domain_of": ["DateStampedEntityMixin", "DateStamp"], "exact_mappings": ["cdp-common:deposition_date"], - } + }, }, ) release_date: date = Field( @@ -4453,7 +4453,7 @@ class DateStamp(DateStampedEntityMixin): "alias": "release_date", "domain_of": ["DateStampedEntityMixin", "DateStamp"], "exact_mappings": ["cdp-common:release_date"], - } + }, }, ) last_modified_date: date = Field( @@ -4464,7 +4464,7 @@ class DateStamp(DateStampedEntityMixin): "alias": "last_modified_date", "domain_of": ["DateStampedEntityMixin", "DateStamp"], "exact_mappings": ["cdp-common:last_modified_date"], - } + }, }, ) @@ -4484,7 +4484,7 @@ class CrossReferencesMixin(ConfiguredBaseModel): "alias": "publications", "domain_of": ["CrossReferencesMixin", "CrossReferences"], "recommended": True, - } + }, }, ) related_database_entries: Optional[str] = Field( @@ -4495,28 +4495,28 @@ class CrossReferencesMixin(ConfiguredBaseModel): "alias": "related_database_entries", 
"domain_of": ["CrossReferencesMixin", "CrossReferences"], "recommended": True, - } + }, }, ) related_database_links: Optional[str] = Field( None, description="""Comma-separated list of related database links for the dataset.""", json_schema_extra={ - "linkml_meta": {"alias": "related_database_links", "domain_of": ["CrossReferencesMixin", "CrossReferences"]} + "linkml_meta": {"alias": "related_database_links", "domain_of": ["CrossReferencesMixin", "CrossReferences"]}, }, ) dataset_citations: Optional[str] = Field( None, description="""Comma-separated list of DOIs for publications citing the dataset.""", json_schema_extra={ - "linkml_meta": {"alias": "dataset_citations", "domain_of": ["CrossReferencesMixin", "CrossReferences"]} + "linkml_meta": {"alias": "dataset_citations", "domain_of": ["CrossReferencesMixin", "CrossReferences"]}, }, ) @field_validator("publications") def pattern_publications(cls, v): pattern = re.compile( - r"(^(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+(\s*,\s*(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+)*$)|(^(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+(\s*,\s*(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+)*$)" + r"(^(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+(\s*,\s*(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+)*$)|(^(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+(\s*,\s*(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+)*$)", ) if isinstance(v, list): for element in v: @@ -4530,7 +4530,7 @@ def pattern_publications(cls, v): @field_validator("related_database_entries") def pattern_related_database_entries(cls, v): pattern = re.compile( - r"(^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8}))*$)|(^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8}))*$)" + 
r"(^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8}))*$)|(^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8}))*$)", ) if isinstance(v, list): for element in v: @@ -4557,7 +4557,7 @@ class CrossReferences(CrossReferencesMixin): "alias": "publications", "domain_of": ["CrossReferencesMixin", "CrossReferences"], "recommended": True, - } + }, }, ) related_database_entries: Optional[str] = Field( @@ -4568,28 +4568,28 @@ class CrossReferences(CrossReferencesMixin): "alias": "related_database_entries", "domain_of": ["CrossReferencesMixin", "CrossReferences"], "recommended": True, - } + }, }, ) related_database_links: Optional[str] = Field( None, description="""Comma-separated list of related database links for the dataset.""", json_schema_extra={ - "linkml_meta": {"alias": "related_database_links", "domain_of": ["CrossReferencesMixin", "CrossReferences"]} + "linkml_meta": {"alias": "related_database_links", "domain_of": ["CrossReferencesMixin", "CrossReferences"]}, }, ) dataset_citations: Optional[str] = Field( None, description="""Comma-separated list of DOIs for publications citing the dataset.""", json_schema_extra={ - "linkml_meta": {"alias": "dataset_citations", "domain_of": ["CrossReferencesMixin", "CrossReferences"]} + "linkml_meta": {"alias": "dataset_citations", "domain_of": ["CrossReferencesMixin", "CrossReferences"]}, }, ) @field_validator("publications") def pattern_publications(cls, v): pattern = re.compile( - r"(^(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+(\s*,\s*(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+)*$)|(^(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+(\s*,\s*(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+)*$)" + r"(^(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+(\s*,\s*(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+)*$)|(^(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+(\s*,\s*(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+)*$)", ) if isinstance(v, list): 
for element in v: @@ -4603,7 +4603,7 @@ def pattern_publications(cls, v): @field_validator("related_database_entries") def pattern_related_database_entries(cls, v): pattern = re.compile( - r"(^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8}))*$)|(^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8}))*$)" + r"(^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8}))*$)|(^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|PDB-[0-9a-zA-Z]{4,8}))*$)", ) if isinstance(v, list): for element in v: @@ -4640,7 +4640,7 @@ class AuthorMixin(ConfiguredBaseModel): "Author", ], "exact_mappings": ["cdp-common:author_name"], - } + }, }, ) email: Optional[str] = Field( @@ -4651,7 +4651,7 @@ class AuthorMixin(ConfiguredBaseModel): "alias": "email", "domain_of": ["AuthorMixin", "Author"], "exact_mappings": ["cdp-common:author_email"], - } + }, }, ) affiliation_name: Optional[str] = Field( @@ -4662,7 +4662,7 @@ class AuthorMixin(ConfiguredBaseModel): "alias": "affiliation_name", "domain_of": ["AuthorMixin", "Author"], "exact_mappings": ["cdp-common:author_affiliation_name"], - } + }, }, ) affiliation_address: Optional[str] = Field( @@ -4673,7 +4673,7 @@ class AuthorMixin(ConfiguredBaseModel): "alias": "affiliation_address", "domain_of": ["AuthorMixin", "Author"], "exact_mappings": ["cdp-common:author_affiliation_address"], - } + }, }, ) affiliation_identifier: Optional[str] = Field( @@ -4685,7 +4685,7 @@ class AuthorMixin(ConfiguredBaseModel): "domain_of": ["AuthorMixin", "Author"], "exact_mappings": ["cdp-common:author_affiliation_identifier"], "recommended": True, - } + }, }, ) corresponding_author_status: Optional[bool] = Field( @@ -4697,7 +4697,7 @@ class AuthorMixin(ConfiguredBaseModel): "domain_of": ["AuthorMixin", "Author"], "exact_mappings": 
["cdp-common:author_corresponding_author_status"], "ifabsent": "False", - } + }, }, ) primary_author_status: Optional[bool] = Field( @@ -4709,7 +4709,7 @@ class AuthorMixin(ConfiguredBaseModel): "domain_of": ["AuthorMixin", "Author"], "exact_mappings": ["cdp-common:author_primary_author_status"], "ifabsent": "False", - } + }, }, ) @@ -4730,7 +4730,7 @@ class Author(AuthorMixin): "domain_of": ["Author"], "exact_mappings": ["cdp-common:author_orcid"], "recommended": True, - } + }, }, ) name: str = Field( @@ -4751,7 +4751,7 @@ class Author(AuthorMixin): "Author", ], "exact_mappings": ["cdp-common:author_name"], - } + }, }, ) email: Optional[str] = Field( @@ -4762,7 +4762,7 @@ class Author(AuthorMixin): "alias": "email", "domain_of": ["AuthorMixin", "Author"], "exact_mappings": ["cdp-common:author_email"], - } + }, }, ) affiliation_name: Optional[str] = Field( @@ -4773,7 +4773,7 @@ class Author(AuthorMixin): "alias": "affiliation_name", "domain_of": ["AuthorMixin", "Author"], "exact_mappings": ["cdp-common:author_affiliation_name"], - } + }, }, ) affiliation_address: Optional[str] = Field( @@ -4784,7 +4784,7 @@ class Author(AuthorMixin): "alias": "affiliation_address", "domain_of": ["AuthorMixin", "Author"], "exact_mappings": ["cdp-common:author_affiliation_address"], - } + }, }, ) affiliation_identifier: Optional[str] = Field( @@ -4796,7 +4796,7 @@ class Author(AuthorMixin): "domain_of": ["AuthorMixin", "Author"], "exact_mappings": ["cdp-common:author_affiliation_identifier"], "recommended": True, - } + }, }, ) corresponding_author_status: Optional[bool] = Field( @@ -4808,7 +4808,7 @@ class Author(AuthorMixin): "domain_of": ["AuthorMixin", "Author"], "exact_mappings": ["cdp-common:author_corresponding_author_status"], "ifabsent": "False", - } + }, }, ) primary_author_status: Optional[bool] = Field( @@ -4820,7 +4820,7 @@ class Author(AuthorMixin): "domain_of": ["AuthorMixin", "Author"], "exact_mappings": ["cdp-common:author_primary_author_status"], "ifabsent": "False", 
- } + }, }, ) diff --git a/schema/core/v2.0.0/codegen/metadata_materialized.yaml b/schema/core/v2.0.0/codegen/metadata_materialized.yaml index 635878f8a..af19c8288 100644 --- a/schema/core/v2.0.0/codegen/metadata_materialized.yaml +++ b/schema/core/v2.0.0/codegen/metadata_materialized.yaml @@ -354,24 +354,54 @@ types: from_schema: metadata base: str pattern: WBStrain[0-9]{8}$ - WORMBASE_TISSUE_ID: - name: WORMBASE_TISSUE_ID - description: A WormBase tissue identifier - from_schema: metadata - base: str - pattern: WBbt:[0-9]{7}$ WORMBASE_DEVELOPMENT_ID: name: WORMBASE_DEVELOPMENT_ID - description: A WormBase identifier + description: A WormBase developmental stage identifier from_schema: metadata base: str pattern: WBls:[0-9]{7}$ HSAPDV_ID: name: HSAPDV_ID - description: A Human Developmental Phenotype Ontology identifier + description: A human developmental phenotype ontology identifier from_schema: metadata base: str pattern: HsapDv:[0-9]{7}$ + MMUSDV_ID: + name: MMUSDV_ID + description: A mouse developmental stage ontology identifier + from_schema: metadata + base: str + pattern: MmusDv:[0-9]{7}$ + ZFS_ID: + name: ZFS_ID + description: A zebrafish developmental stage ontology identifier + from_schema: metadata + base: str + pattern: ZFS:[0-9]{7}$ + FBDV_ID: + name: FBDV_ID + description: A drosophila developmental stage ontology identifier + from_schema: metadata + base: str + pattern: FBdv:[0-9]{8}$ + WBBT_ID: + name: WBBT_ID + description: A WormBase anatomy ontology identifier + from_schema: metadata + base: str + pattern: WBbt:[0-9]{7}$ + ZFA_ID: + name: ZFA_ID + description: A zebrafish anatomy ontology identifier + from_schema: metadata + base: str + pattern: ZFA:[0-9]{7}$ + FBBT_ID: + name: FBBT_ID + description: A drosophila anatomy ontology identifier + from_schema: metadata + base: str + pattern: FBbt:[0-9]{8}$ ONTOLOGY_ID: name: ONTOLOGY_ID description: An ontology identifier @@ -575,6 +605,9 @@ enums: CTFFIND: text: CTFFIND description: The file has 
ctffind schema + Gctf: + text: Gctf + description: The file has Gctf schema deposition_types_enum: name: deposition_types_enum description: Types of data a deposition has @@ -1310,12 +1343,15 @@ classes: recommended: true inlined: true inlined_as_list: true - pattern: (^unknown$)|(WBls:[0-9]{7}$)|(^UBERON:[0-9]{7}$)|(HsapDv:[0-9]{7}$) + pattern: (^unknown$)|(WBls:[0-9]{7}$)|(^UBERON:[0-9]{7}$)|(HsapDv:[0-9]{7}$)|(MmusDv:[0-9]{7}$)|(ZFS:[0-9]{7}$)|(FBdv:[0-9]{8}$) any_of: - range: UNKNOWN_LITERAL - range: WORMBASE_DEVELOPMENT_ID - range: UBERON_ID - range: HSAPDV_ID + - range: MMUSDV_ID + - range: ZFS_ID + - range: FBDV_ID Disease: name: Disease description: The disease or condition of the patients from which assayed biosamples @@ -1467,10 +1503,12 @@ classes: recommended: true inlined: true inlined_as_list: true - pattern: (^CL:[0-9]{7}$)|(WBbt:[0-9]{7}$)|(^UBERON:[0-9]{7}$) + pattern: (^CL:[0-9]{7}$)|(WBbt:[0-9]{7}$)|(ZFA:[0-9]{7}$)|(FBbt:[0-9]{8}$)|(^UBERON:[0-9]{7}$) any_of: - range: CL_ID - - range: WORMBASE_TISSUE_ID + - range: WBBT_ID + - range: ZFA_ID + - range: FBBT_ID - range: UBERON_ID CellType: name: CellType @@ -1523,9 +1561,12 @@ classes: recommended: true inlined: true inlined_as_list: true - pattern: (^CL:[0-9]{7}$)|(^UBERON:[0-9]{7}$) + pattern: (^CL:[0-9]{7}$)|(WBbt:[0-9]{7}$)|(ZFA:[0-9]{7}$)|(FBbt:[0-9]{8}$)|(^UBERON:[0-9]{7}$) any_of: - range: CL_ID + - range: WBBT_ID + - range: ZFA_ID + - range: FBBT_ID - range: UBERON_ID CellStrain: name: CellStrain @@ -4192,6 +4233,7 @@ classes: owner: AnnotationSemanticSegmentationMaskFile domain_of: - AnnotationSemanticSegmentationMaskFile + - AnnotationInstanceSegmentationMaskFile range: boolean inlined: true inlined_as_list: true @@ -4284,6 +4326,106 @@ classes: required: false inlined: true inlined_as_list: true + AnnotationInstanceSegmentationMaskFile: + name: AnnotationInstanceSegmentationMaskFile + description: File and sourcing data for an instance segmentation annotation mask. 
+ Annotation that identifies individual instances of object shape masks. + from_schema: metadata + aliases: + - InstanceSegmentationMask + is_a: AnnotationSourceFile + attributes: + rescale: + name: rescale + description: Whether the annotation file needs to be rescaled. + from_schema: metadata + exact_mappings: + - cdp-common:annotation_source_file_rescale + ifabsent: 'False' + alias: rescale + owner: AnnotationInstanceSegmentationMaskFile + domain_of: + - AnnotationSemanticSegmentationMaskFile + - AnnotationInstanceSegmentationMaskFile + range: boolean + inlined: true + inlined_as_list: true + file_format: + name: file_format + description: File format for this file + from_schema: metadata + exact_mappings: + - cdp-common:annotation_source_file_format + alias: file_format + owner: AnnotationInstanceSegmentationMaskFile + domain_of: + - AnnotationSourceFile + range: string + required: true + inlined: true + inlined_as_list: true + glob_string: + name: glob_string + description: Glob string to match annotation files in the dataset. Required + if annotation_source_file_glob_strings is not provided. + from_schema: metadata + exact_mappings: + - cdp-common:annotation_source_file_glob_string + alias: glob_string + owner: AnnotationInstanceSegmentationMaskFile + domain_of: + - AnnotationSourceFile + range: string + required: false + inlined: true + inlined_as_list: true + glob_strings: + name: glob_strings + description: Glob strings to match annotation files in the dataset. Required + if annotation_source_file_glob_string is not provided. + from_schema: metadata + exact_mappings: + - cdp-common:annotation_source_file_glob_strings + alias: glob_strings + owner: AnnotationInstanceSegmentationMaskFile + domain_of: + - AnnotationSourceFile + range: string + required: false + multivalued: true + inlined: true + inlined_as_list: true + is_visualization_default: + name: is_visualization_default + description: This annotation will be rendered in neuroglancer by default. 
+ from_schema: metadata + exact_mappings: + - cdp-common:annotation_source_file_is_visualization_default + ifabsent: 'False' + alias: is_visualization_default + owner: AnnotationInstanceSegmentationMaskFile + domain_of: + - Tomogram + - AnnotationSourceFile + range: boolean + inlined: true + inlined_as_list: true + is_portal_standard: + name: is_portal_standard + description: Whether the annotation source is a portal standard. + from_schema: metadata + exact_mappings: + - cdp-common:annotation_source_file_is_portal_standard + ifabsent: 'False' + alias: is_portal_standard + owner: AnnotationInstanceSegmentationMaskFile + domain_of: + - AnnotationSourceFile + - Alignment + range: boolean + required: false + inlined: true + inlined_as_list: true AnnotationTriangularMeshFile: name: AnnotationTriangularMeshFile description: File and sourcing data for a triangular mesh annotation. Annotation @@ -5246,7 +5388,7 @@ classes: required: true inlined: true inlined_as_list: true - pattern: ^CTFFIND$ + pattern: (^CTFFIND$)|(^Gctf$) Any: name: Any description: A placeholder for any type of data. 
diff --git a/schema/core/v2.0.0/codegen/metadata_models.py b/schema/core/v2.0.0/codegen/metadata_models.py index 2516374fa..bf1ba4515 100644 --- a/schema/core/v2.0.0/codegen/metadata_models.py +++ b/schema/core/v2.0.0/codegen/metadata_models.py @@ -135,6 +135,20 @@ def __contains__(self, key: str) -> bool: "name": "EMPIAR_ID", "pattern": "^EMPIAR-[0-9]+$", }, + "FBBT_ID": { + "base": "str", + "description": "A drosophila anatomy ontology " "identifier", + "from_schema": "metadata", + "name": "FBBT_ID", + "pattern": "FBbt:[0-9]{8}$", + }, + "FBDV_ID": { + "base": "str", + "description": "A drosophila developmental stage " "ontology identifier", + "from_schema": "metadata", + "name": "FBDV_ID", + "pattern": "FBdv:[0-9]{8}$", + }, "FloatFormattedString": { "base": "str", "description": "A formatted string that " "represents a floating " "point number.", @@ -151,7 +165,7 @@ def __contains__(self, key: str) -> bool: }, "HSAPDV_ID": { "base": "str", - "description": "A Human Developmental Phenotype " "Ontology identifier", + "description": "A human developmental phenotype " "ontology identifier", "from_schema": "metadata", "name": "HSAPDV_ID", "pattern": "HsapDv:[0-9]{7}$", @@ -163,6 +177,13 @@ def __contains__(self, key: str) -> bool: "name": "IntegerFormattedString", "pattern": "^int[ " "]*\\{[a-zA-Z0-9_-]+\\}[ ]*$", }, + "MMUSDV_ID": { + "base": "str", + "description": "A mouse developmental stage ontology " "identifier", + "from_schema": "metadata", + "name": "MMUSDV_ID", + "pattern": "MmusDv:[0-9]{7}$", + }, "MONDO_ID": { "base": "str", "description": "An identifier of type MONDO", @@ -247,9 +268,16 @@ def __contains__(self, key: str) -> bool: "minimum_value": 0, "name": "VersionString", }, + "WBBT_ID": { + "base": "str", + "description": "A WormBase anatomy ontology identifier", + "from_schema": "metadata", + "name": "WBBT_ID", + "pattern": "WBbt:[0-9]{7}$", + }, "WORMBASE_DEVELOPMENT_ID": { "base": "str", - "description": "A WormBase identifier", + "description": 
"A WormBase " "developmental stage " "identifier", "from_schema": "metadata", "name": "WORMBASE_DEVELOPMENT_ID", "pattern": "WBls:[0-9]{7}$", @@ -261,12 +289,19 @@ def __contains__(self, key: str) -> bool: "name": "WORMBASE_STRAIN_ID", "pattern": "WBStrain[0-9]{8}$", }, - "WORMBASE_TISSUE_ID": { + "ZFA_ID": { "base": "str", - "description": "A WormBase tissue identifier", + "description": "A zebrafish anatomy ontology identifier", "from_schema": "metadata", - "name": "WORMBASE_TISSUE_ID", - "pattern": "WBbt:[0-9]{7}$", + "name": "ZFA_ID", + "pattern": "ZFA:[0-9]{7}$", + }, + "ZFS_ID": { + "base": "str", + "description": "A zebrafish developmental stage ontology " "identifier", + "from_schema": "metadata", + "name": "ZFS_ID", + "pattern": "ZFS:[0-9]{7}$", }, "boolean": { "base": "Bool", @@ -683,6 +718,8 @@ class CtfFormatEnum(str, Enum): # The file has ctffind schema CTFFIND = "CTFFIND" + # The file has Gctf schema + Gctf = "Gctf" class DepositionTypesEnum(str, Enum): @@ -1166,6 +1203,9 @@ class DevelopmentStageDetails(ConfiguredBaseModel): {"range": "WORMBASE_DEVELOPMENT_ID"}, {"range": "UBERON_ID"}, {"range": "HSAPDV_ID"}, + {"range": "MMUSDV_ID"}, + {"range": "ZFS_ID"}, + {"range": "FBDV_ID"}, ], "domain_of": [ "Assay", @@ -1185,7 +1225,9 @@ class DevelopmentStageDetails(ConfiguredBaseModel): @field_validator("id") def pattern_id(cls, v): - pattern = re.compile(r"(^unknown$)|(WBls:[0-9]{7}$)|(^UBERON:[0-9]{7}$)|(HsapDv:[0-9]{7}$)") + pattern = re.compile( + r"(^unknown$)|(WBls:[0-9]{7}$)|(^UBERON:[0-9]{7}$)|(HsapDv:[0-9]{7}$)|(MmusDv:[0-9]{7}$)|(ZFS:[0-9]{7}$)|(FBdv:[0-9]{8}$)" + ) if isinstance(v, list): for element in v: if not pattern.match(element): @@ -1347,7 +1389,13 @@ class TissueDetails(ConfiguredBaseModel): json_schema_extra={ "linkml_meta": { "alias": "id", - "any_of": [{"range": "CL_ID"}, {"range": "WORMBASE_TISSUE_ID"}, {"range": "UBERON_ID"}], + "any_of": [ + {"range": "CL_ID"}, + {"range": "WBBT_ID"}, + {"range": "ZFA_ID"}, + {"range": "FBBT_ID"}, 
+ {"range": "UBERON_ID"}, + ], "domain_of": [ "Assay", "DevelopmentStageDetails", @@ -1366,7 +1414,7 @@ class TissueDetails(ConfiguredBaseModel): @field_validator("id") def pattern_id(cls, v): - pattern = re.compile(r"(^CL:[0-9]{7}$)|(WBbt:[0-9]{7}$)|(^UBERON:[0-9]{7}$)") + pattern = re.compile(r"(^CL:[0-9]{7}$)|(WBbt:[0-9]{7}$)|(ZFA:[0-9]{7}$)|(FBbt:[0-9]{8}$)|(^UBERON:[0-9]{7}$)") if isinstance(v, list): for element in v: if not pattern.match(element): @@ -1414,7 +1462,13 @@ class CellType(ConfiguredBaseModel): json_schema_extra={ "linkml_meta": { "alias": "id", - "any_of": [{"range": "CL_ID"}, {"range": "UBERON_ID"}], + "any_of": [ + {"range": "CL_ID"}, + {"range": "WBBT_ID"}, + {"range": "ZFA_ID"}, + {"range": "FBBT_ID"}, + {"range": "UBERON_ID"}, + ], "domain_of": [ "Assay", "DevelopmentStageDetails", @@ -1433,7 +1487,7 @@ class CellType(ConfiguredBaseModel): @field_validator("id") def pattern_id(cls, v): - pattern = re.compile(r"(^CL:[0-9]{7}$)|(^UBERON:[0-9]{7}$)") + pattern = re.compile(r"(^CL:[0-9]{7}$)|(WBbt:[0-9]{7}$)|(ZFA:[0-9]{7}$)|(FBbt:[0-9]{8}$)|(^UBERON:[0-9]{7}$)") if isinstance(v, list): for element in v: if not pattern.match(element): @@ -3084,6 +3138,7 @@ class Tomogram(AuthoredEntity): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -3386,6 +3441,7 @@ class AnnotationSourceFile(ConfiguredBaseModel): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -3406,6 +3462,7 @@ class AnnotationSourceFile(ConfiguredBaseModel): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", 
"AnnotationTriangularMeshGroupFile", ], @@ -3426,6 +3483,7 @@ class AnnotationSourceFile(ConfiguredBaseModel): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -3447,6 +3505,7 @@ class AnnotationSourceFile(ConfiguredBaseModel): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -3469,6 +3528,7 @@ class AnnotationSourceFile(ConfiguredBaseModel): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -3555,6 +3615,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -3575,6 +3636,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -3595,6 +3657,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -3616,6 +3679,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + 
"AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -3638,6 +3702,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -3724,6 +3789,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -3744,6 +3810,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -3764,6 +3831,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -3785,6 +3853,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -3807,6 +3876,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -3894,6 +3964,7 @@ class 
AnnotationPointFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -3914,6 +3985,7 @@ class AnnotationPointFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -3934,6 +4006,7 @@ class AnnotationPointFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -3955,6 +4028,7 @@ class AnnotationPointFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -3977,6 +4051,7 @@ class AnnotationPointFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -4007,6 +4082,7 @@ class AnnotationSegmentationMaskFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -4027,6 +4103,7 @@ class AnnotationSegmentationMaskFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ 
-4047,6 +4124,7 @@ class AnnotationSegmentationMaskFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -4068,6 +4146,7 @@ class AnnotationSegmentationMaskFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -4090,6 +4169,7 @@ class AnnotationSegmentationMaskFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -4125,7 +4205,7 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): json_schema_extra={ "linkml_meta": { "alias": "rescale", - "domain_of": ["AnnotationSemanticSegmentationMaskFile"], + "domain_of": ["AnnotationSemanticSegmentationMaskFile", "AnnotationInstanceSegmentationMaskFile"], "exact_mappings": ["cdp-common:annotation_source_file_rescale"], "ifabsent": "False", } @@ -4155,6 +4235,7 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -4175,6 +4256,7 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -4195,6 +4277,7 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): 
"AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -4216,6 +4299,7 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -4238,6 +4322,137 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", + "AnnotationTriangularMeshFile", + "AnnotationTriangularMeshGroupFile", + ], + "exact_mappings": ["cdp-common:annotation_source_file_is_portal_standard"], + "ifabsent": "False", + } + }, + ) + + +class AnnotationInstanceSegmentationMaskFile(AnnotationSourceFile): + """ + File and sourcing data for an instance segmentation annotation mask. Annotation that identifies individual instances of object shape masks. 
+ """ + + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({"aliases": ["InstanceSegmentationMask"], "from_schema": "metadata"}) + + rescale: Optional[bool] = Field( + False, + description="""Whether the annotation file needs to be rescaled.""", + json_schema_extra={ + "linkml_meta": { + "alias": "rescale", + "domain_of": ["AnnotationSemanticSegmentationMaskFile", "AnnotationInstanceSegmentationMaskFile"], + "exact_mappings": ["cdp-common:annotation_source_file_rescale"], + "ifabsent": "False", + } + }, + ) + file_format: str = Field( + ..., + description="""File format for this file""", + json_schema_extra={ + "linkml_meta": { + "alias": "file_format", + "domain_of": [ + "AnnotationSourceFile", + "AnnotationOrientedPointFile", + "AnnotationInstanceSegmentationFile", + "AnnotationPointFile", + "AnnotationSegmentationMaskFile", + "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", + "AnnotationTriangularMeshFile", + "AnnotationTriangularMeshGroupFile", + ], + "exact_mappings": ["cdp-common:annotation_source_file_format"], + } + }, + ) + glob_string: Optional[str] = Field( + None, + description="""Glob string to match annotation files in the dataset. Required if annotation_source_file_glob_strings is not provided.""", + json_schema_extra={ + "linkml_meta": { + "alias": "glob_string", + "domain_of": [ + "AnnotationSourceFile", + "AnnotationOrientedPointFile", + "AnnotationInstanceSegmentationFile", + "AnnotationPointFile", + "AnnotationSegmentationMaskFile", + "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", + "AnnotationTriangularMeshFile", + "AnnotationTriangularMeshGroupFile", + ], + "exact_mappings": ["cdp-common:annotation_source_file_glob_string"], + } + }, + ) + glob_strings: Optional[List[str]] = Field( + None, + description="""Glob strings to match annotation files in the dataset. 
Required if annotation_source_file_glob_string is not provided.""", + json_schema_extra={ + "linkml_meta": { + "alias": "glob_strings", + "domain_of": [ + "AnnotationSourceFile", + "AnnotationOrientedPointFile", + "AnnotationInstanceSegmentationFile", + "AnnotationPointFile", + "AnnotationSegmentationMaskFile", + "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", + "AnnotationTriangularMeshFile", + "AnnotationTriangularMeshGroupFile", + ], + "exact_mappings": ["cdp-common:annotation_source_file_glob_strings"], + } + }, + ) + is_visualization_default: Optional[bool] = Field( + False, + description="""This annotation will be rendered in neuroglancer by default.""", + json_schema_extra={ + "linkml_meta": { + "alias": "is_visualization_default", + "domain_of": [ + "Tomogram", + "AnnotationSourceFile", + "AnnotationOrientedPointFile", + "AnnotationInstanceSegmentationFile", + "AnnotationPointFile", + "AnnotationSegmentationMaskFile", + "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", + "AnnotationTriangularMeshFile", + "AnnotationTriangularMeshGroupFile", + ], + "exact_mappings": ["cdp-common:annotation_source_file_is_visualization_default"], + "ifabsent": "False", + } + }, + ) + is_portal_standard: Optional[bool] = Field( + False, + description="""Whether the annotation source is a portal standard.""", + json_schema_extra={ + "linkml_meta": { + "alias": "is_portal_standard", + "domain_of": [ + "AnnotationSourceFile", + "Alignment", + "AnnotationOrientedPointFile", + "AnnotationInstanceSegmentationFile", + "AnnotationPointFile", + "AnnotationSegmentationMaskFile", + "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -4281,6 +4496,7 @@ class AnnotationTriangularMeshFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + 
"AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -4301,6 +4517,7 @@ class AnnotationTriangularMeshFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -4321,6 +4538,7 @@ class AnnotationTriangularMeshFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -4342,6 +4560,7 @@ class AnnotationTriangularMeshFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -4364,6 +4583,7 @@ class AnnotationTriangularMeshFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -4431,6 +4651,7 @@ class AnnotationTriangularMeshGroupFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -4451,6 +4672,7 @@ class AnnotationTriangularMeshGroupFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -4471,6 +4693,7 @@ class AnnotationTriangularMeshGroupFile(AnnotationSourceFile): 
"AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -4492,6 +4715,7 @@ class AnnotationTriangularMeshGroupFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -4514,6 +4738,7 @@ class AnnotationTriangularMeshGroupFile(AnnotationSourceFile): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -5119,6 +5344,7 @@ class Alignment(ConfiguredBaseModel): "AnnotationPointFile", "AnnotationSegmentationMaskFile", "AnnotationSemanticSegmentationMaskFile", + "AnnotationInstanceSegmentationMaskFile", "AnnotationTriangularMeshFile", "AnnotationTriangularMeshGroupFile", ], @@ -5238,7 +5464,7 @@ class Ctf(ConfiguredBaseModel): @field_validator("format") def pattern_format(cls, v): - pattern = re.compile(r"^CTFFIND$") + pattern = re.compile(r"(^CTFFIND$)|(^Gctf$)") if isinstance(v, list): for element in v: if not pattern.match(element): @@ -5739,10 +5965,10 @@ def pattern_ORCID(cls, v): FundedEntity.model_rebuild() CrossReferencedEntity.model_rebuild() PicturedEntity.model_rebuild() +PicturedMetadataEntity.model_rebuild() Assay.model_rebuild() DevelopmentStageDetails.model_rebuild() Disease.model_rebuild() -PicturedMetadataEntity.model_rebuild() OrganismDetails.model_rebuild() TissueDetails.model_rebuild() CellType.model_rebuild() @@ -5770,6 +5996,7 @@ def pattern_ORCID(cls, v): AnnotationPointFile.model_rebuild() AnnotationSegmentationMaskFile.model_rebuild() AnnotationSemanticSegmentationMaskFile.model_rebuild() 
+AnnotationInstanceSegmentationMaskFile.model_rebuild() AnnotationTriangularMeshFile.model_rebuild() AnnotationTriangularMeshGroupFile.model_rebuild() IdentifiedObject.model_rebuild() diff --git a/schema/core/v2.0.0/common.yaml b/schema/core/v2.0.0/common.yaml index b6b026001..0d01f1a0b 100644 --- a/schema/core/v2.0.0/common.yaml +++ b/schema/core/v2.0.0/common.yaml @@ -241,7 +241,9 @@ slots: tissue_id: any_of: - range: CL_ID - - range: WORMBASE_TISSUE_ID + - range: WBBT_ID + - range: ZFA_ID + - range: FBBT_ID - range: UBERON_ID recommended: true description: >- @@ -256,6 +258,9 @@ slots: cell_type_id: any_of: - range: CL_ID + - range: WBBT_ID + - range: ZFA_ID + - range: FBBT_ID - range: UBERON_ID recommended: true description: >- @@ -313,6 +318,9 @@ slots: - range: WORMBASE_DEVELOPMENT_ID - range: UBERON_ID - range: HSAPDV_ID + - range: MMUSDV_ID + - range: ZFS_ID + - range: FBDV_ID recommended: true description: >- The ontology identifier for the developmental stage component. @@ -1324,6 +1332,8 @@ enums: permissible_values: CTFFIND: description: The file has ctffind schema + Gctf: + description: The file has Gctf schema deposition_types_enum: description: Types of data a deposition has @@ -1517,21 +1527,46 @@ types: base: str pattern: 'WBStrain[0-9]{8}$' - WORMBASE_TISSUE_ID: - description: A WormBase tissue identifier - base: str - pattern: 'WBbt:[0-9]{7}$' - WORMBASE_DEVELOPMENT_ID: - description: A WormBase identifier + description: A WormBase developmental stage identifier base: str pattern: 'WBls:[0-9]{7}$' HSAPDV_ID: - description: A Human Developmental Phenotype Ontology identifier + description: A human developmental phenotype ontology identifier base: str pattern: 'HsapDv:[0-9]{7}$' + MMUSDV_ID: + description: A mouse developmental stage ontology identifier + base: str + pattern: 'MmusDv:[0-9]{7}$' + + ZFS_ID: + description: A zebrafish developmental stage ontology identifier + base: str + pattern: 'ZFS:[0-9]{7}$' + + FBDV_ID: + description: A 
drosophila developmental stage ontology identifier + base: str + pattern: 'FBdv:[0-9]{8}$' + + WBBT_ID: + description: A WormBase anatomy ontology identifier + base: str + pattern: 'WBbt:[0-9]{7}$' + + ZFA_ID: + description: A zebrafish anatomy ontology identifier + base: str + pattern: 'ZFA:[0-9]{7}$' + + FBBT_ID: + description: A drosophila anatomy ontology identifier + base: str + pattern: 'FBbt:[0-9]{8}$' + ONTOLOGY_ID: description: An ontology identifier base: str diff --git a/schema/core/v2.0.0/metadata.yaml b/schema/core/v2.0.0/metadata.yaml index db3396f66..fcfeb9d90 100644 --- a/schema/core/v2.0.0/metadata.yaml +++ b/schema/core/v2.0.0/metadata.yaml @@ -459,7 +459,6 @@ classes: unit: descriptive_name: sections - TiltSeries: description: Metadata describing a tilt series. @@ -820,6 +819,16 @@ classes: exact_mappings: - cdp-common:annotation_source_file_threshold + AnnotationInstanceSegmentationMaskFile: + is_a: AnnotationSourceFile + aliases: + - InstanceSegmentationMask + description: File and sourcing data for an instance segmentation annotation mask. Annotation that identifies individual instances of object shape masks. 
+ attributes: + rescale: + exact_mappings: + - cdp-common:annotation_source_file_rescale + AnnotationTriangularMeshFile: is_a: AnnotationSourceFile aliases: @@ -870,7 +879,6 @@ classes: - cdp-common:identified_object_filter_value # ============================================================================ - # ============================================================================ # annotation_metadata.json # ============================================================================ @@ -941,8 +949,8 @@ classes: symbol: Å y: any_of: - - range: float - - range: FloatFormattedString + - range: float + - range: FloatFormattedString required: true description: Y dimension of the reconstruction volume in angstrom unit: @@ -950,8 +958,8 @@ classes: symbol: Å z: any_of: - - range: float - - range: FloatFormattedString + - range: float + - range: FloatFormattedString required: true description: Z dimension of the reconstruction volume in angstrom unit: @@ -973,8 +981,8 @@ classes: symbol: Å y: any_of: - - range: float - - range: cdp-common:FloatFormattedString + - range: float + - range: cdp-common:FloatFormattedString required: true description: Y shift of the reconstruction volume in angstrom ifabsent: "float(0)" @@ -983,8 +991,8 @@ classes: symbol: Å z: any_of: - - range: float - - range: cdp-common:FloatFormattedString + - range: float + - range: cdp-common:FloatFormattedString required: true description: Z shift of the reconstruction volume in angstrom ifabsent: "float(0)" @@ -1051,7 +1059,7 @@ classes: dimensions: - exact_cardinality: 4 - exact_cardinality: 4 -# ifabsent: "np.eye(4)" # TODO: comming soon from linkml https://linkml.io/linkml/schemas/arrays#representations + # ifabsent: "np.eye(4)" # TODO: comming soon from linkml https://linkml.io/linkml/schemas/arrays#representations is_portal_standard: description: Whether the alignment is standardized for the portal. 
range: boolean @@ -1076,8 +1084,8 @@ classes: dose_rate: description: The dose exposure for a given frame. any_of: - - range: float - - range: FloatFormattedString + - range: float + - range: FloatFormattedString required: true is_gain_corrected: description: Is the frame gain corrected diff --git a/schema/ingestion_config/v1.0.0/codegen/ingestion_config_models.py b/schema/ingestion_config/v1.0.0/codegen/ingestion_config_models.py index 04c8956eb..bba0a4d58 100644 --- a/schema/ingestion_config/v1.0.0/codegen/ingestion_config_models.py +++ b/schema/ingestion_config/v1.0.0/codegen/ingestion_config_models.py @@ -130,6 +130,18 @@ def __contains__(self, key:str) -> bool: 'from_schema': 'cdp-ingestion-config', 'name': 'EMPIAR_ID', 'pattern': '^EMPIAR-[0-9]+$'}, + 'FBBT_ID': {'base': 'str', + 'description': 'A drosophila anatomy ontology ' + 'identifier', + 'from_schema': 'cdp-ingestion-config', + 'name': 'FBBT_ID', + 'pattern': 'FBbt:[0-9]{8}$'}, + 'FBDV_ID': {'base': 'str', + 'description': 'A drosophila developmental stage ' + 'ontology identifier', + 'from_schema': 'cdp-ingestion-config', + 'name': 'FBDV_ID', + 'pattern': 'FBdv:[0-9]{8}$'}, 'FloatFormattedString': {'base': 'str', 'description': 'A formatted string that ' 'represents a floating ' @@ -144,8 +156,8 @@ def __contains__(self, key:str) -> bool: 'name': 'GO_ID', 'pattern': '^GO:[0-9]{7}$'}, 'HSAPDV_ID': {'base': 'str', - 'description': 'A Human Developmental Phenotype ' - 'Ontology identifier', + 'description': 'A human developmental phenotype ' + 'ontology identifier', 'from_schema': 'cdp-ingestion-config', 'name': 'HSAPDV_ID', 'pattern': 'HsapDv:[0-9]{7}$'}, @@ -156,6 +168,12 @@ def __contains__(self, key:str) -> bool: 'name': 'IntegerFormattedString', 'pattern': '^int[ ' ']*\\{[a-zA-Z0-9_-]+\\}[ ]*$'}, + 'MMUSDV_ID': {'base': 'str', + 'description': 'A mouse developmental stage ontology ' + 'identifier', + 'from_schema': 'cdp-ingestion-config', + 'name': 'MMUSDV_ID', + 'pattern': 'MmusDv:[0-9]{7}$'}, 
'MONDO_ID': {'base': 'str', 'description': 'An identifier of type MONDO', 'from_schema': 'cdp-ingestion-config', @@ -222,8 +240,15 @@ def __contains__(self, key:str) -> bool: 'from_schema': 'cdp-ingestion-config', 'minimum_value': 0, 'name': 'VersionString'}, + 'WBBT_ID': {'base': 'str', + 'description': 'A WormBase anatomy ontology identifier', + 'from_schema': 'cdp-ingestion-config', + 'name': 'WBBT_ID', + 'pattern': 'WBbt:[0-9]{7}$'}, 'WORMBASE_DEVELOPMENT_ID': {'base': 'str', - 'description': 'A WormBase identifier', + 'description': 'A WormBase ' + 'developmental stage ' + 'identifier', 'from_schema': 'cdp-ingestion-config', 'name': 'WORMBASE_DEVELOPMENT_ID', 'pattern': 'WBls:[0-9]{7}$'}, @@ -232,11 +257,17 @@ def __contains__(self, key:str) -> bool: 'from_schema': 'cdp-ingestion-config', 'name': 'WORMBASE_STRAIN_ID', 'pattern': 'WBStrain[0-9]{8}$'}, - 'WORMBASE_TISSUE_ID': {'base': 'str', - 'description': 'A WormBase tissue identifier', - 'from_schema': 'cdp-ingestion-config', - 'name': 'WORMBASE_TISSUE_ID', - 'pattern': 'WBbt:[0-9]{7}$'}, + 'ZFA_ID': {'base': 'str', + 'description': 'A zebrafish anatomy ontology identifier', + 'from_schema': 'cdp-ingestion-config', + 'name': 'ZFA_ID', + 'pattern': 'ZFA:[0-9]{7}$'}, + 'ZFS_ID': {'base': 'str', + 'description': 'A zebrafish developmental stage ontology ' + 'identifier', + 'from_schema': 'cdp-ingestion-config', + 'name': 'ZFS_ID', + 'pattern': 'ZFS:[0-9]{7}$'}, 'boolean': {'base': 'Bool', 'description': 'A binary (true or false) value', 'exact_mappings': ['schema:Boolean'], @@ -571,6 +602,8 @@ class CtfFormatEnum(str, Enum): """ # The file has ctffind schema CTFFIND = "CTFFIND" + # The file has Gctf schema + Gctf = "Gctf" class DepositionTypesEnum(str, Enum): @@ -915,7 +948,10 @@ class DevelopmentStageDetails(ConfiguredBaseModel): 'any_of': [{'range': 'UNKNOWN_LITERAL'}, {'range': 'WORMBASE_DEVELOPMENT_ID'}, {'range': 'UBERON_ID'}, - {'range': 'HSAPDV_ID'}], + {'range': 'HSAPDV_ID'}, + {'range': 'MMUSDV_ID'}, + 
{'range': 'ZFS_ID'}, + {'range': 'FBDV_ID'}], 'domain_of': ['Assay', 'DevelopmentStageDetails', 'Disease', @@ -929,7 +965,7 @@ class DevelopmentStageDetails(ConfiguredBaseModel): @field_validator('id') def pattern_id(cls, v): - pattern=re.compile(r"(^unknown$)|(WBls:[0-9]{7}$)|(^UBERON:[0-9]{7}$)|(HsapDv:[0-9]{7}$)") + pattern=re.compile(r"(^unknown$)|(WBls:[0-9]{7}$)|(^UBERON:[0-9]{7}$)|(HsapDv:[0-9]{7}$)|(MmusDv:[0-9]{7}$)|(ZFS:[0-9]{7}$)|(FBdv:[0-9]{8}$)") if isinstance(v,list): for element in v: if not pattern.match(element): @@ -1035,7 +1071,9 @@ class TissueDetails(ConfiguredBaseModel): 'exact_mappings': ['cdp-common:tissue_name']} }) id: Optional[str] = Field(None, description="""A placeholder for any type of data.""", json_schema_extra = { "linkml_meta": {'alias': 'id', 'any_of': [{'range': 'CL_ID'}, - {'range': 'WORMBASE_TISSUE_ID'}, + {'range': 'WBBT_ID'}, + {'range': 'ZFA_ID'}, + {'range': 'FBBT_ID'}, {'range': 'UBERON_ID'}], 'domain_of': ['Assay', 'DevelopmentStageDetails', @@ -1050,7 +1088,7 @@ class TissueDetails(ConfiguredBaseModel): @field_validator('id') def pattern_id(cls, v): - pattern=re.compile(r"(^CL:[0-9]{7}$)|(WBbt:[0-9]{7}$)|(^UBERON:[0-9]{7}$)") + pattern=re.compile(r"(^CL:[0-9]{7}$)|(WBbt:[0-9]{7}$)|(ZFA:[0-9]{7}$)|(FBbt:[0-9]{8}$)|(^UBERON:[0-9]{7}$)") if isinstance(v,list): for element in v: if not pattern.match(element): @@ -1082,7 +1120,11 @@ class CellType(ConfiguredBaseModel): 'Author'], 'exact_mappings': ['cdp-common:cell_name']} }) id: Optional[str] = Field(None, description="""A placeholder for any type of data.""", json_schema_extra = { "linkml_meta": {'alias': 'id', - 'any_of': [{'range': 'CL_ID'}, {'range': 'UBERON_ID'}], + 'any_of': [{'range': 'CL_ID'}, + {'range': 'WBBT_ID'}, + {'range': 'ZFA_ID'}, + {'range': 'FBBT_ID'}, + {'range': 'UBERON_ID'}], 'domain_of': ['Assay', 'DevelopmentStageDetails', 'Disease', @@ -1096,7 +1138,7 @@ class CellType(ConfiguredBaseModel): @field_validator('id') def pattern_id(cls, v): - 
pattern=re.compile(r"(^CL:[0-9]{7}$)|(^UBERON:[0-9]{7}$)") + pattern=re.compile(r"(^CL:[0-9]{7}$)|(WBbt:[0-9]{7}$)|(ZFA:[0-9]{7}$)|(FBbt:[0-9]{8}$)|(^UBERON:[0-9]{7}$)") if isinstance(v,list): for element in v: if not pattern.match(element): @@ -1942,6 +1984,7 @@ class Tomogram(AuthoredEntity): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'ifabsent': 'True'} }) @@ -2123,6 +2166,7 @@ class AnnotationSourceFile(ConfiguredBaseModel): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_format']} }) @@ -2133,6 +2177,7 @@ class AnnotationSourceFile(ConfiguredBaseModel): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_string']} }) @@ -2143,6 +2188,7 @@ class AnnotationSourceFile(ConfiguredBaseModel): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_strings']} }) @@ -2154,6 +2200,7 @@ class AnnotationSourceFile(ConfiguredBaseModel): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_is_visualization_default'], @@ -2166,6 +2213,7 @@ class 
AnnotationSourceFile(ConfiguredBaseModel): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_is_portal_standard'], @@ -2206,6 +2254,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_format']} }) @@ -2216,6 +2265,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_string']} }) @@ -2226,6 +2276,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_strings']} }) @@ -2237,6 +2288,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_is_visualization_default'], @@ -2249,6 +2301,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 
'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_is_portal_standard'], @@ -2289,6 +2342,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_format']} }) @@ -2299,6 +2353,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_string']} }) @@ -2309,6 +2364,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_strings']} }) @@ -2320,6 +2376,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_is_visualization_default'], @@ -2332,6 +2389,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 
'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_is_portal_standard'], @@ -2371,6 +2429,7 @@ class AnnotationPointFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_format']} }) @@ -2381,6 +2440,7 @@ class AnnotationPointFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_string']} }) @@ -2391,6 +2451,7 @@ class AnnotationPointFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_strings']} }) @@ -2402,6 +2463,7 @@ class AnnotationPointFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_is_visualization_default'], @@ -2414,6 +2476,7 @@ class AnnotationPointFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_is_portal_standard'], @@ -2433,6 +2496,7 @@ class AnnotationSegmentationMaskFile(AnnotationSourceFile): 
'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_format']} }) @@ -2443,6 +2507,7 @@ class AnnotationSegmentationMaskFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_string']} }) @@ -2453,6 +2518,7 @@ class AnnotationSegmentationMaskFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_strings']} }) @@ -2464,6 +2530,7 @@ class AnnotationSegmentationMaskFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_is_visualization_default'], @@ -2476,6 +2543,7 @@ class AnnotationSegmentationMaskFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_is_portal_standard'], @@ -2493,7 +2561,8 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): 'exact_mappings': ['cdp-common:annotation_source_file_mask_label'], 'ifabsent': 'int(1)'} }) rescale: Optional[bool] = Field(False, 
description="""Whether the annotation file needs to be rescaled.""", json_schema_extra = { "linkml_meta": {'alias': 'rescale', - 'domain_of': ['AnnotationSemanticSegmentationMaskFile'], + 'domain_of': ['AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile'], 'exact_mappings': ['cdp-common:annotation_source_file_rescale'], 'ifabsent': 'False'} }) threshold: Optional[float] = Field(None, description="""The threshold for a segmentation mask annotation file.""", json_schema_extra = { "linkml_meta": {'alias': 'threshold', @@ -2506,6 +2575,79 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', + 'AnnotationTriangularMeshFile', + 'AnnotationTriangularMeshGroupFile'], + 'exact_mappings': ['cdp-common:annotation_source_file_format']} }) + glob_string: Optional[str] = Field(None, description="""Glob string to match annotation files in the dataset. Required if annotation_source_file_glob_strings is not provided.""", json_schema_extra = { "linkml_meta": {'alias': 'glob_string', + 'domain_of': ['AnnotationSourceFile', + 'AnnotationOrientedPointFile', + 'AnnotationInstanceSegmentationFile', + 'AnnotationPointFile', + 'AnnotationSegmentationMaskFile', + 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', + 'AnnotationTriangularMeshFile', + 'AnnotationTriangularMeshGroupFile'], + 'exact_mappings': ['cdp-common:annotation_source_file_glob_string']} }) + glob_strings: Optional[List[str]] = Field(None, description="""Glob strings to match annotation files in the dataset. 
Required if annotation_source_file_glob_string is not provided.""", json_schema_extra = { "linkml_meta": {'alias': 'glob_strings', + 'domain_of': ['AnnotationSourceFile', + 'AnnotationOrientedPointFile', + 'AnnotationInstanceSegmentationFile', + 'AnnotationPointFile', + 'AnnotationSegmentationMaskFile', + 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', + 'AnnotationTriangularMeshFile', + 'AnnotationTriangularMeshGroupFile'], + 'exact_mappings': ['cdp-common:annotation_source_file_glob_strings']} }) + is_visualization_default: Optional[bool] = Field(False, description="""This annotation will be rendered in neuroglancer by default.""", json_schema_extra = { "linkml_meta": {'alias': 'is_visualization_default', + 'domain_of': ['Tomogram', + 'AnnotationSourceFile', + 'AnnotationOrientedPointFile', + 'AnnotationInstanceSegmentationFile', + 'AnnotationPointFile', + 'AnnotationSegmentationMaskFile', + 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', + 'AnnotationTriangularMeshFile', + 'AnnotationTriangularMeshGroupFile'], + 'exact_mappings': ['cdp-common:annotation_source_file_is_visualization_default'], + 'ifabsent': 'False'} }) + is_portal_standard: Optional[bool] = Field(False, description="""Whether the annotation source is a portal standard.""", json_schema_extra = { "linkml_meta": {'alias': 'is_portal_standard', + 'domain_of': ['AnnotationSourceFile', + 'Alignment', + 'AnnotationOrientedPointFile', + 'AnnotationInstanceSegmentationFile', + 'AnnotationPointFile', + 'AnnotationSegmentationMaskFile', + 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', + 'AnnotationTriangularMeshFile', + 'AnnotationTriangularMeshGroupFile'], + 'exact_mappings': ['cdp-common:annotation_source_file_is_portal_standard'], + 'ifabsent': 'False'} }) + + +class AnnotationInstanceSegmentationMaskFile(AnnotationSourceFile): + """ + File and sourcing data for an instance segmentation 
annotation mask. Annotation that identifies individual instances of object shape masks. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'aliases': ['InstanceSegmentationMask'], 'from_schema': 'metadata'}) + + rescale: Optional[bool] = Field(False, description="""Whether the annotation file needs to be rescaled.""", json_schema_extra = { "linkml_meta": {'alias': 'rescale', + 'domain_of': ['AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile'], + 'exact_mappings': ['cdp-common:annotation_source_file_rescale'], + 'ifabsent': 'False'} }) + file_format: str = Field(..., description="""File format for this file""", json_schema_extra = { "linkml_meta": {'alias': 'file_format', + 'domain_of': ['AnnotationSourceFile', + 'AnnotationOrientedPointFile', + 'AnnotationInstanceSegmentationFile', + 'AnnotationPointFile', + 'AnnotationSegmentationMaskFile', + 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_format']} }) @@ -2516,6 +2658,7 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_string']} }) @@ -2526,6 +2669,7 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_strings']} }) @@ -2537,6 +2681,7 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): 'AnnotationPointFile', 
'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_is_visualization_default'], @@ -2549,6 +2694,7 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_is_portal_standard'], @@ -2573,6 +2719,7 @@ class AnnotationTriangularMeshFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_format']} }) @@ -2583,6 +2730,7 @@ class AnnotationTriangularMeshFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_string']} }) @@ -2593,6 +2741,7 @@ class AnnotationTriangularMeshFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_strings']} }) @@ -2604,6 +2753,7 @@ class AnnotationTriangularMeshFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 
'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_is_visualization_default'], @@ -2616,6 +2766,7 @@ class AnnotationTriangularMeshFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_is_portal_standard'], @@ -2654,6 +2805,7 @@ class AnnotationTriangularMeshGroupFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_format']} }) @@ -2664,6 +2816,7 @@ class AnnotationTriangularMeshGroupFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_string']} }) @@ -2674,6 +2827,7 @@ class AnnotationTriangularMeshGroupFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_strings']} }) @@ -2685,6 +2839,7 @@ class AnnotationTriangularMeshGroupFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_is_visualization_default'], @@ -2697,6 +2852,7 @@ 
class AnnotationTriangularMeshGroupFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_is_portal_standard'], @@ -3024,6 +3180,7 @@ class Alignment(ConfiguredBaseModel): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'ifabsent': 'False'} }) @@ -3113,7 +3270,7 @@ class Ctf(ConfiguredBaseModel): @field_validator('format') def pattern_format(cls, v): - pattern=re.compile(r"^CTFFIND$") + pattern=re.compile(r"(^CTFFIND$)|(^Gctf$)") if isinstance(v,list): for element in v: if not pattern.match(element): @@ -7902,10 +8059,10 @@ class TomogramHeader(ConfiguredBaseModel): FundedEntity.model_rebuild() CrossReferencedEntity.model_rebuild() PicturedEntity.model_rebuild() +PicturedMetadataEntity.model_rebuild() Assay.model_rebuild() DevelopmentStageDetails.model_rebuild() Disease.model_rebuild() -PicturedMetadataEntity.model_rebuild() OrganismDetails.model_rebuild() TissueDetails.model_rebuild() CellType.model_rebuild() @@ -7933,6 +8090,7 @@ class TomogramHeader(ConfiguredBaseModel): AnnotationPointFile.model_rebuild() AnnotationSegmentationMaskFile.model_rebuild() AnnotationSemanticSegmentationMaskFile.model_rebuild() +AnnotationInstanceSegmentationMaskFile.model_rebuild() AnnotationTriangularMeshFile.model_rebuild() AnnotationTriangularMeshGroupFile.model_rebuild() IdentifiedObject.model_rebuild() diff --git a/schema/ingestion_config/v1.0.0/codegen/ingestion_config_models.schema.json b/schema/ingestion_config/v1.0.0/codegen/ingestion_config_models.schema.json index d6ea60c2c..af4bf5d2c 100644 --- a/schema/ingestion_config/v1.0.0/codegen/ingestion_config_models.schema.json +++ 
b/schema/ingestion_config/v1.0.0/codegen/ingestion_config_models.schema.json @@ -683,6 +683,59 @@ "title": "AnnotationInstanceSegmentationFile", "type": "object" }, + "AnnotationInstanceSegmentationMaskFile": { + "additionalProperties": false, + "description": "File and sourcing data for an instance segmentation annotation mask. Annotation that identifies individual instances of object shape masks.", + "properties": { + "file_format": { + "description": "File format for this file", + "type": "string" + }, + "glob_string": { + "description": "Glob string to match annotation files in the dataset. Required if annotation_source_file_glob_strings is not provided.", + "type": [ + "string", + "null" + ] + }, + "glob_strings": { + "description": "Glob strings to match annotation files in the dataset. Required if annotation_source_file_glob_string is not provided.", + "items": { + "type": "string" + }, + "type": [ + "array", + "null" + ] + }, + "is_portal_standard": { + "description": "Whether the annotation source is a portal standard.", + "type": [ + "boolean", + "null" + ] + }, + "is_visualization_default": { + "description": "This annotation will be rendered in neuroglancer by default.", + "type": [ + "boolean", + "null" + ] + }, + "rescale": { + "description": "Whether the annotation file needs to be rescaled.", + "type": [ + "boolean", + "null" + ] + } + }, + "required": [ + "file_format" + ], + "title": "AnnotationInstanceSegmentationMaskFile", + "type": "object" + }, "AnnotationMethodLinkTypeEnum": { "description": "Describes the type of link associated to the annotation method.", "enum": [ @@ -1623,6 +1676,18 @@ "pattern": "^CL:[0-9]{7}$", "type": "string" }, + { + "pattern": "WBbt:[0-9]{7}$", + "type": "string" + }, + { + "pattern": "ZFA:[0-9]{7}$", + "type": "string" + }, + { + "pattern": "FBbt:[0-9]{8}$", + "type": "string" + }, { "pattern": "^UBERON:[0-9]{7}$", "type": "string" @@ -1632,7 +1697,7 @@ } ], "description": "A placeholder for any type of data.", - 
"pattern": "(^CL:[0-9]{7}$)|(^UBERON:[0-9]{7}$)" + "pattern": "(^CL:[0-9]{7}$)|(WBbt:[0-9]{7}$)|(ZFA:[0-9]{7}$)|(FBbt:[0-9]{8}$)|(^UBERON:[0-9]{7}$)" }, "name": { "description": "Name of the cell type from which a biological sample used in a CryoET study is derived from.", @@ -2036,7 +2101,7 @@ "format": { "$ref": "#/$defs/CtfFormatEnum", "description": "The format of the ctf file.", - "pattern": "^CTFFIND$" + "pattern": "(^CTFFIND$)|(^Gctf$)" } }, "required": [ @@ -2078,7 +2143,8 @@ "CtfFormatEnum": { "description": "Used to determine what ctf parser to use.", "enum": [ - "CTFFIND" + "CTFFIND", + "Gctf" ], "title": "CtfFormatEnum", "type": "string" @@ -3136,12 +3202,24 @@ "pattern": "HsapDv:[0-9]{7}$", "type": "string" }, + { + "pattern": "MmusDv:[0-9]{7}$", + "type": "string" + }, + { + "pattern": "ZFS:[0-9]{7}$", + "type": "string" + }, + { + "pattern": "FBdv:[0-9]{8}$", + "type": "string" + }, { "type": "null" } ], "description": "A placeholder for any type of data.", - "pattern": "(^unknown$)|(WBls:[0-9]{7}$)|(^UBERON:[0-9]{7}$)|(HsapDv:[0-9]{7}$)" + "pattern": "(^unknown$)|(WBls:[0-9]{7}$)|(^UBERON:[0-9]{7}$)|(HsapDv:[0-9]{7}$)|(MmusDv:[0-9]{7}$)|(ZFS:[0-9]{7}$)|(FBdv:[0-9]{8}$)" }, "name": { "description": "Name of the developmental stage component.", @@ -5406,6 +5484,14 @@ "pattern": "WBbt:[0-9]{7}$", "type": "string" }, + { + "pattern": "ZFA:[0-9]{7}$", + "type": "string" + }, + { + "pattern": "FBbt:[0-9]{8}$", + "type": "string" + }, { "pattern": "^UBERON:[0-9]{7}$", "type": "string" @@ -5415,7 +5501,7 @@ } ], "description": "A placeholder for any type of data.", - "pattern": "(^CL:[0-9]{7}$)|(WBbt:[0-9]{7}$)|(^UBERON:[0-9]{7}$)" + "pattern": "(^CL:[0-9]{7}$)|(WBbt:[0-9]{7}$)|(ZFA:[0-9]{7}$)|(FBbt:[0-9]{8}$)|(^UBERON:[0-9]{7}$)" }, "name": { "description": "Name of the tissue from which a biological sample used in a CryoET study is derived from.", diff --git a/schema/ingestion_config/v1.0.0/codegen/ingestion_config_models_materialized.yaml 
b/schema/ingestion_config/v1.0.0/codegen/ingestion_config_models_materialized.yaml index 2f0c1d753..005309d66 100644 --- a/schema/ingestion_config/v1.0.0/codegen/ingestion_config_models_materialized.yaml +++ b/schema/ingestion_config/v1.0.0/codegen/ingestion_config_models_materialized.yaml @@ -338,24 +338,54 @@ types: from_schema: cdp-ingestion-config base: str pattern: WBStrain[0-9]{8}$ - WORMBASE_TISSUE_ID: - name: WORMBASE_TISSUE_ID - description: A WormBase tissue identifier - from_schema: cdp-ingestion-config - base: str - pattern: WBbt:[0-9]{7}$ WORMBASE_DEVELOPMENT_ID: name: WORMBASE_DEVELOPMENT_ID - description: A WormBase identifier + description: A WormBase developmental stage identifier from_schema: cdp-ingestion-config base: str pattern: WBls:[0-9]{7}$ HSAPDV_ID: name: HSAPDV_ID - description: A Human Developmental Phenotype Ontology identifier + description: A human developmental phenotype ontology identifier from_schema: cdp-ingestion-config base: str pattern: HsapDv:[0-9]{7}$ + MMUSDV_ID: + name: MMUSDV_ID + description: A mouse developmental stage ontology identifier + from_schema: cdp-ingestion-config + base: str + pattern: MmusDv:[0-9]{7}$ + ZFS_ID: + name: ZFS_ID + description: A zebrafish developmental stage ontology identifier + from_schema: cdp-ingestion-config + base: str + pattern: ZFS:[0-9]{7}$ + FBDV_ID: + name: FBDV_ID + description: A drosophila developmental stage ontology identifier + from_schema: cdp-ingestion-config + base: str + pattern: FBdv:[0-9]{8}$ + WBBT_ID: + name: WBBT_ID + description: A WormBase anatomy ontology identifier + from_schema: cdp-ingestion-config + base: str + pattern: WBbt:[0-9]{7}$ + ZFA_ID: + name: ZFA_ID + description: A zebrafish anatomy ontology identifier + from_schema: cdp-ingestion-config + base: str + pattern: ZFA:[0-9]{7}$ + FBBT_ID: + name: FBBT_ID + description: A drosophila anatomy ontology identifier + from_schema: cdp-ingestion-config + base: str + pattern: FBbt:[0-9]{8}$ ONTOLOGY_ID: name: 
ONTOLOGY_ID description: An ontology identifier @@ -559,6 +589,9 @@ enums: CTFFIND: text: CTFFIND description: The file has ctffind schema + Gctf: + text: Gctf + description: The file has Gctf schema deposition_types_enum: name: deposition_types_enum description: Types of data a deposition has diff --git a/schema/ingestion_config/v1.0.0/v1.0.0 b/schema/ingestion_config/v1.0.0/v1.0.0 new file mode 120000 index 000000000..60453e690 --- /dev/null +++ b/schema/ingestion_config/v1.0.0/v1.0.0 @@ -0,0 +1 @@ +v1.0.0 \ No newline at end of file diff --git a/schema/ingestion_config/v1.0.0/v2.0.0 b/schema/ingestion_config/v1.0.0/v2.0.0 new file mode 120000 index 000000000..6eaf89433 --- /dev/null +++ b/schema/ingestion_config/v1.0.0/v2.0.0 @@ -0,0 +1 @@ +v2.0.0 \ No newline at end of file diff --git a/schema/metadata_files/v2.0.0/codegen/metadata_files.py b/schema/metadata_files/v2.0.0/codegen/metadata_files.py index b6e7cdb55..b949d2842 100644 --- a/schema/metadata_files/v2.0.0/codegen/metadata_files.py +++ b/schema/metadata_files/v2.0.0/codegen/metadata_files.py @@ -84,11 +84,21 @@ def __contains__(self, key:str) -> bool: 'from_schema': 'metadata', 'name': 'BTO_ID', 'pattern': '^BTO:[0-9]{7}$'}, + 'CC_ID': {'base': 'str', + 'description': 'A Cell Culture Collection identifier', + 'from_schema': 'metadata', + 'name': 'CC_ID', + 'pattern': '^CC-[0-9]{4}$'}, 'CL_ID': {'base': 'str', 'description': 'A Cell Ontology identifier', 'from_schema': 'metadata', 'name': 'CL_ID', 'pattern': '^CL:[0-9]{7}$'}, + 'CVCL_ID': {'base': 'str', + 'description': 'A Cellosaurus identifier', + 'from_schema': 'metadata', + 'name': 'CVCL_ID', + 'pattern': '^CVCL_[A-Z0-9]{4,}$'}, 'DOI': {'base': 'str', 'description': 'A Digital Object Identifier', 'from_schema': 'metadata', @@ -99,6 +109,12 @@ def __contains__(self, key:str) -> bool: 'from_schema': 'metadata', 'name': 'DOI_LIST', 'pattern': '^(doi:)?10\\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+(\\s*,\\s*(doi:)?10\\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+)*$'}, + 
'EFO_ID': {'base': 'str', + 'description': 'An Experimental Factor Ontology ' + 'identifier', + 'from_schema': 'metadata', + 'name': 'EFO_ID', + 'pattern': '^EFO:[0-9]{7}$'}, 'EMDB_ID': {'base': 'str', 'description': 'An Electron Microscopy Data Bank ' 'identifier', @@ -124,6 +140,18 @@ def __contains__(self, key:str) -> bool: 'from_schema': 'metadata', 'name': 'EMPIAR_ID', 'pattern': '^EMPIAR-[0-9]+$'}, + 'FBBT_ID': {'base': 'str', + 'description': 'A drosophila anatomy ontology ' + 'identifier', + 'from_schema': 'metadata', + 'name': 'FBBT_ID', + 'pattern': 'FBbt:[0-9]{8}$'}, + 'FBDV_ID': {'base': 'str', + 'description': 'A drosophila developmental stage ' + 'ontology identifier', + 'from_schema': 'metadata', + 'name': 'FBDV_ID', + 'pattern': 'FBdv:[0-9]{8}$'}, 'FloatFormattedString': {'base': 'str', 'description': 'A formatted string that ' 'represents a floating ' @@ -137,6 +165,12 @@ def __contains__(self, key:str) -> bool: 'from_schema': 'metadata', 'name': 'GO_ID', 'pattern': '^GO:[0-9]{7}$'}, + 'HSAPDV_ID': {'base': 'str', + 'description': 'A human developmental phenotype ' + 'ontology identifier', + 'from_schema': 'metadata', + 'name': 'HSAPDV_ID', + 'pattern': 'HsapDv:[0-9]{7}$'}, 'IntegerFormattedString': {'base': 'str', 'description': 'A formatted string that ' 'represents an integer.', @@ -144,6 +178,22 @@ def __contains__(self, key:str) -> bool: 'name': 'IntegerFormattedString', 'pattern': '^int[ ' ']*\\{[a-zA-Z0-9_-]+\\}[ ]*$'}, + 'MMUSDV_ID': {'base': 'str', + 'description': 'A mouse developmental stage ontology ' + 'identifier', + 'from_schema': 'metadata', + 'name': 'MMUSDV_ID', + 'pattern': 'MmusDv:[0-9]{7}$'}, + 'MONDO_ID': {'base': 'str', + 'description': 'An identifier of type MONDO', + 'from_schema': 'metadata', + 'name': 'MONDO_ID', + 'pattern': '^MONDO:[0-9]{7}$'}, + 'NCBI_TAXON_ID': {'base': 'str', + 'description': 'A NCBI Taxonomy identifier', + 'from_schema': 'metadata', + 'name': 'NCBI_TAXON_ID', + 'pattern': '^NCBITaxon:[0-9]+$'}, 
'ONTOLOGY_ID': {'base': 'str', 'description': 'An ontology identifier', 'from_schema': 'metadata', @@ -155,6 +205,11 @@ def __contains__(self, key:str) -> bool: 'from_schema': 'metadata', 'name': 'ORCID', 'pattern': '[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$'}, + 'PATO_ID': {'base': 'str', + 'description': 'An identifier of type PATO', + 'from_schema': 'metadata', + 'name': 'PATO_ID', + 'pattern': '^PATO:[0-9]{7}$'}, 'PDB_ID': {'base': 'str', 'description': 'A Protein Data Bank identifier', 'from_schema': 'metadata', @@ -168,11 +223,22 @@ def __contains__(self, key:str) -> bool: 'name': 'StringFormattedString', 'pattern': '^[ ]*\\{[a-zA-Z0-9_-]+\\}[ ' ']*$'}, + 'UBERON_ID': {'base': 'str', + 'description': 'An UBERON identifier', + 'from_schema': 'metadata', + 'name': 'UBERON_ID', + 'pattern': '^UBERON:[0-9]{7}$'}, 'UNIPROT_ID': {'base': 'str', 'description': 'A UniProt identifier', 'from_schema': 'metadata', 'name': 'UNIPROT_ID', 'pattern': '^UniProtKB:[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}$'}, + 'UNKNOWN_LITERAL': {'base': 'str', + 'description': 'A placeholder for an unknown ' + 'value.', + 'from_schema': 'metadata', + 'name': 'UNKNOWN_LITERAL', + 'pattern': '^unknown$'}, 'URLorS3URI': {'base': 'str', 'description': 'A URL or S3 URI', 'from_schema': 'metadata', @@ -184,11 +250,34 @@ def __contains__(self, key:str) -> bool: 'from_schema': 'metadata', 'minimum_value': 0, 'name': 'VersionString'}, - 'WORMBASE_ID': {'base': 'str', - 'description': 'A WormBase identifier', - 'from_schema': 'metadata', - 'name': 'WORMBASE_ID', - 'pattern': 'WBStrain[0-9]{8}$'}, + 'WBBT_ID': {'base': 'str', + 'description': 'A WormBase anatomy ontology identifier', + 'from_schema': 'metadata', + 'name': 'WBBT_ID', + 'pattern': 'WBbt:[0-9]{7}$'}, + 'WORMBASE_DEVELOPMENT_ID': {'base': 'str', + 'description': 'A WormBase ' + 'developmental stage ' + 'identifier', + 'from_schema': 'metadata', + 'name': 'WORMBASE_DEVELOPMENT_ID', + 'pattern': 
'WBls:[0-9]{7}$'}, + 'WORMBASE_STRAIN_ID': {'base': 'str', + 'description': 'A WormBase strain identifier', + 'from_schema': 'metadata', + 'name': 'WORMBASE_STRAIN_ID', + 'pattern': 'WBStrain[0-9]{8}$'}, + 'ZFA_ID': {'base': 'str', + 'description': 'A zebrafish anatomy ontology identifier', + 'from_schema': 'metadata', + 'name': 'ZFA_ID', + 'pattern': 'ZFA:[0-9]{7}$'}, + 'ZFS_ID': {'base': 'str', + 'description': 'A zebrafish developmental stage ontology ' + 'identifier', + 'from_schema': 'metadata', + 'name': 'ZFS_ID', + 'pattern': 'ZFS:[0-9]{7}$'}, 'boolean': {'base': 'Bool', 'description': 'A binary (true or false) value', 'exact_mappings': ['schema:Boolean'], @@ -523,6 +612,8 @@ class CtfFormatEnum(str, Enum): """ # The file has ctffind schema CTFFIND = "CTFFIND" + # The file has Gctf schema + Gctf = "Gctf" class DepositionTypesEnum(str, Enum): @@ -541,22 +632,26 @@ class SampleTypeEnum(str, Enum): """ Type of sample imaged in a CryoET study. """ - # Tomographic data of whole cells or cell sections. - cell = "cell" - # Tomographic data of tissue sections. - tissue = "tissue" - # Tomographic data of sections through multicellular organisms. - organism = "organism" - # Tomographic data of purified organelles. - organelle = "organelle" - # Tomographic data of purified viruses or VLPs. - virus = "virus" - # Tomographic data of in vitro reconstituted systems or mixtures of proteins. - in_vitro = "in_vitro" + # Tomographic data of immortalized cells or immortalized cell sections + cell_line = "cell_line" # Simulated tomographic data. in_silico = "in_silico" + # Tomographic data of in vitro reconstituted systems or mixtures of proteins. + in_vitro = "in_vitro" + # Tomographic data of purified organelles. + organelle = "organelle" + # Tomographic data of sections through multicellular organisms. + organism = "organism" + # Tomographic data of organoid-derived samples. + organoid = "organoid" # Other type of sample. 
other = "other" + # Tomographic data of whole primary cells or primary cell sections. + primary_cell_culture = "primary_cell_culture" + # Tomographic data of tissue sections. + tissue = "tissue" + # Tomographic data of purified viruses or VLPs. + virus = "virus" class TiltseriesCameraAcquireModeEnum(str, Enum): @@ -813,6 +908,151 @@ class PicturedMetadataEntity(ConfiguredBaseModel): 'DepositionMetadata']} }) +class Assay(ConfiguredBaseModel): + """ + The assay that was used to create the dataset. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'metadata'}) + + name: str = Field(..., description="""Name of the assay component.""", json_schema_extra = { "linkml_meta": {'alias': 'name', + 'domain_of': ['Assay', + 'DevelopmentStageDetails', + 'Disease', + 'OrganismDetails', + 'TissueDetails', + 'CellType', + 'CellStrain', + 'CellComponent', + 'AnnotationObject', + 'AnnotationTriangularMeshGroupFile', + 'AuthorMixin', + 'Author'], + 'exact_mappings': ['cdp-common:assay_name']} }) + id: Optional[str] = Field(None, description="""The EFO identifier for the cellular component.""", json_schema_extra = { "linkml_meta": {'alias': 'id', + 'domain_of': ['Assay', + 'DevelopmentStageDetails', + 'Disease', + 'TissueDetails', + 'CellType', + 'CellStrain', + 'CellComponent', + 'AnnotationObject'], + 'exact_mappings': ['cdp-common:assay_id'], + 'recommended': True} }) + + @field_validator('id') + def pattern_id(cls, v): + pattern=re.compile(r"^EFO:[0-9]{7}$") + if isinstance(v,list): + for element in v: + if not pattern.match(element): + raise ValueError(f"Invalid id format: {element}") + elif isinstance(v,str): + if not pattern.match(v): + raise ValueError(f"Invalid id format: {v}") + return v + + +class DevelopmentStageDetails(ConfiguredBaseModel): + """ + The development stage of the patients or organisms from which assayed biosamples were derived. 
+ """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'metadata'}) + + name: Optional[str] = Field(None, description="""Name of the developmental stage component.""", json_schema_extra = { "linkml_meta": {'alias': 'name', + 'domain_of': ['Assay', + 'DevelopmentStageDetails', + 'Disease', + 'OrganismDetails', + 'TissueDetails', + 'CellType', + 'CellStrain', + 'CellComponent', + 'AnnotationObject', + 'AnnotationTriangularMeshGroupFile', + 'AuthorMixin', + 'Author'], + 'exact_mappings': ['cdp-common:development_stage_name'], + 'recommended': True} }) + id: Optional[str] = Field(None, description="""A placeholder for any type of data.""", json_schema_extra = { "linkml_meta": {'alias': 'id', + 'any_of': [{'range': 'UNKNOWN_LITERAL'}, + {'range': 'WORMBASE_DEVELOPMENT_ID'}, + {'range': 'UBERON_ID'}, + {'range': 'HSAPDV_ID'}, + {'range': 'MMUSDV_ID'}, + {'range': 'ZFS_ID'}, + {'range': 'FBDV_ID'}], + 'domain_of': ['Assay', + 'DevelopmentStageDetails', + 'Disease', + 'TissueDetails', + 'CellType', + 'CellStrain', + 'CellComponent', + 'AnnotationObject'], + 'exact_mappings': ['cdp-common:development_stage_id'], + 'recommended': True} }) + + @field_validator('id') + def pattern_id(cls, v): + pattern=re.compile(r"(^unknown$)|(WBls:[0-9]{7}$)|(^UBERON:[0-9]{7}$)|(HsapDv:[0-9]{7}$)|(MmusDv:[0-9]{7}$)|(ZFS:[0-9]{7}$)|(FBdv:[0-9]{8}$)") + if isinstance(v,list): + for element in v: + if not pattern.match(element): + raise ValueError(f"Invalid id format: {element}") + elif isinstance(v,str): + if not pattern.match(v): + raise ValueError(f"Invalid id format: {v}") + return v + + +class Disease(ConfiguredBaseModel): + """ + The disease or condition of the patients from which assayed biosamples were derived. 
+ """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'metadata'}) + + name: Optional[str] = Field(None, description="""Name of the disease.""", json_schema_extra = { "linkml_meta": {'alias': 'name', + 'domain_of': ['Assay', + 'DevelopmentStageDetails', + 'Disease', + 'OrganismDetails', + 'TissueDetails', + 'CellType', + 'CellStrain', + 'CellComponent', + 'AnnotationObject', + 'AnnotationTriangularMeshGroupFile', + 'AuthorMixin', + 'Author'], + 'exact_mappings': ['cdp-common:disease_name'], + 'recommended': True} }) + id: Optional[str] = Field(None, description="""A placeholder for any type of data.""", json_schema_extra = { "linkml_meta": {'alias': 'id', + 'any_of': [{'range': 'MONDO_ID'}, {'range': 'PATO_ID'}], + 'domain_of': ['Assay', + 'DevelopmentStageDetails', + 'Disease', + 'TissueDetails', + 'CellType', + 'CellStrain', + 'CellComponent', + 'AnnotationObject'], + 'exact_mappings': ['cdp-common:disease_id'], + 'recommended': True} }) + + @field_validator('id') + def pattern_id(cls, v): + pattern=re.compile(r"(^MONDO:[0-9]{7}$)|(^PATO:[0-9]{7}$)") + if isinstance(v,list): + for element in v: + if not pattern.match(element): + raise ValueError(f"Invalid id format: {element}") + elif isinstance(v,str): + if not pattern.match(v): + raise ValueError(f"Invalid id format: {v}") + return v + + class OrganismDetails(ConfiguredBaseModel): """ The species from which the sample was derived. @@ -820,7 +1060,10 @@ class OrganismDetails(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'metadata'}) name: str = Field(..., description="""Name of the organism from which a biological sample used in a CryoET study is derived from, e.g. 
homo sapiens.""", json_schema_extra = { "linkml_meta": {'alias': 'name', - 'domain_of': ['OrganismDetails', + 'domain_of': ['Assay', + 'DevelopmentStageDetails', + 'Disease', + 'OrganismDetails', 'TissueDetails', 'CellType', 'CellStrain', @@ -843,7 +1086,10 @@ class TissueDetails(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'metadata'}) name: str = Field(..., description="""Name of the tissue from which a biological sample used in a CryoET study is derived from.""", json_schema_extra = { "linkml_meta": {'alias': 'name', - 'domain_of': ['OrganismDetails', + 'domain_of': ['Assay', + 'DevelopmentStageDetails', + 'Disease', + 'OrganismDetails', 'TissueDetails', 'CellType', 'CellStrain', @@ -853,8 +1099,16 @@ class TissueDetails(ConfiguredBaseModel): 'AuthorMixin', 'Author'], 'exact_mappings': ['cdp-common:tissue_name']} }) - id: Optional[str] = Field(None, description="""The UBERON identifier for the tissue.""", json_schema_extra = { "linkml_meta": {'alias': 'id', - 'domain_of': ['TissueDetails', + id: Optional[str] = Field(None, description="""A placeholder for any type of data.""", json_schema_extra = { "linkml_meta": {'alias': 'id', + 'any_of': [{'range': 'CL_ID'}, + {'range': 'WBBT_ID'}, + {'range': 'ZFA_ID'}, + {'range': 'FBBT_ID'}, + {'range': 'UBERON_ID'}], + 'domain_of': ['Assay', + 'DevelopmentStageDetails', + 'Disease', + 'TissueDetails', 'CellType', 'CellStrain', 'CellComponent', @@ -864,7 +1118,7 @@ class TissueDetails(ConfiguredBaseModel): @field_validator('id') def pattern_id(cls, v): - pattern=re.compile(r"^BTO:[0-9]{7}$") + pattern=re.compile(r"(^CL:[0-9]{7}$)|(WBbt:[0-9]{7}$)|(ZFA:[0-9]{7}$)|(FBbt:[0-9]{8}$)|(^UBERON:[0-9]{7}$)") if isinstance(v,list): for element in v: if not pattern.match(element): @@ -882,7 +1136,10 @@ class CellType(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'metadata'}) name: str = Field(..., description="""Name of the cell type from which a 
biological sample used in a CryoET study is derived from.""", json_schema_extra = { "linkml_meta": {'alias': 'name', - 'domain_of': ['OrganismDetails', + 'domain_of': ['Assay', + 'DevelopmentStageDetails', + 'Disease', + 'OrganismDetails', 'TissueDetails', 'CellType', 'CellStrain', @@ -892,8 +1149,16 @@ class CellType(ConfiguredBaseModel): 'AuthorMixin', 'Author'], 'exact_mappings': ['cdp-common:cell_name']} }) - id: Optional[str] = Field(None, description="""Cell Ontology identifier for the cell type""", json_schema_extra = { "linkml_meta": {'alias': 'id', - 'domain_of': ['TissueDetails', + id: Optional[str] = Field(None, description="""A placeholder for any type of data.""", json_schema_extra = { "linkml_meta": {'alias': 'id', + 'any_of': [{'range': 'CL_ID'}, + {'range': 'WBBT_ID'}, + {'range': 'ZFA_ID'}, + {'range': 'FBBT_ID'}, + {'range': 'UBERON_ID'}], + 'domain_of': ['Assay', + 'DevelopmentStageDetails', + 'Disease', + 'TissueDetails', 'CellType', 'CellStrain', 'CellComponent', @@ -903,7 +1168,7 @@ class CellType(ConfiguredBaseModel): @field_validator('id') def pattern_id(cls, v): - pattern=re.compile(r"^CL:[0-9]{7}$") + pattern=re.compile(r"(^CL:[0-9]{7}$)|(WBbt:[0-9]{7}$)|(ZFA:[0-9]{7}$)|(FBbt:[0-9]{8}$)|(^UBERON:[0-9]{7}$)") if isinstance(v,list): for element in v: if not pattern.match(element): @@ -921,7 +1186,10 @@ class CellStrain(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'metadata'}) name: str = Field(..., description="""Cell line or strain for the sample.""", json_schema_extra = { "linkml_meta": {'alias': 'name', - 'domain_of': ['OrganismDetails', + 'domain_of': ['Assay', + 'DevelopmentStageDetails', + 'Disease', + 'OrganismDetails', 'TissueDetails', 'CellType', 'CellStrain', @@ -932,8 +1200,14 @@ class CellStrain(ConfiguredBaseModel): 'Author'], 'exact_mappings': ['cdp-common:cell_strain_name']} }) id: Optional[str] = Field(None, description="""A placeholder for any type of data.""", json_schema_extra = { 
"linkml_meta": {'alias': 'id', - 'any_of': [{'range': 'WORMBASE_ID'}, {'range': 'ONTOLOGY_ID'}], - 'domain_of': ['TissueDetails', + 'any_of': [{'range': 'WORMBASE_STRAIN_ID'}, + {'range': 'NCBI_TAXON_ID'}, + {'range': 'CVCL_ID'}, + {'range': 'CC_ID'}], + 'domain_of': ['Assay', + 'DevelopmentStageDetails', + 'Disease', + 'TissueDetails', 'CellType', 'CellStrain', 'CellComponent', @@ -943,7 +1217,7 @@ class CellStrain(ConfiguredBaseModel): @field_validator('id') def pattern_id(cls, v): - pattern=re.compile(r"(WBStrain[0-9]{8}$)|(^[a-zA-Z]+:[0-9]+$)") + pattern=re.compile(r"(WBStrain[0-9]{8}$)|(^NCBITaxon:[0-9]+$)|(^CVCL_[A-Z0-9]{4,}$)|(^CC-[0-9]{4}$)") if isinstance(v,list): for element in v: if not pattern.match(element): @@ -961,7 +1235,10 @@ class CellComponent(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'metadata'}) name: str = Field(..., description="""Name of the cellular component.""", json_schema_extra = { "linkml_meta": {'alias': 'name', - 'domain_of': ['OrganismDetails', + 'domain_of': ['Assay', + 'DevelopmentStageDetails', + 'Disease', + 'OrganismDetails', 'TissueDetails', 'CellType', 'CellStrain', @@ -972,7 +1249,10 @@ class CellComponent(ConfiguredBaseModel): 'Author'], 'exact_mappings': ['cdp-common:cell_component_name']} }) id: Optional[str] = Field(None, description="""The GO identifier for the cellular component.""", json_schema_extra = { "linkml_meta": {'alias': 'id', - 'domain_of': ['TissueDetails', + 'domain_of': ['Assay', + 'DevelopmentStageDetails', + 'Disease', + 'TissueDetails', 'CellType', 'CellStrain', 'CellComponent', @@ -1024,10 +1304,16 @@ class ExperimentMetadata(ConfiguredBaseModel): 'domain_of': ['ExperimentMetadata', 'Dataset', 'DatasetMetadata']} }) cell_component: Optional[CellComponent] = Field(None, description="""The cellular component from which the sample was derived.""", json_schema_extra = { "linkml_meta": {'alias': 'cell_component', 'domain_of': ['ExperimentMetadata', 'Dataset', 
'DatasetMetadata']} }) + assay: Assay = Field(..., description="""The assay that was used to create the dataset.""", json_schema_extra = { "linkml_meta": {'alias': 'assay', + 'domain_of': ['ExperimentMetadata', 'Dataset', 'DatasetMetadata']} }) + development_stage: DevelopmentStageDetails = Field(..., description="""The development stage of the patients or organisms from which assayed biosamples were derived.""", json_schema_extra = { "linkml_meta": {'alias': 'development_stage', + 'domain_of': ['ExperimentMetadata', 'Dataset', 'DatasetMetadata']} }) + disease: Disease = Field(..., description="""The disease or condition of the patients from which assayed biosamples were derived.""", json_schema_extra = { "linkml_meta": {'alias': 'disease', + 'domain_of': ['ExperimentMetadata', 'Dataset', 'DatasetMetadata']} }) @field_validator('sample_type') def pattern_sample_type(cls, v): - pattern=re.compile(r"(^cell$)|(^tissue$)|(^organism$)|(^organelle$)|(^virus$)|(^in_vitro$)|(^in_silico$)|(^other$)") + pattern=re.compile(r"(^cell_line$)|(^in_silico$)|(^in_vitro$)|(^organelle$)|(^organism$)|(^organoid$)|(^other$)|(^primary_cell_culture$)|(^tissue$)|(^virus$)") if isinstance(v,list): for element in v: if not pattern.match(element): @@ -1116,10 +1402,16 @@ class Dataset(ExperimentMetadata, CrossReferencedEntity, FundedEntity, AuthoredE 'domain_of': ['ExperimentMetadata', 'Dataset', 'DatasetMetadata']} }) cell_component: Optional[CellComponent] = Field(None, description="""The cellular component from which the sample was derived.""", json_schema_extra = { "linkml_meta": {'alias': 'cell_component', 'domain_of': ['ExperimentMetadata', 'Dataset', 'DatasetMetadata']} }) + assay: Assay = Field(..., description="""The assay that was used to create the dataset.""", json_schema_extra = { "linkml_meta": {'alias': 'assay', + 'domain_of': ['ExperimentMetadata', 'Dataset', 'DatasetMetadata']} }) + development_stage: DevelopmentStageDetails = Field(..., description="""The development stage 
of the patients or organisms from which assayed biosamples were derived.""", json_schema_extra = { "linkml_meta": {'alias': 'development_stage', + 'domain_of': ['ExperimentMetadata', 'Dataset', 'DatasetMetadata']} }) + disease: Disease = Field(..., description="""The disease or condition of the patients from which assayed biosamples were derived.""", json_schema_extra = { "linkml_meta": {'alias': 'disease', + 'domain_of': ['ExperimentMetadata', 'Dataset', 'DatasetMetadata']} }) @field_validator('sample_type') def pattern_sample_type(cls, v): - pattern=re.compile(r"(^cell$)|(^tissue$)|(^organism$)|(^organelle$)|(^virus$)|(^in_vitro$)|(^in_silico$)|(^other$)") + pattern=re.compile(r"(^cell_line$)|(^in_silico$)|(^in_vitro$)|(^organelle$)|(^organism$)|(^organoid$)|(^other$)|(^primary_cell_culture$)|(^tissue$)|(^virus$)") if isinstance(v,list): for element in v: if not pattern.match(element): @@ -1768,6 +2060,7 @@ class Tomogram(AuthoredEntity): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile', 'AnnotationFileMetadata', @@ -1879,14 +2172,20 @@ class AnnotationObject(ConfiguredBaseModel): id: str = Field(..., description="""A placeholder for any type of data.""", json_schema_extra = { "linkml_meta": {'alias': 'id', 'any_of': [{'range': 'GO_ID'}, {'range': 'UNIPROT_ID'}], - 'domain_of': ['TissueDetails', + 'domain_of': ['Assay', + 'DevelopmentStageDetails', + 'Disease', + 'TissueDetails', 'CellType', 'CellStrain', 'CellComponent', 'AnnotationObject'], 'exact_mappings': ['cdp-common:annotation_object_id']} }) name: str = Field(..., description="""Name of the object being annotated (e.g. 
ribosome, nuclear pore complex, actin filament, membrane)""", json_schema_extra = { "linkml_meta": {'alias': 'name', - 'domain_of': ['OrganismDetails', + 'domain_of': ['Assay', + 'DevelopmentStageDetails', + 'Disease', + 'OrganismDetails', 'TissueDetails', 'CellType', 'CellStrain', @@ -1959,6 +2258,7 @@ class AnnotationSourceFile(ConfiguredBaseModel): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_format']} }) @@ -1969,6 +2269,7 @@ class AnnotationSourceFile(ConfiguredBaseModel): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_string']} }) @@ -1979,6 +2280,7 @@ class AnnotationSourceFile(ConfiguredBaseModel): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_strings']} }) @@ -1990,6 +2292,7 @@ class AnnotationSourceFile(ConfiguredBaseModel): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile', 'AnnotationFileMetadata', @@ -2004,6 +2307,7 @@ class AnnotationSourceFile(ConfiguredBaseModel): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile', 'AlignmentMetadata'], @@ -2034,6 +2338,10 @@ class 
AnnotationOrientedPointFile(AnnotationSourceFile): 'AnnotationInstanceSegmentationFile'], 'exact_mappings': ['cdp-common:annotation_source_file_order'], 'ifabsent': 'string(xyz)'} }) + mesh_source_path: Optional[str] = Field(None, description="""The path to the mesh source file associated with an oriented point file.""", json_schema_extra = { "linkml_meta": {'alias': 'mesh_source_path', + 'domain_of': ['AnnotationOrientedPointFile', + 'AnnotationInstanceSegmentationFile'], + 'exact_mappings': ['cdp-common:annotation_source_file_mesh_source_path']} }) file_format: str = Field(..., description="""File format for this file""", json_schema_extra = { "linkml_meta": {'alias': 'file_format', 'domain_of': ['AnnotationSourceFile', 'AnnotationOrientedPointFile', @@ -2041,6 +2349,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_format']} }) @@ -2051,6 +2360,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_string']} }) @@ -2061,6 +2371,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_strings']} }) @@ -2072,6 +2383,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 
'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile', 'AnnotationFileMetadata', @@ -2086,6 +2398,7 @@ class AnnotationOrientedPointFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile', 'AlignmentMetadata'], @@ -2116,6 +2429,10 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): 'AnnotationInstanceSegmentationFile'], 'exact_mappings': ['cdp-common:annotation_source_file_order'], 'ifabsent': 'string(xyz)'} }) + mesh_source_path: Optional[str] = Field(None, description="""The path to the mesh source file associated with an oriented point file.""", json_schema_extra = { "linkml_meta": {'alias': 'mesh_source_path', + 'domain_of': ['AnnotationOrientedPointFile', + 'AnnotationInstanceSegmentationFile'], + 'exact_mappings': ['cdp-common:annotation_source_file_mesh_source_path']} }) file_format: str = Field(..., description="""File format for this file""", json_schema_extra = { "linkml_meta": {'alias': 'file_format', 'domain_of': ['AnnotationSourceFile', 'AnnotationOrientedPointFile', @@ -2123,6 +2440,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_format']} }) @@ -2133,6 +2451,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': 
['cdp-common:annotation_source_file_glob_string']} }) @@ -2143,6 +2462,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_strings']} }) @@ -2154,6 +2474,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile', 'AnnotationFileMetadata', @@ -2168,6 +2489,7 @@ class AnnotationInstanceSegmentationFile(AnnotationOrientedPointFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile', 'AlignmentMetadata'], @@ -2208,6 +2530,7 @@ class AnnotationPointFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_format']} }) @@ -2218,6 +2541,7 @@ class AnnotationPointFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_string']} }) @@ -2228,6 +2552,7 @@ class AnnotationPointFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 
'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_strings']} }) @@ -2239,6 +2564,7 @@ class AnnotationPointFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile', 'AnnotationFileMetadata', @@ -2253,6 +2579,7 @@ class AnnotationPointFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile', 'AlignmentMetadata'], @@ -2273,6 +2600,7 @@ class AnnotationSegmentationMaskFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_format']} }) @@ -2283,6 +2611,7 @@ class AnnotationSegmentationMaskFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_string']} }) @@ -2293,6 +2622,7 @@ class AnnotationSegmentationMaskFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_strings']} }) @@ -2304,6 +2634,7 @@ class AnnotationSegmentationMaskFile(AnnotationSourceFile): 
'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile', 'AnnotationFileMetadata', @@ -2318,6 +2649,7 @@ class AnnotationSegmentationMaskFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile', 'AlignmentMetadata'], @@ -2336,7 +2668,8 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): 'exact_mappings': ['cdp-common:annotation_source_file_mask_label'], 'ifabsent': 'int(1)'} }) rescale: Optional[bool] = Field(False, description="""Whether the annotation file needs to be rescaled.""", json_schema_extra = { "linkml_meta": {'alias': 'rescale', - 'domain_of': ['AnnotationSemanticSegmentationMaskFile'], + 'domain_of': ['AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile'], 'exact_mappings': ['cdp-common:annotation_source_file_rescale'], 'ifabsent': 'False'} }) threshold: Optional[float] = Field(None, description="""The threshold for a segmentation mask annotation file.""", json_schema_extra = { "linkml_meta": {'alias': 'threshold', @@ -2349,6 +2682,82 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', + 'AnnotationTriangularMeshFile', + 'AnnotationTriangularMeshGroupFile'], + 'exact_mappings': ['cdp-common:annotation_source_file_format']} }) + glob_string: Optional[str] = Field(None, description="""Glob string to match annotation files in the dataset. 
Required if annotation_source_file_glob_strings is not provided.""", json_schema_extra = { "linkml_meta": {'alias': 'glob_string', + 'domain_of': ['AnnotationSourceFile', + 'AnnotationOrientedPointFile', + 'AnnotationInstanceSegmentationFile', + 'AnnotationPointFile', + 'AnnotationSegmentationMaskFile', + 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', + 'AnnotationTriangularMeshFile', + 'AnnotationTriangularMeshGroupFile'], + 'exact_mappings': ['cdp-common:annotation_source_file_glob_string']} }) + glob_strings: Optional[List[str]] = Field(None, description="""Glob strings to match annotation files in the dataset. Required if annotation_source_file_glob_string is not provided.""", json_schema_extra = { "linkml_meta": {'alias': 'glob_strings', + 'domain_of': ['AnnotationSourceFile', + 'AnnotationOrientedPointFile', + 'AnnotationInstanceSegmentationFile', + 'AnnotationPointFile', + 'AnnotationSegmentationMaskFile', + 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', + 'AnnotationTriangularMeshFile', + 'AnnotationTriangularMeshGroupFile'], + 'exact_mappings': ['cdp-common:annotation_source_file_glob_strings']} }) + is_visualization_default: Optional[bool] = Field(False, description="""This annotation will be rendered in neuroglancer by default.""", json_schema_extra = { "linkml_meta": {'alias': 'is_visualization_default', + 'domain_of': ['Tomogram', + 'AnnotationSourceFile', + 'AnnotationOrientedPointFile', + 'AnnotationInstanceSegmentationFile', + 'AnnotationPointFile', + 'AnnotationSegmentationMaskFile', + 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', + 'AnnotationTriangularMeshFile', + 'AnnotationTriangularMeshGroupFile', + 'AnnotationFileMetadata', + 'TomogramMetadata'], + 'exact_mappings': ['cdp-common:annotation_source_file_is_visualization_default'], + 'ifabsent': 'False'} }) + is_portal_standard: Optional[bool] = Field(False, 
description="""Whether the annotation source is a portal standard.""", json_schema_extra = { "linkml_meta": {'alias': 'is_portal_standard', + 'domain_of': ['AnnotationSourceFile', + 'Alignment', + 'AnnotationOrientedPointFile', + 'AnnotationInstanceSegmentationFile', + 'AnnotationPointFile', + 'AnnotationSegmentationMaskFile', + 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', + 'AnnotationTriangularMeshFile', + 'AnnotationTriangularMeshGroupFile', + 'AlignmentMetadata'], + 'exact_mappings': ['cdp-common:annotation_source_file_is_portal_standard'], + 'ifabsent': 'False'} }) + + +class AnnotationInstanceSegmentationMaskFile(AnnotationSourceFile): + """ + File and sourcing data for an instance segmentation annotation mask. Annotation that identifies individual instances of object shape masks. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'aliases': ['InstanceSegmentationMask'], 'from_schema': 'metadata'}) + + rescale: Optional[bool] = Field(False, description="""Whether the annotation file needs to be rescaled.""", json_schema_extra = { "linkml_meta": {'alias': 'rescale', + 'domain_of': ['AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile'], + 'exact_mappings': ['cdp-common:annotation_source_file_rescale'], + 'ifabsent': 'False'} }) + file_format: str = Field(..., description="""File format for this file""", json_schema_extra = { "linkml_meta": {'alias': 'file_format', + 'domain_of': ['AnnotationSourceFile', + 'AnnotationOrientedPointFile', + 'AnnotationInstanceSegmentationFile', + 'AnnotationPointFile', + 'AnnotationSegmentationMaskFile', + 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_format']} }) @@ -2359,6 +2768,7 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): 'AnnotationPointFile', 
'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_string']} }) @@ -2369,6 +2779,7 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_strings']} }) @@ -2380,6 +2791,7 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile', 'AnnotationFileMetadata', @@ -2394,6 +2806,7 @@ class AnnotationSemanticSegmentationMaskFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile', 'AlignmentMetadata'], @@ -2419,6 +2832,7 @@ class AnnotationTriangularMeshFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_format']} }) @@ -2429,6 +2843,7 @@ class AnnotationTriangularMeshFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': 
['cdp-common:annotation_source_file_glob_string']} }) @@ -2439,6 +2854,7 @@ class AnnotationTriangularMeshFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_strings']} }) @@ -2450,6 +2866,7 @@ class AnnotationTriangularMeshFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile', 'AnnotationFileMetadata', @@ -2464,6 +2881,7 @@ class AnnotationTriangularMeshFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile', 'AlignmentMetadata'], @@ -2483,7 +2901,10 @@ class AnnotationTriangularMeshGroupFile(AnnotationSourceFile): 'exact_mappings': ['cdp-common:annotation_source_file_scale_factor'], 'ifabsent': 'float(1)'} }) name: Optional[str] = Field(None, description="""The name that identifies to a single annotation mesh among multiple meshes.""", json_schema_extra = { "linkml_meta": {'alias': 'name', - 'domain_of': ['OrganismDetails', + 'domain_of': ['Assay', + 'DevelopmentStageDetails', + 'Disease', + 'OrganismDetails', 'TissueDetails', 'CellType', 'CellStrain', @@ -2500,6 +2921,7 @@ class AnnotationTriangularMeshGroupFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_format']} }) @@ -2510,6 +2932,7 @@ class 
AnnotationTriangularMeshGroupFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_string']} }) @@ -2520,6 +2943,7 @@ class AnnotationTriangularMeshGroupFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile'], 'exact_mappings': ['cdp-common:annotation_source_file_glob_strings']} }) @@ -2531,6 +2955,7 @@ class AnnotationTriangularMeshGroupFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile', 'AnnotationFileMetadata', @@ -2545,6 +2970,7 @@ class AnnotationTriangularMeshGroupFile(AnnotationSourceFile): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile', 'AlignmentMetadata'], @@ -2892,6 +3318,7 @@ class Alignment(ConfiguredBaseModel): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile', 'AlignmentMetadata'], @@ -2995,7 +3422,7 @@ class Ctf(ConfiguredBaseModel): @field_validator('format') def pattern_format(cls, v): - pattern=re.compile(r"^CTFFIND$") + pattern=re.compile(r"(^CTFFIND$)|(^Gctf$)") if isinstance(v,list): for element in v: if not pattern.match(element): @@ -3133,6 +3560,9 @@ class AuthorMixin(ConfiguredBaseModel): name: str = Field(..., 
description="""The full name of the author.""", json_schema_extra = { "linkml_meta": {'alias': 'name', 'domain_of': ['AuthorMixin', 'Author', + 'Assay', + 'DevelopmentStageDetails', + 'Disease', 'OrganismDetails', 'TissueDetails', 'CellType', @@ -3182,6 +3612,9 @@ class Author(AuthorMixin): 'exact_mappings': ['cdp-common:kaggle_id']} }) name: str = Field(..., description="""The full name of the author.""", json_schema_extra = { "linkml_meta": {'alias': 'name', 'domain_of': ['AuthorMixin', + 'Assay', + 'DevelopmentStageDetails', + 'Disease', 'OrganismDetails', 'TissueDetails', 'CellType', @@ -3327,6 +3760,7 @@ class AlignmentMetadata(DefaultMetadata, Alignment): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile', 'AlignmentMetadata'], @@ -3412,6 +3846,7 @@ class AnnotationFileMetadata(ConfiguredBaseModel): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile', 'AnnotationFileMetadata', @@ -3728,10 +4163,16 @@ class DatasetMetadata(DefaultMetadata, Dataset, PicturedMetadataEntity): 'domain_of': ['ExperimentMetadata', 'Dataset', 'DatasetMetadata']} }) cell_component: Optional[CellComponent] = Field(None, description="""The cellular component from which the sample was derived.""", json_schema_extra = { "linkml_meta": {'alias': 'cell_component', 'domain_of': ['ExperimentMetadata', 'Dataset', 'DatasetMetadata']} }) + assay: Assay = Field(..., description="""The assay that was used to create the dataset.""", json_schema_extra = { "linkml_meta": {'alias': 'assay', + 'domain_of': ['ExperimentMetadata', 'Dataset', 'DatasetMetadata']} }) + development_stage: DevelopmentStageDetails = Field(..., description="""The development stage of the patients or organisms from 
which assayed biosamples were derived.""", json_schema_extra = { "linkml_meta": {'alias': 'development_stage', + 'domain_of': ['ExperimentMetadata', 'Dataset', 'DatasetMetadata']} }) + disease: Disease = Field(..., description="""The disease or condition of the patients from which assayed biosamples were derived.""", json_schema_extra = { "linkml_meta": {'alias': 'disease', + 'domain_of': ['ExperimentMetadata', 'Dataset', 'DatasetMetadata']} }) @field_validator('sample_type') def pattern_sample_type(cls, v): - pattern=re.compile(r"(^cell$)|(^tissue$)|(^organism$)|(^organelle$)|(^virus$)|(^in_vitro$)|(^in_silico$)|(^other$)") + pattern=re.compile(r"(^cell_line$)|(^in_silico$)|(^in_vitro$)|(^organelle$)|(^organism$)|(^organoid$)|(^other$)|(^primary_cell_culture$)|(^tissue$)|(^virus$)") if isinstance(v,list): for element in v: if not pattern.match(element): @@ -4224,6 +4665,7 @@ class TomogramMetadata(DefaultMetadata, Tomogram): 'AnnotationPointFile', 'AnnotationSegmentationMaskFile', 'AnnotationSemanticSegmentationMaskFile', + 'AnnotationInstanceSegmentationMaskFile', 'AnnotationTriangularMeshFile', 'AnnotationTriangularMeshGroupFile', 'AnnotationFileMetadata', @@ -4319,6 +4761,9 @@ def pattern_processing(cls, v): CrossReferencedEntity.model_rebuild() PicturedEntity.model_rebuild() PicturedMetadataEntity.model_rebuild() +Assay.model_rebuild() +DevelopmentStageDetails.model_rebuild() +Disease.model_rebuild() OrganismDetails.model_rebuild() TissueDetails.model_rebuild() CellType.model_rebuild() @@ -4346,6 +4791,7 @@ def pattern_processing(cls, v): AnnotationPointFile.model_rebuild() AnnotationSegmentationMaskFile.model_rebuild() AnnotationSemanticSegmentationMaskFile.model_rebuild() +AnnotationInstanceSegmentationMaskFile.model_rebuild() AnnotationTriangularMeshFile.model_rebuild() AnnotationTriangularMeshGroupFile.model_rebuild() IdentifiedObject.model_rebuild() diff --git a/schema/metadata_files/v2.0.0/codegen/metadata_files.schema.json 
b/schema/metadata_files/v2.0.0/codegen/metadata_files.schema.json index 01e24166f..31265af3e 100644 --- a/schema/metadata_files/v2.0.0/codegen/metadata_files.schema.json +++ b/schema/metadata_files/v2.0.0/codegen/metadata_files.schema.json @@ -652,6 +652,13 @@ "null" ] }, + "mesh_source_path": { + "description": "The path to the mesh source file associated with an oriented point file.", + "type": [ + "string", + "null" + ] + }, "order": { "description": "The order of axes for an oriented point / instance segmentation annotation file.", "type": [ @@ -666,6 +673,59 @@ "title": "AnnotationInstanceSegmentationFile", "type": "object" }, + "AnnotationInstanceSegmentationMaskFile": { + "additionalProperties": false, + "description": "File and sourcing data for an instance segmentation annotation mask. Annotation that identifies individual instances of object shape masks.", + "properties": { + "file_format": { + "description": "File format for this file", + "type": "string" + }, + "glob_string": { + "description": "Glob string to match annotation files in the dataset. Required if annotation_source_file_glob_strings is not provided.", + "type": [ + "string", + "null" + ] + }, + "glob_strings": { + "description": "Glob strings to match annotation files in the dataset. 
Required if annotation_source_file_glob_string is not provided.", + "items": { + "type": "string" + }, + "type": [ + "array", + "null" + ] + }, + "is_portal_standard": { + "description": "Whether the annotation source is a portal standard.", + "type": [ + "boolean", + "null" + ] + }, + "is_visualization_default": { + "description": "This annotation will be rendered in neuroglancer by default.", + "type": [ + "boolean", + "null" + ] + }, + "rescale": { + "description": "Whether the annotation file needs to be rescaled.", + "type": [ + "boolean", + "null" + ] + } + }, + "required": [ + "file_format" + ], + "title": "AnnotationInstanceSegmentationMaskFile", + "type": "object" + }, "AnnotationMetadata": { "additionalProperties": false, "description": "Metadata describing an annotation.", @@ -948,6 +1008,13 @@ "null" ] }, + "mesh_source_path": { + "description": "The path to the mesh source file associated with an oriented point file.", + "type": [ + "string", + "null" + ] + }, "order": { "description": "The order of axes for an oriented point / instance segmentation annotation file.", "type": [ @@ -1323,6 +1390,29 @@ "string" ] }, + "Assay": { + "additionalProperties": false, + "description": "The assay that was used to create the dataset.", + "properties": { + "id": { + "description": "The EFO identifier for the cellular component.", + "pattern": "^EFO:[0-9]{7}$", + "type": [ + "string", + "null" + ] + }, + "name": { + "description": "Name of the assay component.", + "type": "string" + } + }, + "required": [ + "name" + ], + "title": "Assay", + "type": "object" + }, "Author": { "additionalProperties": false, "description": "Author of a scientific data entity.", @@ -1485,7 +1575,15 @@ "type": "string" }, { - "pattern": "^[a-zA-Z]+:[0-9]+$", + "pattern": "^NCBITaxon:[0-9]+$", + "type": "string" + }, + { + "pattern": "^CVCL_[A-Z0-9]{4,}$", + "type": "string" + }, + { + "pattern": "^CC-[0-9]{4}$", "type": "string" }, { @@ -1493,7 +1591,7 @@ } ], "description": "A 
placeholder for any type of data.", - "pattern": "(WBStrain[0-9]{8}$)|(^[a-zA-Z]+:[0-9]+$)" + "pattern": "(WBStrain[0-9]{8}$)|(^NCBITaxon:[0-9]+$)|(^CVCL_[A-Z0-9]{4,}$)|(^CC-[0-9]{4}$)" }, "name": { "description": "Cell line or strain for the sample.", @@ -1511,12 +1609,33 @@ "description": "The cell type from which the sample was derived.", "properties": { "id": { - "description": "Cell Ontology identifier for the cell type", - "pattern": "^CL:[0-9]{7}$", - "type": [ - "string", - "null" - ] + "anyOf": [ + { + "pattern": "^CL:[0-9]{7}$", + "type": "string" + }, + { + "pattern": "WBbt:[0-9]{7}$", + "type": "string" + }, + { + "pattern": "ZFA:[0-9]{7}$", + "type": "string" + }, + { + "pattern": "FBbt:[0-9]{8}$", + "type": "string" + }, + { + "pattern": "^UBERON:[0-9]{7}$", + "type": "string" + }, + { + "type": "null" + } + ], + "description": "A placeholder for any type of data.", + "pattern": "(^CL:[0-9]{7}$)|(WBbt:[0-9]{7}$)|(ZFA:[0-9]{7}$)|(FBbt:[0-9]{8}$)|(^UBERON:[0-9]{7}$)" }, "name": { "description": "Name of the cell type from which a biological sample used in a CryoET study is derived from.", @@ -1574,7 +1693,7 @@ "format": { "$ref": "#/$defs/CtfFormatEnum", "description": "The format of the ctf file.", - "pattern": "^CTFFIND$" + "pattern": "(^CTFFIND$)|(^Gctf$)" } }, "required": [ @@ -1586,7 +1705,8 @@ "CtfFormatEnum": { "description": "Used to determine what ctf parser to use.", "enum": [ - "CTFFIND" + "CTFFIND", + "Gctf" ], "title": "CtfFormatEnum", "type": "string" @@ -1595,6 +1715,10 @@ "additionalProperties": false, "description": "High-level description of a cryoET dataset.", "properties": { + "assay": { + "$ref": "#/$defs/Assay", + "description": "The assay that was used to create the dataset." + }, "authors": { "description": "Author of a scientific data entity.", "items": { @@ -1663,6 +1787,14 @@ "$ref": "#/$defs/DateStamp", "description": "A set of dates at which a data item was deposited, published and last modified." 
}, + "development_stage": { + "$ref": "#/$defs/DevelopmentStageDetails", + "description": "The development stage of the patients or organisms from which assayed biosamples were derived." + }, + "disease": { + "$ref": "#/$defs/Disease", + "description": "The disease or condition of the patients from which assayed biosamples were derived." + }, "funding": { "description": "A funding source for a scientific data entity (base for JSON and DB representation).", "items": { @@ -1708,7 +1840,7 @@ "sample_type": { "$ref": "#/$defs/SampleTypeEnum", "description": "Type of sample imaged in a CryoET study.", - "pattern": "(^cell$)|(^tissue$)|(^organism$)|(^organelle$)|(^virus$)|(^in_vitro$)|(^in_silico$)|(^other$)" + "pattern": "(^cell_line$)|(^in_silico$)|(^in_vitro$)|(^organelle$)|(^organism$)|(^organoid$)|(^other$)|(^primary_cell_culture$)|(^tissue$)|(^virus$)" }, "tissue": { "anyOf": [ @@ -1728,7 +1860,10 @@ "dataset_description", "dates", "authors", - "sample_type" + "sample_type", + "assay", + "development_stage", + "disease" ], "title": "Dataset", "type": "object" @@ -1737,6 +1872,10 @@ "additionalProperties": false, "description": "Metadata describing a dataset.", "properties": { + "assay": { + "$ref": "#/$defs/Assay", + "description": "The assay that was used to create the dataset." + }, "authors": { "description": "Author of a scientific data entity.", "items": { @@ -1809,6 +1948,14 @@ "description": "An identifier for a CryoET deposition, assigned by the Data Portal. Used to identify the deposition the entity is a part of.", "type": "integer" }, + "development_stage": { + "$ref": "#/$defs/DevelopmentStageDetails", + "description": "The development stage of the patients or organisms from which assayed biosamples were derived." + }, + "disease": { + "$ref": "#/$defs/Disease", + "description": "The disease or condition of the patients from which assayed biosamples were derived." 
+ }, "funding": { "description": "A funding source for a scientific data entity (base for JSON and DB representation).", "items": { @@ -1862,7 +2009,7 @@ "sample_type": { "$ref": "#/$defs/SampleTypeEnum", "description": "Type of sample imaged in a CryoET study.", - "pattern": "(^cell$)|(^tissue$)|(^organism$)|(^organelle$)|(^virus$)|(^in_vitro$)|(^in_silico$)|(^other$)" + "pattern": "(^cell_line$)|(^in_silico$)|(^in_vitro$)|(^organelle$)|(^organism$)|(^organoid$)|(^other$)|(^primary_cell_culture$)|(^tissue$)|(^virus$)" }, "tissue": { "anyOf": [ @@ -1885,7 +2032,10 @@ "dataset_description", "dates", "authors", - "sample_type" + "sample_type", + "assay", + "development_stage", + "disease" ], "title": "DatasetMetadata", "type": "object" @@ -2112,10 +2262,98 @@ "title": "DepositionTypesEnum", "type": "string" }, + "DevelopmentStageDetails": { + "additionalProperties": false, + "description": "The development stage of the patients or organisms from which assayed biosamples were derived.", + "properties": { + "id": { + "anyOf": [ + { + "pattern": "^unknown$", + "type": "string" + }, + { + "pattern": "WBls:[0-9]{7}$", + "type": "string" + }, + { + "pattern": "^UBERON:[0-9]{7}$", + "type": "string" + }, + { + "pattern": "HsapDv:[0-9]{7}$", + "type": "string" + }, + { + "pattern": "MmusDv:[0-9]{7}$", + "type": "string" + }, + { + "pattern": "ZFS:[0-9]{7}$", + "type": "string" + }, + { + "pattern": "FBdv:[0-9]{8}$", + "type": "string" + }, + { + "type": "null" + } + ], + "description": "A placeholder for any type of data.", + "pattern": "(^unknown$)|(WBls:[0-9]{7}$)|(^UBERON:[0-9]{7}$)|(HsapDv:[0-9]{7}$)|(MmusDv:[0-9]{7}$)|(ZFS:[0-9]{7}$)|(FBdv:[0-9]{8}$)" + }, + "name": { + "description": "Name of the developmental stage component.", + "type": [ + "string", + "null" + ] + } + }, + "title": "DevelopmentStageDetails", + "type": "object" + }, + "Disease": { + "additionalProperties": false, + "description": "The disease or condition of the patients from which assayed biosamples 
were derived.", + "properties": { + "id": { + "anyOf": [ + { + "pattern": "^MONDO:[0-9]{7}$", + "type": "string" + }, + { + "pattern": "^PATO:[0-9]{7}$", + "type": "string" + }, + { + "type": "null" + } + ], + "description": "A placeholder for any type of data.", + "pattern": "(^MONDO:[0-9]{7}$)|(^PATO:[0-9]{7}$)" + }, + "name": { + "description": "Name of the disease.", + "type": [ + "string", + "null" + ] + } + }, + "title": "Disease", + "type": "object" + }, "ExperimentMetadata": { "additionalProperties": false, "description": "Metadata describing sample and sample preparation methods used in a cryoET dataset.", "properties": { + "assay": { + "$ref": "#/$defs/Assay", + "description": "The assay that was used to create the dataset." + }, "cell_component": { "anyOf": [ { @@ -2149,6 +2387,14 @@ ], "description": "The cell type from which the sample was derived." }, + "development_stage": { + "$ref": "#/$defs/DevelopmentStageDetails", + "description": "The development stage of the patients or organisms from which assayed biosamples were derived." + }, + "disease": { + "$ref": "#/$defs/Disease", + "description": "The disease or condition of the patients from which assayed biosamples were derived." 
+ }, "grid_preparation": { "description": "Describes Cryo-ET grid preparation.", "type": [ @@ -2184,7 +2430,7 @@ "sample_type": { "$ref": "#/$defs/SampleTypeEnum", "description": "Type of sample imaged in a CryoET study.", - "pattern": "(^cell$)|(^tissue$)|(^organism$)|(^organelle$)|(^virus$)|(^in_vitro$)|(^in_silico$)|(^other$)" + "pattern": "(^cell_line$)|(^in_silico$)|(^in_vitro$)|(^organelle$)|(^organism$)|(^organoid$)|(^other$)|(^primary_cell_culture$)|(^tissue$)|(^virus$)" }, "tissue": { "anyOf": [ @@ -2199,7 +2445,10 @@ } }, "required": [ - "sample_type" + "sample_type", + "assay", + "development_stage", + "disease" ], "title": "ExperimentMetadata", "type": "object" @@ -2775,14 +3024,16 @@ "SampleTypeEnum": { "description": "Type of sample imaged in a CryoET study.", "enum": [ - "cell", - "tissue", - "organism", - "organelle", - "virus", - "in_vitro", + "cell_line", "in_silico", - "other" + "in_vitro", + "organelle", + "organism", + "organoid", + "other", + "primary_cell_culture", + "tissue", + "virus" ], "title": "SampleTypeEnum", "type": "string" @@ -3423,12 +3674,33 @@ "description": "The type of tissue from which the sample was derived.", "properties": { "id": { - "description": "The UBERON identifier for the tissue.", - "pattern": "^BTO:[0-9]{7}$", - "type": [ - "string", - "null" - ] + "anyOf": [ + { + "pattern": "^CL:[0-9]{7}$", + "type": "string" + }, + { + "pattern": "WBbt:[0-9]{7}$", + "type": "string" + }, + { + "pattern": "ZFA:[0-9]{7}$", + "type": "string" + }, + { + "pattern": "FBbt:[0-9]{8}$", + "type": "string" + }, + { + "pattern": "^UBERON:[0-9]{7}$", + "type": "string" + }, + { + "type": "null" + } + ], + "description": "A placeholder for any type of data.", + "pattern": "(^CL:[0-9]{7}$)|(WBbt:[0-9]{7}$)|(ZFA:[0-9]{7}$)|(FBbt:[0-9]{8}$)|(^UBERON:[0-9]{7}$)" }, "name": { "description": "Name of the tissue from which a biological sample used in a CryoET study is derived from.", diff --git 
a/schema/metadata_files/v2.0.0/codegen/metadata_files_materialized.yaml b/schema/metadata_files/v2.0.0/codegen/metadata_files_materialized.yaml index 2a3675d2b..7740c0896 100644 --- a/schema/metadata_files/v2.0.0/codegen/metadata_files_materialized.yaml +++ b/schema/metadata_files/v2.0.0/codegen/metadata_files_materialized.yaml @@ -299,30 +299,108 @@ types: from_schema: metadata base: str pattern: ^BTO:[0-9]{7}$ + UBERON_ID: + name: UBERON_ID + description: An UBERON identifier + from_schema: metadata + base: str + pattern: ^UBERON:[0-9]{7}$ CL_ID: name: CL_ID description: A Cell Ontology identifier from_schema: metadata base: str pattern: ^CL:[0-9]{7}$ + CVCL_ID: + name: CVCL_ID + description: A Cellosaurus identifier + from_schema: metadata + base: str + pattern: ^CVCL_[A-Z0-9]{4,}$ + CC_ID: + name: CC_ID + description: A Cell Culture Collection identifier + from_schema: metadata + base: str + pattern: ^CC-[0-9]{4}$ GO_ID: name: GO_ID description: A Gene Ontology identifier from_schema: metadata base: str pattern: ^GO:[0-9]{7}$ + EFO_ID: + name: EFO_ID + description: An Experimental Factor Ontology identifier + from_schema: metadata + base: str + pattern: ^EFO:[0-9]{7}$ UNIPROT_ID: name: UNIPROT_ID description: A UniProt identifier from_schema: metadata base: str pattern: ^UniProtKB:[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}$ - WORMBASE_ID: - name: WORMBASE_ID - description: A WormBase identifier + NCBI_TAXON_ID: + name: NCBI_TAXON_ID + description: A NCBI Taxonomy identifier + from_schema: metadata + base: str + pattern: ^NCBITaxon:[0-9]+$ + WORMBASE_STRAIN_ID: + name: WORMBASE_STRAIN_ID + description: A WormBase strain identifier from_schema: metadata base: str pattern: WBStrain[0-9]{8}$ + WORMBASE_DEVELOPMENT_ID: + name: WORMBASE_DEVELOPMENT_ID + description: A WormBase developmental stage identifier + from_schema: metadata + base: str + pattern: WBls:[0-9]{7}$ + HSAPDV_ID: + name: HSAPDV_ID + description: A human developmental 
phenotype ontology identifier + from_schema: metadata + base: str + pattern: HsapDv:[0-9]{7}$ + MMUSDV_ID: + name: MMUSDV_ID + description: A mouse developmental stage ontology identifier + from_schema: metadata + base: str + pattern: MmusDv:[0-9]{7}$ + ZFS_ID: + name: ZFS_ID + description: A zebrafish developmental stage ontology identifier + from_schema: metadata + base: str + pattern: ZFS:[0-9]{7}$ + FBDV_ID: + name: FBDV_ID + description: A drosophila developmental stage ontology identifier + from_schema: metadata + base: str + pattern: FBdv:[0-9]{8}$ + WBBT_ID: + name: WBBT_ID + description: A WormBase anatomy ontology identifier + from_schema: metadata + base: str + pattern: WBbt:[0-9]{7}$ + ZFA_ID: + name: ZFA_ID + description: A zebrafish anatomy ontology identifier + from_schema: metadata + base: str + pattern: ZFA:[0-9]{7}$ + FBBT_ID: + name: FBBT_ID + description: A drosophila anatomy ontology identifier + from_schema: metadata + base: str + pattern: FBbt:[0-9]{8}$ ONTOLOGY_ID: name: ONTOLOGY_ID description: An ontology identifier @@ -371,6 +449,24 @@ types: from_schema: metadata base: str pattern: ^(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|PDB-[0-9a-zA-Z]{4,8})(\s*,\s*(EMPIAR-[0-9]{5}|EMD-[0-9]{4,5}|(doi:)?10\.[0-9]{4,9}/[-._;()/:a-zA-Z0-9]+|PDB-[0-9a-zA-Z]{4,8}))*$ + MONDO_ID: + name: MONDO_ID + description: An identifier of type MONDO + from_schema: metadata + base: str + pattern: ^MONDO:[0-9]{7}$ + PATO_ID: + name: PATO_ID + description: An identifier of type PATO + from_schema: metadata + base: str + pattern: ^PATO:[0-9]{7}$ + UNKNOWN_LITERAL: + name: UNKNOWN_LITERAL + description: A placeholder for an unknown value. 
+ from_schema: metadata + base: str + pattern: ^unknown$ enums: alignment_type_enum: name: alignment_type_enum @@ -508,6 +604,9 @@ enums: CTFFIND: text: CTFFIND description: The file has ctffind schema + Gctf: + text: Gctf + description: The file has Gctf schema deposition_types_enum: name: deposition_types_enum description: Types of data a deposition has @@ -527,31 +626,37 @@ enums: description: Type of sample imaged in a CryoET study. from_schema: metadata permissible_values: - cell: - text: cell - description: Tomographic data of whole cells or cell sections. - tissue: - text: tissue - description: Tomographic data of tissue sections. - organism: - text: organism - description: Tomographic data of sections through multicellular organisms. - organelle: - text: organelle - description: Tomographic data of purified organelles. - virus: - text: virus - description: Tomographic data of purified viruses or VLPs. + cell_line: + text: cell_line + description: Tomographic data of immortalized cells or immortalized cell sections + in_silico: + text: in_silico + description: Simulated tomographic data. in_vitro: text: in_vitro description: Tomographic data of in vitro reconstituted systems or mixtures of proteins. - in_silico: - text: in_silico - description: Simulated tomographic data. + organelle: + text: organelle + description: Tomographic data of purified organelles. + organism: + text: organism + description: Tomographic data of sections through multicellular organisms. + organoid: + text: organoid + description: Tomographic data of organoid-derived samples. other: text: other description: Other type of sample. + primary_cell_culture: + text: primary_cell_culture + description: Tomographic data of whole primary cells or primary cell sections. + tissue: + text: tissue + description: Tomographic data of tissue sections. + virus: + text: virus + description: Tomographic data of purified viruses or VLPs. 
tiltseries_camera_acquire_mode_enum: name: tiltseries_camera_acquire_mode_enum description: Camera acquisition mode @@ -918,6 +1023,7 @@ classes: - AnnotationPointFile - AnnotationSegmentationMaskFile - AnnotationSemanticSegmentationMaskFile + - AnnotationInstanceSegmentationMaskFile - AnnotationTriangularMeshFile - AnnotationTriangularMeshGroupFile range: boolean @@ -1012,6 +1118,7 @@ classes: - AnnotationPointFile - AnnotationSegmentationMaskFile - AnnotationSemanticSegmentationMaskFile + - AnnotationInstanceSegmentationMaskFile - AnnotationTriangularMeshFile - AnnotationTriangularMeshGroupFile - AnnotationFileMetadata @@ -1637,7 +1744,7 @@ classes: required: true inlined: true inlined_as_list: true - pattern: (^cell$)|(^tissue$)|(^organism$)|(^organelle$)|(^virus$)|(^in_vitro$)|(^in_silico$)|(^other$) + pattern: (^cell_line$)|(^in_silico$)|(^in_vitro$)|(^organelle$)|(^organism$)|(^organoid$)|(^other$)|(^primary_cell_culture$)|(^tissue$)|(^virus$) sample_preparation: name: sample_preparation description: Describes how the sample was prepared. @@ -1744,6 +1851,47 @@ classes: range: CellComponent inlined: true inlined_as_list: true + assay: + name: assay + description: The assay that was used to create the dataset. + from_schema: metadata + alias: assay + owner: DatasetMetadata + domain_of: + - ExperimentMetadata + - Dataset + range: Assay + required: true + inlined: true + inlined_as_list: true + development_stage: + name: development_stage + description: The development stage of the patients or organisms from which + assayed biosamples were derived. + from_schema: metadata + alias: development_stage + owner: DatasetMetadata + domain_of: + - ExperimentMetadata + - Dataset + range: DevelopmentStageDetails + required: true + inlined: true + inlined_as_list: true + disease: + name: disease + description: The disease or condition of the patients from which assayed biosamples + were derived. 
+ from_schema: metadata + alias: disease + owner: DatasetMetadata + domain_of: + - ExperimentMetadata + - Dataset + range: Disease + required: true + inlined: true + inlined_as_list: true DepositionMetadata: name: DepositionMetadata description: 'Metadata describing a deposition. @@ -2812,6 +2960,7 @@ classes: - AnnotationPointFile - AnnotationSegmentationMaskFile - AnnotationSemanticSegmentationMaskFile + - AnnotationInstanceSegmentationMaskFile - AnnotationTriangularMeshFile - AnnotationTriangularMeshGroupFile - AnnotationFileMetadata @@ -3004,6 +3153,9 @@ classes: domain_of: - AuthorMixin - Author + - Assay + - DevelopmentStageDetails + - Disease - OrganismDetails - TissueDetails - CellType diff --git a/schema/poetry.lock b/schema/poetry.lock index a1fd0613c..7f83c98cc 100644 --- a/schema/poetry.lock +++ b/schema/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. 
[[package]] name = "aiohappyeyeballs" @@ -6,6 +6,7 @@ version = "2.4.4" description = "Happy Eyeballs for asyncio" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "aiohappyeyeballs-2.4.4-py3-none-any.whl", hash = "sha256:a980909d50efcd44795c4afeca523296716d50cd756ddca6af8c65b996e27de8"}, {file = "aiohappyeyeballs-2.4.4.tar.gz", hash = "sha256:5fdd7d87889c63183afc18ce9271f9b0a7d32c2303e394468dd45d514a757745"}, @@ -17,6 +18,7 @@ version = "3.10.11" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5077b1a5f40ffa3ba1f40d537d3bec4383988ee51fbba6b74aa8fb1bc466599e"}, {file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8d6a14a4d93b5b3c2891fca94fa9d41b2322a68194422bef0dd5ec1e57d7d298"}, @@ -120,7 +122,7 @@ multidict = ">=4.5,<7.0" yarl = ">=1.12.0,<2.0" [package.extras] -speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"] +speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (>=3.2.0) ; sys_platform == \"linux\" or sys_platform == \"darwin\"", "brotlicffi ; platform_python_implementation != \"CPython\""] [[package]] name = "aiosignal" @@ -128,6 +130,7 @@ version = "1.3.1" description = "aiosignal: a list of registered asynchronous callbacks" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, @@ -142,6 +145,7 @@ version = "0.7.0" description = "Reusable constraint types to use with typing.Annotated" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "annotated_types-0.7.0-py3-none-any.whl", hash = 
"sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, @@ -153,6 +157,7 @@ version = "4.9.3" description = "ANTLR 4.9.3 runtime for Python 3.7" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b"}, ] @@ -163,6 +168,7 @@ version = "1.3.0" description = "Better dates & times for Python" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "arrow-1.3.0-py3-none-any.whl", hash = "sha256:c728b120ebc00eb84e01882a6f5e7927a53960aa990ce7dd2b10f39005a67f80"}, {file = "arrow-1.3.0.tar.gz", hash = "sha256:d4540617648cb5f895730f1ad8c82a65f2dad0166f57b75f3ca54759c4d67a85"}, @@ -182,6 +188,7 @@ version = "2.0.4" description = "Simple LRU cache for asyncio" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "async-lru-2.0.4.tar.gz", hash = "sha256:b8a59a5df60805ff63220b2a0c5b5393da5521b113cd5465a44eb037d81a5627"}, {file = "async_lru-2.0.4-py3-none-any.whl", hash = "sha256:ff02944ce3c288c5be660c42dbcca0742b32c3b279d6dceda655190240b99224"}, @@ -193,6 +200,7 @@ version = "23.2.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"}, {file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"}, @@ -203,8 +211,8 @@ cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] dev = ["attrs[tests]", "pre-commit"] docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] tests = ["attrs[tests-no-zope]", "zope-interface"] -tests-mypy = ["mypy (>=1.6)", 
"pytest-mypy-plugins"] -tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.6) ; platform_python_implementation == \"CPython\" and python_version >= \"3.8\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.8\""] +tests-no-zope = ["attrs[tests-mypy]", "cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] [[package]] name = "black" @@ -212,6 +220,7 @@ version = "24.4.2" description = "The uncompromising code formatter." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "black-24.4.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dd1b5a14e417189db4c7b64a6540f31730713d173f0b63e55fabd52d61d8fdce"}, {file = "black-24.4.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e537d281831ad0e71007dcdcbe50a71470b978c453fa41ce77186bbe0ed6021"}, @@ -246,7 +255,7 @@ platformdirs = ">=2" [package.extras] colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] +d = ["aiohttp (>=3.7.4) ; sys_platform != \"win32\" or implementation_name != \"pypy\"", "aiohttp (>=3.7.4,!=3.9.0) ; sys_platform == \"win32\" and implementation_name == \"pypy\""] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] @@ -256,6 +265,7 @@ version = "2024.7.4" description = "Python package for providing Mozilla's CA Bundle." 
optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"}, {file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"}, @@ -267,6 +277,7 @@ version = "0.2.1" description = "rdflib collections flattening graph" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "CFGraph-0.2.1.tar.gz", hash = "sha256:b57fe7044a10b8ff65aa3a8a8ddc7d4cd77bf511b42e57289cd52cbc29f8fe74"}, ] @@ -280,6 +291,7 @@ version = "5.2.0" description = "Universal encoding detector for Python 3" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970"}, {file = "chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7"}, @@ -291,6 +303,7 @@ version = "3.3.2" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
optional = false python-versions = ">=3.7.0" +groups = ["main"] files = [ {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, @@ -390,6 +403,7 @@ version = "8.1.7" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, @@ -404,6 +418,8 @@ version = "0.4.6" description = "Cross-platform colored terminal text." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["main"] +markers = "platform_system == \"Windows\" or sys_platform == \"win32\"" files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, @@ -415,6 +431,7 @@ version = "0.7.10" description = "Idiomatic conversion between URIs and compact URIs (CURIEs)." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "curies-0.7.10-py3-none-any.whl", hash = "sha256:ad80f420dd76b6f3e921a245370ff6ab7473c48c29c17254970c03cd2e58af5f"}, {file = "curies-0.7.10.tar.gz", hash = "sha256:98a7ceb94710fab3a02727a7f85ba0719dd22be5fc8b5f2ad1d7d4cfc47d64ce"}, @@ -439,6 +456,7 @@ version = "1.2.14" description = "Python @deprecated decorator to deprecate old python classes, functions or methods." 
optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["main"] files = [ {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, {file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"}, @@ -456,6 +474,7 @@ version = "1.1.0" description = "An implementation of lxml.xmlfile for the standard library" optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"}, {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"}, @@ -467,6 +486,7 @@ version = "1.5.1" description = "Validates fully-qualified domain names against RFC 1123, so that they are acceptable to modern bowsers" optional = false python-versions = ">=2.7, !=3.0, !=3.1, !=3.2, !=3.3, !=3.4, <4" +groups = ["main"] files = [ {file = "fqdn-1.5.1-py3-none-any.whl", hash = "sha256:3a179af3761e4df6eb2e026ff9e1a3033d3587bf980a0b1b2e1e5d08d7358014"}, {file = "fqdn-1.5.1.tar.gz", hash = "sha256:105ed3677e767fb5ca086a0c1f4bb66ebc3c100be518f0e0d755d9eae164d89f"}, @@ -478,6 +498,7 @@ version = "1.4.1" description = "A list-like structure which implements collections.abc.MutableSequence" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f9aa1878d1083b276b0196f2dfbe00c9b7e752475ed3b682025ff20c1c1f51ac"}, {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:29acab3f66f0f24674b7dc4736477bcd4bc3ad4b896f5f45379a67bce8b96868"}, @@ -564,6 +585,7 @@ version = "0.20.3" description = "Simple Python interface for Graphviz" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "graphviz-0.20.3-py3-none-any.whl", hash 
= "sha256:81f848f2904515d8cd359cc611faba817598d2feaac4027b266aa3eda7b3dde5"}, {file = "graphviz-0.20.3.zip", hash = "sha256:09d6bc81e6a9fa392e7ba52135a9d49f1ed62526f96499325930e87ca1b5925d"}, @@ -580,6 +602,8 @@ version = "3.0.3" description = "Lightweight in-process concurrent programming" optional = false python-versions = ">=3.7" +groups = ["main"] +markers = "python_version < \"3.13\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")" files = [ {file = "greenlet-3.0.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9da2bd29ed9e4f15955dd1595ad7bc9320308a3b766ef7f837e23ad4b4aac31a"}, {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d353cadd6083fdb056bb46ed07e4340b0869c305c8ca54ef9da3421acbdf6881"}, @@ -651,6 +675,7 @@ version = "0.9.1" description = "Honey Badger reader - a generic file/url/string open and read tool" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "hbreader-0.9.1-py3-none-any.whl", hash = "sha256:9a6e76c9d1afc1b977374a5dc430a1ebb0ea0488205546d4678d6e31cc5f6801"}, {file = "hbreader-0.9.1.tar.gz", hash = "sha256:d2c132f8ba6276d794c66224c3297cec25c8079d0a4cf019c061611e0a3b94fa"}, @@ -662,6 +687,7 @@ version = "3.7" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.5" +groups = ["main"] files = [ {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, @@ -673,6 +699,7 @@ version = "2.0.0" description = "brain-dead simple config-ini parsing" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = 
"iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, @@ -684,6 +711,7 @@ version = "0.6.1" description = "An ISO 8601 date/time/duration parser and formatter" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"}, {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"}, @@ -698,6 +726,7 @@ version = "20.11.0" description = "Operations with ISO 8601 durations" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "isoduration-20.11.0-py3-none-any.whl", hash = "sha256:b2904c2a4228c3d44f409c8ae8e2370eb21a26f7ac2ec5446df141dde3452042"}, {file = "isoduration-20.11.0.tar.gz", hash = "sha256:ac2f9015137935279eac671f94f89eb00584f940f5dc49462a0c4ee692ba1bd9"}, @@ -712,6 +741,7 @@ version = "3.1.5" description = "A very fast and expressive template engine." 
optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "jinja2-3.1.5-py3-none-any.whl", hash = "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb"}, {file = "jinja2-3.1.5.tar.gz", hash = "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb"}, @@ -729,6 +759,7 @@ version = "0.1.9" description = "Python library for denormalizing nested dicts or json objects to tables and back" optional = false python-versions = ">=3.7.0" +groups = ["main"] files = [ {file = "json_flattener-0.1.9-py3-none-any.whl", hash = "sha256:6b027746f08bf37a75270f30c6690c7149d5f704d8af1740c346a3a1236bc941"}, {file = "json_flattener-0.1.9.tar.gz", hash = "sha256:84cf8523045ffb124301a602602201665fcb003a171ece87e6f46ed02f7f0c15"}, @@ -744,6 +775,7 @@ version = "1.3.1" description = "JSON as python objects" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "jsonasobj-1.3.1-py3-none-any.whl", hash = "sha256:b9e329dc1ceaae7cf5d5b214684a0b100e0dad0be6d5bbabac281ec35ddeca65"}, {file = "jsonasobj-1.3.1.tar.gz", hash = "sha256:d52e0544a54a08f6ea3f77fa3387271e3648655e0eace2f21e825c26370e44a2"}, @@ -755,6 +787,7 @@ version = "1.0.4" description = "JSON as python objects - version 2" optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "jsonasobj2-1.0.4-py3-none-any.whl", hash = "sha256:12e86f86324d54fcf60632db94ea74488d5314e3da554c994fe1e2c6f29acb79"}, {file = "jsonasobj2-1.0.4.tar.gz", hash = "sha256:f50b1668ef478004aa487b2d2d094c304e5cb6b79337809f4a1f2975cc7fbb4e"}, @@ -769,6 +802,7 @@ version = "1.33" description = "Apply JSON-Patches (RFC 6902)" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +groups = ["main"] files = [ {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"}, {file = "jsonpatch-1.33.tar.gz", hash = 
"sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c"}, @@ -783,6 +817,7 @@ version = "1.6.1" description = "A final implementation of JSONPath for Python that aims to be standard compliant, including arithmetic and binary comparison operators and providing clear AST for metaprogramming." optional = false python-versions = "*" +groups = ["main"] files = [ {file = "jsonpath-ng-1.6.1.tar.gz", hash = "sha256:086c37ba4917304850bd837aeab806670224d3f038fe2833ff593a672ef0a5fa"}, {file = "jsonpath_ng-1.6.1-py3-none-any.whl", hash = "sha256:8f22cd8273d7772eea9aaa84d922e0841aa36fdb8a2c6b7f6c3791a16a9bc0be"}, @@ -797,6 +832,7 @@ version = "3.0.0" description = "Identify specific nodes in a JSON document (RFC 6901)" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942"}, {file = "jsonpointer-3.0.0.tar.gz", hash = "sha256:2b2d729f2091522d61c3b31f82e11870f60b68f43fbc705cb76bf4b832af59ef"}, @@ -808,6 +844,7 @@ version = "4.23.0" description = "An implementation of JSON Schema validation for Python" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "jsonschema-4.23.0-py3-none-any.whl", hash = "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566"}, {file = "jsonschema-4.23.0.tar.gz", hash = "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4"}, @@ -837,6 +874,7 @@ version = "2023.12.1" description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "jsonschema_specifications-2023.12.1-py3-none-any.whl", hash = "sha256:87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c"}, {file = "jsonschema_specifications-2023.12.1.tar.gz", hash = "sha256:48a76787b3e70f5ed53f1160d2b81f586e4ca6d1548c5de7085d1682674764cc"}, @@ -851,6 +889,7 @@ version 
= "1.8.2" description = "Linked Open Data Modeling Language" optional = false python-versions = "<4.0.0,>=3.8.1" +groups = ["main"] files = [ {file = "linkml-1.8.2-py3-none-any.whl", hash = "sha256:441e284f25d8d68a1ca58456651ccac29dc0319f9d18abceb677728eaeacc655"}, {file = "linkml-1.8.2.tar.gz", hash = "sha256:29e42e46e2777eb8b77e38775686b4ab385d5955bc5a0e6ec7c8f39dd4827544"}, @@ -893,6 +932,7 @@ version = "0.1.0" description = "LinkML Data Operations API" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "linkml_dataops-0.1.0-py3-none-any.whl", hash = "sha256:193cf7f659e5f07946d2c2761896910d5f7151d91282543b1363801f68307f4c"}, {file = "linkml_dataops-0.1.0.tar.gz", hash = "sha256:4550eab65e78b70dc3b9c651724a94ac2b1d1edb2fbe576465f1d6951a54ed04"}, @@ -911,6 +951,7 @@ version = "1.8.1" description = "Runtime environment for LinkML, the Linked open data modeling language" optional = false python-versions = "<4.0,>=3.8" +groups = ["main"] files = [ {file = "linkml_runtime-1.8.1-py3-none-any.whl", hash = "sha256:bb825462d1609cea055ccf23ee772c846d3e14d34dc8286fa64a132cbf4aa3d2"}, {file = "linkml_runtime-1.8.1.tar.gz", hash = "sha256:2839143c1518579bc4baeaec3b1b854d241e1b792ccecc756bb860c66a633a44"}, @@ -937,6 +978,7 @@ version = "2.1.5" description = "Safely add untrusted strings to HTML/XML markup." 
optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"}, {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"}, @@ -1006,6 +1048,7 @@ version = "6.0.5" description = "multidict implementation" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:228b644ae063c10e7f324ab1ab6b548bdf6f8b47f3ec234fef1093bc2735e5f9"}, {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:896ebdcf62683551312c30e20614305f53125750803b614e9e6ce74a96232604"}, @@ -1105,6 +1148,7 @@ version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." optional = false python-versions = ">=3.5" +groups = ["main"] files = [ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, @@ -1116,6 +1160,7 @@ version = "1.26.4" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, @@ -1161,6 +1206,7 @@ version = "3.1.5" description = "A Python library to read/write Excel 2010 xlsx/xlsm files" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "openpyxl-3.1.5-py2.py3-none-any.whl", hash = 
"sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2"}, {file = "openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050"}, @@ -1175,6 +1221,7 @@ version = "24.1" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, @@ -1186,6 +1233,7 @@ version = "1.20.2" description = "parse() is the opposite of format()" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "parse-1.20.2-py2.py3-none-any.whl", hash = "sha256:967095588cb802add9177d0c0b6133b5ba33b1ea9007ca800e526f42a85af558"}, {file = "parse-1.20.2.tar.gz", hash = "sha256:b41d604d16503c79d81af5165155c0b20f6c8d6c559efa66b4b695c3e5a0a0ce"}, @@ -1197,6 +1245,7 @@ version = "0.12.1" description = "Utility library for gitignore style pattern matching of file paths." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, @@ -1208,6 +1257,7 @@ version = "4.2.2" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "platformdirs-4.2.2-py3-none-any.whl", hash = "sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee"}, {file = "platformdirs-4.2.2.tar.gz", hash = "sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3"}, @@ -1224,6 +1274,7 @@ version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, @@ -1239,6 +1290,7 @@ version = "3.11" description = "Python Lex & Yacc" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"}, {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"}, @@ -1250,6 +1302,7 @@ version = "0.1.12" description = "A python API for working with ID prefixes" optional = false python-versions = ">=3.7,<4.0" +groups = ["main"] files = [ {file = "prefixcommons-0.1.12-py3-none-any.whl", hash = "sha256:16dbc0a1f775e003c724f19a694fcfa3174608f5c8b0e893d494cf8098ac7f8b"}, {file = "prefixcommons-0.1.12.tar.gz", hash = "sha256:22c4e2d37b63487b3ab48f0495b70f14564cb346a15220f23919eb0c1851f69f"}, @@ -1267,6 +1320,7 @@ version = "0.2.4" description = "A python library for retrieving semantic prefix maps" optional = false python-versions = "<4.0,>=3.8" +groups = ["main"] files = [ {file = "prefixmaps-0.2.4-py3-none-any.whl", hash = "sha256:89bf0e6fb08c276f754f9624c42adf2e87c64ee92a3dde1f7eff01f22d85b512"}, {file = "prefixmaps-0.2.4.tar.gz", hash = "sha256:ae86a1b31189d0516d199756d5808f75f44b39e86546c356cc78c0fe8d2078af"}, @@ -1282,6 +1336,7 @@ 
version = "0.2.1" description = "Accelerated property cache" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "propcache-0.2.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6b3f39a85d671436ee3d12c017f8fdea38509e4f25b28eb25877293c98c243f6"}, {file = "propcache-0.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:39d51fbe4285d5db5d92a929e3e21536ea3dd43732c5b177c7ef03f918dff9f2"}, @@ -1373,6 +1428,7 @@ version = "2.8.2" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pydantic-2.8.2-py3-none-any.whl", hash = "sha256:73ee9fddd406dc318b885c7a2eab8a6472b68b8fb5ba8150949fc3db939f23c8"}, {file = "pydantic-2.8.2.tar.gz", hash = "sha256:6f62c13d067b0755ad1c21a34bdd06c0c12625a22b0fc09c6b149816604f7c2a"}, @@ -1395,6 +1451,7 @@ version = "2.20.1" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pydantic_core-2.20.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3acae97ffd19bf091c72df4d726d552c473f3576409b2a7ca36b2f535ffff4a3"}, {file = "pydantic_core-2.20.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:41f4c96227a67a013e7de5ff8f20fb496ce573893b7f4f2707d065907bffdbd6"}, @@ -1496,6 +1553,7 @@ version = "0.11.10" description = "Python JSON Schema Grammar interpreter" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "PyJSG-0.11.10-py3-none-any.whl", hash = "sha256:10af60ff42219be7e85bf7f11c19b648715b0b29eb2ddbd269e87069a7c3f26d"}, {file = "PyJSG-0.11.10.tar.gz", hash = "sha256:4bd6e3ff2833fa2b395bbe803a2d72a5f0bab5b7285bccd0da1a1bc0aee88bfa"}, @@ -1511,6 +1569,7 @@ version = "3.1.2" description = "pyparsing module - Classes and methods to define and execute parsing grammars" optional = false python-versions = ">=3.6.8" +groups = ["main"] files = [ {file = "pyparsing-3.1.2-py3-none-any.whl", hash = 
"sha256:f9db75911801ed778fe61bb643079ff86601aca99fcae6345aa67292038fb742"}, {file = "pyparsing-3.1.2.tar.gz", hash = "sha256:a1bac0ce561155ecc3ed78ca94d3c9378656ad4c94c1270de543f621420f94ad"}, @@ -1525,6 +1584,7 @@ version = "0.8.1" description = "Python ShEx Implementation" optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "PyShEx-0.8.1-py3-none-any.whl", hash = "sha256:6da1b10123e191abf8dcb6bf3e54aa3e1fcf771df5d1a0ed453217c8900c8e6a"}, {file = "PyShEx-0.8.1.tar.gz", hash = "sha256:3c5c4d45fe27faaadae803cb008c41acf8ee784da7868b04fd84967e75be70d0"}, @@ -1547,6 +1607,7 @@ version = "0.9.1" description = "PyShExC - Python ShEx compiler" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "PyShExC-0.9.1-py2.py3-none-any.whl", hash = "sha256:efc55ed5cb2453e9df569b03e282505e96bb06597934288f3b23dd980ef10028"}, {file = "PyShExC-0.9.1.tar.gz", hash = "sha256:35a9975d4b9afeb20ef710fb6680871756381d0c39fbb5470b3b506581a304d3"}, @@ -1566,6 +1627,7 @@ version = "8.2.2" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "pytest-8.2.2-py3-none-any.whl", hash = "sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343"}, {file = "pytest-8.2.2.tar.gz", hash = "sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977"}, @@ -1586,6 +1648,7 @@ version = "2015.11.4" description = "Configures logging and allows tweaking the log level with a py.test flag" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "pytest-logging-2015.11.4.tar.gz", hash = "sha256:cec5c85ecf18aab7b2ead5498a31b9f758680ef5a902b9054ab3f2bdbb77c896"}, ] @@ -1599,6 +1662,7 @@ version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main"] files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = 
"sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, @@ -1613,6 +1677,7 @@ version = "0.4.0" description = "A pure Python implementation of the trie data structure." optional = false python-versions = "*" +groups = ["main"] files = [ {file = "PyTrie-0.4.0-py3-none-any.whl", hash = "sha256:f687c224ee8c66cda8e8628a903011b692635ffbb08d4b39c5f92b18eb78c950"}, {file = "PyTrie-0.4.0.tar.gz", hash = "sha256:8f4488f402d3465993fb6b6efa09866849ed8cda7903b50647b7d0342b805379"}, @@ -1627,6 +1692,7 @@ version = "6.0.1" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, @@ -1687,6 +1753,7 @@ version = "7.0.0" description = "RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information." 
optional = false python-versions = ">=3.8.1,<4.0.0" +groups = ["main"] files = [ {file = "rdflib-7.0.0-py3-none-any.whl", hash = "sha256:0438920912a642c866a513de6fe8a0001bd86ef975057d6962c79ce4771687cd"}, {file = "rdflib-7.0.0.tar.gz", hash = "sha256:9995eb8569428059b8c1affd26b25eac510d64f5043d9ce8c84e0d0036e995ae"}, @@ -1708,6 +1775,7 @@ version = "0.6.1" description = "rdflib extension adding JSON-LD parser and serializer" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "rdflib-jsonld-0.6.1.tar.gz", hash = "sha256:eda5a42a2e09f80d4da78e32b5c684bccdf275368f1541e6b7bcddfb1382a0e0"}, {file = "rdflib_jsonld-0.6.1-py2.py3-none-any.whl", hash = "sha256:bcf84317e947a661bae0a3f2aee1eced697075fc4ac4db6065a3340ea0f10fc2"}, @@ -1722,6 +1790,7 @@ version = "1.0.3" description = "Shim for rdflib 5 and 6 incompatibilities" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "rdflib_shim-1.0.3-py3-none-any.whl", hash = "sha256:7a853e7750ef1e9bf4e35dea27d54e02d4ed087de5a9e0c329c4a6d82d647081"}, {file = "rdflib_shim-1.0.3.tar.gz", hash = "sha256:d955d11e2986aab42b6830ca56ac6bc9c893abd1d049a161c6de2f1b99d4fc0d"}, @@ -1737,6 +1806,7 @@ version = "0.35.1" description = "JSON Referencing + Python" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "referencing-0.35.1-py3-none-any.whl", hash = "sha256:eda6d3234d62814d1c64e305c1331c9a3a6132da475ab6382eaa997b21ee75de"}, {file = "referencing-0.35.1.tar.gz", hash = "sha256:25b42124a6c8b632a425174f24087783efb348a6f1e0008e63cd4466fedf703c"}, @@ -1752,6 +1822,7 @@ version = "2.32.3" description = "Python HTTP for Humans." 
optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, @@ -1773,6 +1844,7 @@ version = "0.1.4" description = "A pure python RFC3339 validator" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +groups = ["main"] files = [ {file = "rfc3339_validator-0.1.4-py2.py3-none-any.whl", hash = "sha256:24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa"}, {file = "rfc3339_validator-0.1.4.tar.gz", hash = "sha256:138a2abdf93304ad60530167e51d2dfb9549521a836871b88d7f4695d0022f6b"}, @@ -1787,6 +1859,7 @@ version = "1.3.8" description = "Parsing and validation of URIs (RFC 3986) and IRIs (RFC 3987)" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "rfc3987-1.3.8-py2.py3-none-any.whl", hash = "sha256:10702b1e51e5658843460b189b185c0366d2cf4cff716f13111b0ea9fd2dce53"}, {file = "rfc3987-1.3.8.tar.gz", hash = "sha256:d3c4d257a560d544e9826b38bc81db676890c79ab9d7ac92b39c7a253d5ca733"}, @@ -1798,6 +1871,7 @@ version = "0.19.0" description = "Python bindings to Rust's persistent data structures (rpds)" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "rpds_py-0.19.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:fb37bd599f031f1a6fb9e58ec62864ccf3ad549cf14bac527dbfa97123edcca4"}, {file = "rpds_py-0.19.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3384d278df99ec2c6acf701d067147320b864ef6727405d6470838476e44d9e8"}, @@ -1906,6 +1980,7 @@ version = "0.18.6" description = "ruamel.yaml is a YAML parser/emitter that supports roundtrip preservation of comments, seq/map flow style, and map key order" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "ruamel.yaml-0.18.6-py3-none-any.whl", hash = 
"sha256:57b53ba33def16c4f3d807c0ccbc00f8a6081827e81ba2491691b76882d0c636"}, {file = "ruamel.yaml-0.18.6.tar.gz", hash = "sha256:8b27e6a217e786c6fbe5634d8f3f11bc63e0f80f6a5890f28863d9c45aac311b"}, @@ -1924,6 +1999,8 @@ version = "0.2.8" description = "C version of reader, parser and emitter for ruamel.yaml derived from libyaml" optional = false python-versions = ">=3.6" +groups = ["main"] +markers = "python_version < \"3.13\" and platform_python_implementation == \"CPython\"" files = [ {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b42169467c42b692c19cf539c38d4602069d8c1505e97b86387fcf7afb766e1d"}, {file = "ruamel.yaml.clib-0.2.8-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:07238db9cbdf8fc1e9de2489a4f68474e70dffcb32232db7c08fa61ca0c7c462"}, @@ -1983,6 +2060,7 @@ version = "0.8.2" description = "ShExJSG - Astract Syntax Tree for the ShEx 2.0 language" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "ShExJSG-0.8.2-py2.py3-none-any.whl", hash = "sha256:3b0d8432dd313bee9e1343382c5e02e9908dd941a7dd7342bf8c0200fe523766"}, {file = "ShExJSG-0.8.2.tar.gz", hash = "sha256:f17a629fc577fa344382bdee143cd9ff86588537f9f811f66cea6f63cdbcd0b6"}, @@ -1997,6 +2075,7 @@ version = "1.16.0" description = "Python 2 and 3 compatibility utilities" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +groups = ["main"] files = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, @@ -2008,6 +2087,7 @@ version = "2.4.0" description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" optional = false python-versions = "*" +groups = ["main"] files = [ {file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"}, {file = 
"sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, @@ -2019,6 +2099,7 @@ version = "0.5.1" description = "SPARQL Slurper for rdflib" optional = false python-versions = ">=3.7.4" +groups = ["main"] files = [ {file = "sparqlslurper-0.5.1-py3-none-any.whl", hash = "sha256:ae49b2d8ce3dd38df7a40465b228ad5d33fb7e11b3f248d195f9cadfc9cfff87"}, {file = "sparqlslurper-0.5.1.tar.gz", hash = "sha256:9282ebb064fc6152a58269d194cb1e7b275b0f095425a578d75b96dcc851f546"}, @@ -2035,6 +2116,7 @@ version = "2.0.0" description = "SPARQL Endpoint interface to Python" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "SPARQLWrapper-2.0.0-py3-none-any.whl", hash = "sha256:c99a7204fff676ee28e6acef327dc1ff8451c6f7217dcd8d49e8872f324a8a20"}, {file = "SPARQLWrapper-2.0.0.tar.gz", hash = "sha256:3fed3ebcc77617a4a74d2644b86fd88e0f32e7f7003ac7b2b334c026201731f1"}, @@ -2055,6 +2137,7 @@ version = "2.0.31" description = "Database Abstraction Library" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "SQLAlchemy-2.0.31-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f2a213c1b699d3f5768a7272de720387ae0122f1becf0901ed6eaa1abd1baf6c"}, {file = "SQLAlchemy-2.0.31-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9fea3d0884e82d1e33226935dac990b967bef21315cbcc894605db3441347443"}, @@ -2142,6 +2225,7 @@ version = "2.9.0.20240316" description = "Typing stubs for python-dateutil" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "types-python-dateutil-2.9.0.20240316.tar.gz", hash = "sha256:5d2f2e240b86905e40944dd787db6da9263f0deabef1076ddaed797351ec0202"}, {file = "types_python_dateutil-2.9.0.20240316-py3-none-any.whl", hash = "sha256:6b8cb66d960771ce5ff974e9dd45e38facb81718cc1e208b10b1baccbfdbee3b"}, @@ -2153,6 +2237,7 @@ version = "4.12.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = 
">=3.8" +groups = ["main"] files = [ {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, @@ -2164,6 +2249,7 @@ version = "1.3.0" description = "RFC 6570 URI Template Processor" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "uri-template-1.3.0.tar.gz", hash = "sha256:0e00f8eb65e18c7de20d595a14336e9f337ead580c70934141624b6d1ffdacc7"}, {file = "uri_template-1.3.0-py3-none-any.whl", hash = "sha256:a44a133ea12d44a0c0f06d7d42a52d71282e77e2f937d8abd5655b8d56fc1363"}, @@ -2178,13 +2264,14 @@ version = "2.2.2" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"}, {file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"}, ] [package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] @@ -2195,6 +2282,7 @@ version = "4.0.1" description = "Filesystem events monitoring" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "watchdog-4.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:da2dfdaa8006eb6a71051795856bedd97e5b03e57da96f98e375682c48850645"}, {file = "watchdog-4.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e93f451f2dfa433d97765ca2634628b789b49ba8b504fdde5837cdcf25fdb53b"}, @@ -2239,6 +2327,7 @@ version = "24.6.0" description = "A library for working 
with the color formats defined by HTML and CSS." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "webcolors-24.6.0-py3-none-any.whl", hash = "sha256:8cf5bc7e28defd1d48b9e83d5fc30741328305a8195c29a8e668fa45586568a1"}, {file = "webcolors-24.6.0.tar.gz", hash = "sha256:1d160d1de46b3e81e58d0a280d0c78b467dc80f47294b91b1ad8029d2cedb55b"}, @@ -2254,6 +2343,7 @@ version = "1.16.0" description = "Module for decorators, wrappers and monkey patching." optional = false python-versions = ">=3.6" +groups = ["main"] files = [ {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, {file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"}, @@ -2333,6 +2423,7 @@ version = "1.18.3" description = "Yet another URL library" optional = false python-versions = ">=3.9" +groups = ["main"] files = [ {file = "yarl-1.18.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7df647e8edd71f000a5208fe6ff8c382a1de8edfbccdbbfe649d263de07d8c34"}, {file = "yarl-1.18.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c69697d3adff5aa4f874b19c0e4ed65180ceed6318ec856ebc423aa5850d84f7"}, @@ -2424,6 +2515,6 @@ multidict = ">=4.0" propcache = ">=0.2.0" [metadata] -lock-version = "2.0" +lock-version = "2.1" python-versions = "^3.11" -content-hash = "397a7d253118dbd69f6e82c0bb6d84496c74483c3ec67e92a12e5f6c7efed0bd" +content-hash = "52bf24dea817abad6240d89b2ff7cca89581723e1edcd7173b41613b7cc0925b" diff --git a/schema/pyproject.toml b/schema/pyproject.toml index bcf1b4fcf..82ef33099 100644 --- a/schema/pyproject.toml +++ b/schema/pyproject.toml @@ -21,7 +21,7 @@ python = "^3.11" aiohttp = "^3.10.11" async-lru = "^2.0.4" click = "^8.1.7" -linkml = "^1.8.2" +linkml = "1.8.2" numpy = "^1.26.4" pydantic = "^2.8.2" typing-extensions = "^4.12.2" diff --git 
a/test_infra/test_files/30001/RUN1/Reconstructions/VoxelSpacing12.300/Annotations/100/foo-1.0.json b/test_infra/test_files/30001/RUN1/Reconstructions/VoxelSpacing12.300/Annotations/100/foo-1.0.json index 0496e6aa6..94667aac9 100644 --- a/test_infra/test_files/30001/RUN1/Reconstructions/VoxelSpacing12.300/Annotations/100/foo-1.0.json +++ b/test_infra/test_files/30001/RUN1/Reconstructions/VoxelSpacing12.300/Annotations/100/foo-1.0.json @@ -53,6 +53,18 @@ "path": "30001/RUN1/Reconstructions/VoxelSpacing12.300/Annotations/100-foo-1.0_segmask.zarr", "shape": "SegmentationMask", "is_visualization_default": false + }, + { + "format": "mrc", + "path": "30001/RUN1/Reconstructions/VoxelSpacing12.300/Annotations/100-foo-1.0_instancesegmask.mrc", + "shape": "InstanceSegmentationMask", + "is_visualization_default": false + }, + { + "format": "zarr", + "path": "30001/RUN1/Reconstructions/VoxelSpacing12.300/Annotations/100-foo-1.0_instancesegmask.zarr", + "shape": "InstanceSegmentationMask", + "is_visualization_default": false } ], "method_links": [