From d4ffa2744e6c7672e79aee042844d30becfff3c3 Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Tue, 8 Apr 2025 13:21:54 +0300 Subject: [PATCH 01/18] add migrations to the manifest --- .../declarative_component_schema.yaml | 39 +- .../manifest_declarative_source.py | 33 +- .../models/declarative_component_schema.py | 23 +- .../declarative/parsers/custom_exceptions.py | 18 + .../parsers/manifest_component_transformer.py | 4 +- .../parsers/manifest_migration_handler.py | 217 +++ .../parsers/model_to_component_factory.py | 13 +- .../declarative/requesters/http_requester.py | 79 +- .../declarative/requesters/requester.py | 12 + bin/generate_component_manifest_files.py | 3 + .../test_connector_builder_handler.py | 6 +- .../sources/declarative/parsers/conftest.py | 1235 +++++++++++++++++ .../parsers/test_manifest_migration.py | 49 + 13 files changed, 1694 insertions(+), 37 deletions(-) create mode 100644 airbyte_cdk/sources/declarative/parsers/manifest_migration_handler.py create mode 100644 unit_tests/sources/declarative/parsers/conftest.py create mode 100644 unit_tests/sources/declarative/parsers/test_manifest_migration.py diff --git a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index 2f09579ba..63d27a8e8 100644 --- a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -1858,19 +1858,32 @@ definitions: type: enum: [Bearer] HttpRequester: + migrations: + - type: replace_field + description: The `url_base` has been deprecated, in favor of the `url` field. + original_key: url_base + replacement_key: url + - type: handle_url_parts + description: The `path` has been deprecated, in favor of the `url` field. The value from the `path` field will be joined to the `url` field. 
+ original_key: path + replacement_key: url + - type: remove_field + description: The `path` has been deprecated, in favor of the `url` field. + original_key: path title: HTTP Requester description: Requester submitting HTTP requests and extracting records from the response. type: object required: - type - - url_base properties: type: type: string enum: [HttpRequester] url_base: + deprecated: true + sharable: true title: API Base URL - description: Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this. + description: Deprecated, use the `url` instead. Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this. type: string interpolation_context: - config @@ -1886,9 +1899,29 @@ definitions: - "{{ config['base_url'] or 'https://app.posthog.com'}}/api" - "https://connect.squareup.com/v2/quotes/{{ stream_partition['id'] }}/quote_line_groups" - "https://example.com/api/v1/resource/{{ next_page_token['id'] }}" + url: + sharable: true + title: API URL + description: The URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this. + type: string + interpolation_context: + - config + - next_page_token + - stream_interval + - stream_partition + - stream_slice + - creation_response + - polling_response + - download_target + examples: + - "https://connect.squareup.com/v2" + - "{{ config['url'] or 'https://app.posthog.com'}}/api" + - "https://connect.squareup.com/v2/quotes/{{ stream_partition['id'] }}/quote_line_groups" + - "https://example.com/api/v1/resource/{{ next_page_token['id'] }}" path: + deprecated: true title: URL Path - description: Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this. 
+ description: Deprecated, use the `url` instead. Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this. type: string interpolation_context: - config diff --git a/airbyte_cdk/sources/declarative/manifest_declarative_source.py b/airbyte_cdk/sources/declarative/manifest_declarative_source.py index cfd258c6c..a8683dd8a 100644 --- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py +++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py @@ -39,6 +39,9 @@ from airbyte_cdk.sources.declarative.parsers.manifest_component_transformer import ( ManifestComponentTransformer, ) +from airbyte_cdk.sources.declarative.parsers.manifest_migration_handler import ( + ManifestMigrationHandler, +) from airbyte_cdk.sources.declarative.parsers.manifest_reference_resolver import ( ManifestReferenceResolver, ) @@ -57,6 +60,24 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException +def _get_declarative_component_schema() -> Dict[str, Any]: + try: + raw_component_schema = pkgutil.get_data( + "airbyte_cdk", "sources/declarative/declarative_component_schema.yaml" + ) + if raw_component_schema is not None: + declarative_component_schema = yaml.load(raw_component_schema, Loader=yaml.SafeLoader) + return declarative_component_schema # type: ignore + else: + raise RuntimeError( + "Failed to read manifest component json schema required for deduplication" + ) + except FileNotFoundError as e: + raise FileNotFoundError( + f"Failed to read manifest component json schema required for deduplication: {e}" + ) + + class ManifestDeclarativeSource(DeclarativeSource): """Declarative source defined by a manifest of low-code components that define source connector behavior""" @@ -68,7 +89,7 @@ def __init__( debug: bool = False, emit_connector_builder_messages: bool = False, component_factory: Optional[ModelToComponentFactory] = None, - ): + ) -> None: 
""" Args: config: The provided config dict. @@ -78,6 +99,8 @@ def __init__( component_factory: optional factory if ModelToComponentFactory's default behavior needs to be tweaked. """ self.logger = logging.getLogger(f"airbyte.{self.name}") + + self._declarative_component_schema = _get_declarative_component_schema() # For ease of use we don't require the type to be specified at the top level manifest, but it should be included during processing manifest = dict(source_config) if "type" not in manifest: @@ -90,7 +113,13 @@ def __init__( propagated_source_config = ManifestComponentTransformer().propagate_types_and_parameters( "", resolved_source_config, {} ) - self._source_config = propagated_source_config + + # migrate definitions to the new format, if any are present + migrated_source_config = ManifestMigrationHandler( + propagated_source_config, self._declarative_component_schema + ).migrate() + + self._source_config = migrated_source_config self._debug = debug self._emit_connector_builder_messages = emit_connector_builder_messages self._constructor = ( diff --git a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index 3566abef4..8d1a8fbe9 100644 --- a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -1,5 +1,3 @@ -# Copyright (c) 2025 Airbyte, Inc., all rights reserved. - # generated by datamodel-codegen: # filename: declarative_component_schema.yaml @@ -2149,9 +2147,10 @@ class SessionTokenAuthenticator(BaseModel): class HttpRequester(BaseModel): type: Literal["HttpRequester"] - url_base: str = Field( - ..., - description="Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.", + url_base: Optional[str] = Field( + None, + deprecated=True, + description="Deprecated, use the `url` instead. 
Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.", examples=[ "https://connect.squareup.com/v2", "{{ config['base_url'] or 'https://app.posthog.com'}}/api", @@ -2160,9 +2159,21 @@ class HttpRequester(BaseModel): ], title="API Base URL", ) + url: Optional[str] = Field( + None, + description="The URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.", + examples=[ + "https://connect.squareup.com/v2", + "{{ config['url'] or 'https://app.posthog.com'}}/api", + "https://connect.squareup.com/v2/quotes/{{ stream_partition['id'] }}/quote_line_groups", + "https://example.com/api/v1/resource/{{ next_page_token['id'] }}", + ], + title="API URL", + ) path: Optional[str] = Field( None, - description="Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.", + deprecated=True, + description="Deprecated, use the `url` instead. Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.", examples=[ "/products", "/quotes/{{ stream_partition['id'] }}/quote_line_groups", diff --git a/airbyte_cdk/sources/declarative/parsers/custom_exceptions.py b/airbyte_cdk/sources/declarative/parsers/custom_exceptions.py index d6fdee695..79700e269 100644 --- a/airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +++ b/airbyte_cdk/sources/declarative/parsers/custom_exceptions.py @@ -19,3 +19,21 @@ class UndefinedReferenceException(Exception): def __init__(self, path: str, reference: str) -> None: super().__init__(f"Undefined reference {reference} from {path}") + + +class ManifestNormalizationException(Exception): + """ + Raised when the manifest cannot be normalized (deduplicated). 
+ """ + + def __init__(self, message: str) -> None: + super().__init__(f"Failed to deduplicate manifest: {message}") + + +class ManifestMigrationException(Exception): + """ + Raised when a migration error occurs in the manifest. + """ + + def __init__(self, message: str) -> None: + super().__init__(f"Failed to migrate the manifest: {message}") diff --git a/airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py b/airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py index 6779b54ab..44f414343 100644 --- a/airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +++ b/airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py @@ -4,7 +4,7 @@ import copy import typing -from typing import Any, Mapping, Optional +from typing import Any, Dict, Mapping, Optional PARAMETERS_STR = "$parameters" @@ -95,7 +95,7 @@ def propagate_types_and_parameters( declarative_component: Mapping[str, Any], parent_parameters: Mapping[str, Any], use_parent_parameters: Optional[bool] = None, - ) -> Mapping[str, Any]: + ) -> Dict[str, Any]: """ Recursively transforms the specified declarative component and subcomponents to propagate parameters and insert the default component type if it was not already present. The resulting transformed components are a deep copy of the input diff --git a/airbyte_cdk/sources/declarative/parsers/manifest_migration_handler.py b/airbyte_cdk/sources/declarative/parsers/manifest_migration_handler.py new file mode 100644 index 000000000..204480540 --- /dev/null +++ b/airbyte_cdk/sources/declarative/parsers/manifest_migration_handler.py @@ -0,0 +1,217 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + +import copy +from typing import Any, Callable, Dict, List, Optional, Tuple +from urllib.parse import urljoin + +from airbyte_cdk.sources.declarative.parsers.custom_exceptions import ManifestMigrationException +from airbyte_cdk.sources.types import EmptyString + +# Type definitions for better readability +ManifestType = Dict[str, Any] +DefinitionsType = Dict[str, Any] +MigrationsType = List[Tuple[str, str, Optional[str]]] +MigrationType = Tuple[str, str, Optional[str]] +MigratedTagsType = Dict[str, List[Tuple[str, str, Optional[str]]]] +MigrationFunctionType = Callable[[Any, MigrationType], None] + + +# Configuration constants +TYPE_TAG = "type" +DEF_TAG = "definitions" +MIGRATIONS_TAG = "migrations" +ORIGINAL_KEY = "original_key" +REPLACEMENT_KEY = "replacement_key" + + +# disable migrations for these types +NON_MIGRATABLE_TYPES = [ + "DynamicDeclarativeStream", +] + + +class ManifestMigrationHandler: + """ + This class is responsible for handling migrations in the manifest. + It provides methods to migrate deprecated fields and their values to their new equivalents. + """ + + @property + def _migration_type_mapping(self) -> Dict[str, MigrationFunctionType]: + """ + Returns a mapping of migration types to their handler functions. + + This method defines how different types of migrations should be handled by mapping + migration type identifiers to their corresponding handler methods. 
+ + Returns: + Dict[str, MigrationFunctionType]: A dictionary mapping migration types to handler functions: + - "replace_field": Handler for replacing migrated fields with new ones + - "remove_field": Handler for removing migrated fields + - "handle_url_parts": Handler for processing URL parts + """ + + return { + "replace_field": self._replace_migrated_field, + "remove_field": self._remove_migrated_field, + # URL-specific migration: joins the value of the original key onto the URL held by the replacement key + "handle_url_parts": self._handle_url_parts, + } + + def __init__( + self, + manifest: ManifestType, + declarative_schema: DefinitionsType, + ) -> None: + self._manifest = manifest + self._declarative_schema = declarative_schema + + self._migrated_manifest: ManifestType = copy.deepcopy(self._manifest) + # get the declared migrations from schema + self._migration_tags = self._get_migration_schema_tags(self._declarative_schema) + + def migrate(self) -> ManifestType: + try: + for component_type, migrations in self._migration_tags.items(): + self._handle_migrations(component_type, migrations) + return self._migrated_manifest + except ManifestMigrationException as e: + # if any error occurs, return the original (unmigrated) resolved manifest + return self._manifest + + def _get_migration_schema_tags(self, schema: DefinitionsType) -> MigratedTagsType: + """ + Extracts the declared migrations from the schema definitions. + This function collects every component within the schema's `definitions` that declares a `migrations` list. + + Args: + schema (DefinitionsType): The schema definition dictionary to process + + Returns: + migrations_tags: A mapping of component name to its list of (type, original_key, replacement_key) migration tuples. 
+ """ + + # the migrated tags scope: ['definitions.*'] + schema_definitions = schema.get(DEF_TAG, {}) + migrations_tags: MigratedTagsType = {} + + for component_name, component_declaration in schema_definitions.items(): + if MIGRATIONS_TAG in component_declaration.keys(): + # create the placeholder for the migrations + migrations_tags[component_name] = [] + # iterate over the migrations + for migration in component_declaration[MIGRATIONS_TAG]: + migrations_tags[component_name].append( + ( + # type of migration + migration.get(TYPE_TAG), + # what is the migrated key + migration.get(ORIGINAL_KEY), + # (optional) what is the new key to be used + migration.get(REPLACEMENT_KEY), + ), + ) + + return migrations_tags + + def _handle_migrations( + self, + component_type: str, + migrations: MigrationsType, + ) -> None: + """ + Recursively replaces all occurrences of migrated_key with new_key in the normalized manifest. + + The structure of the `migration` Tuple is: + ( + migration[TYPE_TAG] -- type of migration, + migration["original_key"] -- what is the migrated key, + migration["replacement_key"] -- what is the new key to be used, + ) + """ + try: + for migration in migrations: + self._process_migration(self._migrated_manifest, component_type, migration) + except Exception as e: + raise ManifestMigrationException(f"Failed to migrate the manifest: {e}") from e + + def _process_migration(self, obj: Any, component_type: str, migration: MigrationType) -> None: + migration_type, migrated_key, _ = migration + + if isinstance(obj, dict): + obj_keys = obj.keys() + + # check for component type match the designed migration + if TYPE_TAG in obj_keys: + obj_type = obj[TYPE_TAG] + # do not migrate if the type is not in the list of migratable types + if obj_type in NON_MIGRATABLE_TYPES: + return + if obj_type == component_type and migrated_key in obj_keys: + if migration_type in self._migration_type_mapping.keys(): + # Call the appropriate function based on the migration type + 
self._migration_type_mapping[migration_type](obj, migration) + + # Process all values in the dictionary + for v in list(obj.values()): + self._process_migration(v, component_type, migration) + + elif isinstance(obj, list): + # Process all items in the list + for item in obj: + self._process_migration(item, component_type, migration) + + ## Migration Functions + def _replace_migrated_field( + self, + obj: Any, + migration: MigrationType, + ) -> None: + """ + Replaces the migrated field with the new field in the object. + The value of the migrated field is copied to the new field. + """ + _, original_key, replacement_key = migration + + obj[replacement_key] = obj[original_key] + obj.pop(original_key, None) + + def _handle_url_parts( + self, + obj: Any, + migration: MigrationType, + ) -> None: + """ + Handles the migration of URL parts by joining the original key with the replacement key. + The value of the original key is joined with the replacement key to form a full URL. + """ + _, original_key, replacement_key = migration + + original_key_value = obj[original_key].lstrip("/") + replacement_key_value = obj[replacement_key] + + # return a full-url if provided directly from interpolation context + if original_key_value == EmptyString or original_key_value is None: + obj[replacement_key] = replacement_key_value + else: + # since we didn't provide a full-url, the url_base might not have a trailing slash + # so we join the url_base and path correctly + if not replacement_key_value.endswith("/"): + replacement_key_value += "/" + + obj[replacement_key] = urljoin(replacement_key_value, original_key_value) + + def _remove_migrated_field( + self, + obj: Any, + migration: MigrationType, + ) -> None: + """ + Removes the migrated field from the object. + The value of the migrated field is neglected. 
+ """ + _, original_key, _ = migration + + obj.pop(original_key, None) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 25840f06f..e87caff5d 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -2146,7 +2146,7 @@ def create_http_requester( self._create_component_from_model( model=model.authenticator, config=config, - url_base=model.url_base, + url_base=model.url or model.url_base, name=name, decoder=decoder, ) @@ -2183,6 +2183,7 @@ def create_http_requester( return HttpRequester( name=name, + url=model.url, url_base=model.url_base, path=model.path, authenticator=authenticator, @@ -2925,11 +2926,17 @@ def create_simple_retriever( use_cache=use_cache, config=config, ) - url_base = ( + _url = ( + model.requester.url + if hasattr(model.requester, "url") and model.requester.url is not None + else requester.get_url() + ) + _url_base = ( model.requester.url_base - if hasattr(model.requester, "url_base") + if hasattr(model.requester, "url_base") and model.requester.url_base is not None else requester.get_url_base() ) + url_base = _url or _url_base # Define cursor only if per partition or common incremental support is needed cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None diff --git a/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte_cdk/sources/declarative/requesters/http_requester.py index 78c07b725..6b0e65aab 100644 --- a/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ b/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -3,7 +3,6 @@ # import logging -import os from dataclasses import InitVar, dataclass, field from typing import Any, Callable, Mapping, MutableMapping, Optional, Union from urllib.parse import urljoin @@ -53,10 +52,11 @@ class HttpRequester(Requester): """ 
name: str - url_base: Union[InterpolatedString, str] config: Config parameters: InitVar[Mapping[str, Any]] + url: Optional[Union[InterpolatedString, str]] = None + url_base: Optional[Union[InterpolatedString, str]] = None path: Optional[Union[InterpolatedString, str]] = None authenticator: Optional[DeclarativeAuthenticator] = None http_method: Union[str, HttpMethod] = HttpMethod.GET @@ -71,7 +71,14 @@ class HttpRequester(Requester): decoder: Decoder = field(default_factory=lambda: JsonDecoder(parameters={})) def __post_init__(self, parameters: Mapping[str, Any]) -> None: - self._url_base = InterpolatedString.create(self.url_base, parameters=parameters) + self._url = InterpolatedString.create( + self.url if self.url else EmptyString, parameters=parameters + ) + # deprecated + self._url_base = InterpolatedString.create( + self.url_base if self.url_base else EmptyString, parameters=parameters + ) + # deprecated self._path = InterpolatedString.create( self.path if self.path else EmptyString, parameters=parameters ) @@ -120,6 +127,51 @@ def exit_on_rate_limit(self, value: bool) -> None: def get_authenticator(self) -> DeclarativeAuthenticator: return self._authenticator + def get_url( + self, + *, + stream_state: Optional[StreamState] = None, + stream_slice: Optional[StreamSlice] = None, + next_page_token: Optional[Mapping[str, Any]] = None, + ) -> str: + interpolation_context = get_interpolation_context( + stream_state=stream_state, + stream_slice=stream_slice, + next_page_token=next_page_token, + ) + + return str(self._url.eval(self.config, **interpolation_context)) + + def _get_url( + self, + *, + path: Optional[str] = None, + stream_state: Optional[StreamState] = None, + stream_slice: Optional[StreamSlice] = None, + next_page_token: Optional[Mapping[str, Any]] = None, + ) -> str: + url = self.get_url( + stream_state=stream_state, + stream_slice=stream_slice, + next_page_token=next_page_token, + ) + + url_base = self.get_url_base( + stream_state=stream_state, + 
stream_slice=stream_slice, + next_page_token=next_page_token, + ) + + path = path or self.get_path( + stream_state=stream_state, + stream_slice=stream_slice, + next_page_token=next_page_token, + ) + + full_url = self._join_url(url_base, path) if url_base else url + path if path else url + + return full_url + def get_url_base( self, *, @@ -349,7 +401,7 @@ def _request_body_json( return options @classmethod - def _join_url(cls, url_base: str, path: str) -> str: + def _join_url(cls, url_base: str, path: Optional[str] = None) -> str: """ Joins a base URL with a given path and returns the resulting URL with any trailing slash removed. @@ -358,7 +410,7 @@ def _join_url(cls, url_base: str, path: str) -> str: Args: url_base (str): The base URL to which the path will be appended. - path (str): The path to join with the base URL. + path (Optional[str]): The path to join with the base URL. Returns: str: The resulting joined URL. @@ -399,18 +451,11 @@ def send_request( ) -> Optional[requests.Response]: request, response = self._http_client.send_request( http_method=self.get_method().value, - url=self._join_url( - self.get_url_base( - stream_state=stream_state, - stream_slice=stream_slice, - next_page_token=next_page_token, - ), - path - or self.get_path( - stream_state=stream_state, - stream_slice=stream_slice, - next_page_token=next_page_token, - ), + url=self._get_url( + path=path, + stream_state=stream_state, + stream_slice=stream_slice, + next_page_token=next_page_token, ), request_kwargs={"stream": self.stream_response}, headers=self._request_headers( diff --git a/airbyte_cdk/sources/declarative/requesters/requester.py b/airbyte_cdk/sources/declarative/requesters/requester.py index ddda1ddba..97b31e884 100644 --- a/airbyte_cdk/sources/declarative/requesters/requester.py +++ b/airbyte_cdk/sources/declarative/requesters/requester.py @@ -34,6 +34,18 @@ def get_authenticator(self) -> DeclarativeAuthenticator: """ pass + @abstractmethod + def get_url( + self, + *, + 
stream_state: Optional[StreamState], + stream_slice: Optional[StreamSlice], + next_page_token: Optional[Mapping[str, Any]], + ) -> str: + """ + :return: URL of the API endpoint (the `url` field, which replaces the deprecated `url_base` and `path`) e.g: if you wanted to hit https://myapi.com/v1/some_entity then this should return "https://myapi.com/v1/some_entity" + """ + @abstractmethod def get_url_base( self, diff --git a/bin/generate_component_manifest_files.py b/bin/generate_component_manifest_files.py index 43f9b568e..732a9ec0b 100755 --- a/bin/generate_component_manifest_files.py +++ b/bin/generate_component_manifest_files.py @@ -75,6 +75,9 @@ async def main(): "--set-default-enum-member", "--use-double-quotes", "--remove-special-field-name-prefix", + # carry the `deprecated` flag declared on a schema field over to the generated model field. + "--field-extra-keys", + "deprecated", ], use_entrypoint=True, ) diff --git a/unit_tests/connector_builder/test_connector_builder_handler.py b/unit_tests/connector_builder/test_connector_builder_handler.py index 5c537811b..3af646597 100644 --- a/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/unit_tests/connector_builder/test_connector_builder_handler.py @@ -606,7 +606,6 @@ def test_resolve_manifest(valid_resolve_manifest_config_file): }, "requester": { "type": "HttpRequester", - "path": "/v3/marketing/lists", "authenticator": { "type": "BearerAuthenticator", "api_token": "{{ config.apikey }}", @@ -618,7 +617,7 @@ def test_resolve_manifest(valid_resolve_manifest_config_file): "request_parameters": {"a_param": "10"}, "name": _stream_name, "primary_key": _stream_primary_key, - "url_base": _stream_url_base, + "url": _stream_url_base + "/v3/marketing/lists", "$parameters": _stream_options, }, "partition_router": { @@ -1493,7 +1492,6 @@ def test_full_resolve_manifest(valid_resolve_manifest_config_file): }, }, "requester": { - "path": "/v3/marketing/lists", "authenticator": { "type": "BearerAuthenticator", "api_token": "{{ config.apikey }}", @@ -1510,7 +1508,7 @@ def 
test_full_resolve_manifest(valid_resolve_manifest_config_file): "type": "HttpRequester", "name": "stream_with_custom_requester", "primary_key": "id", - "url_base": "https://10.0.27.27/api/v1/", + "url": "https://10.0.27.27/api/v1/v3/marketing/lists", "$parameters": { "name": "stream_with_custom_requester", "primary_key": "id", diff --git a/unit_tests/sources/declarative/parsers/conftest.py b/unit_tests/sources/declarative/parsers/conftest.py new file mode 100644 index 000000000..6de10170e --- /dev/null +++ b/unit_tests/sources/declarative/parsers/conftest.py @@ -0,0 +1,1235 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from typing import Any, Dict + +import pytest + + +@pytest.fixture +def manifest_with_multiple_url_base() -> Dict[str, Any]: + return { + "type": "DeclarativeSource", + "definitions": { + "streams": { + "A": { + "type": "DeclarativeStream", + "name": "A", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "$ref": "#/definitions/requester_A", + "path": "A", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + "decoder": {"type": "JsonDecoder"}, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/A"}, + }, + }, + "B": { + "type": "DeclarativeStream", + "name": "B", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "$ref": "#/definitions/requester_B", + "path": "B", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + "decoder": {"type": "JsonDecoder"}, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/B"}, + }, + }, + "C": { + "type": "DeclarativeStream", + "name": "C", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "$ref": "#/definitions/requester_A", + "path": "C", + "http_method": "GET", + }, + "record_selector": { + 
"type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + "decoder": {"type": "JsonDecoder"}, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/C"}, + }, + }, + "D": { + "type": "DeclarativeStream", + "name": "D", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "$ref": "#/definitions/requester_B", + "path": "D", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + "decoder": {"type": "JsonDecoder"}, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/D"}, + }, + }, + "E": { + "type": "DeclarativeStream", + "name": "E", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "$ref": "#/definitions/requester_B", + "path": "E", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + "decoder": {"type": "JsonDecoder"}, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/E"}, + }, + }, + }, + # dummy requesters to be resolved and deduplicated + # to the shared `url_base` in the `definitions.shared` section + "requester_A": { + "type": "HttpRequester", + "url_base": "https://example.com/v1/", + }, + "requester_B": { + "type": "HttpRequester", + "url_base": "https://example.com/v2/", + }, + }, + "streams": [ + {"$ref": "#/definitions/streams/A"}, + {"$ref": "#/definitions/streams/B"}, + {"$ref": "#/definitions/streams/C"}, + {"$ref": "#/definitions/streams/D"}, + {"$ref": "#/definitions/streams/E"}, + ], + "schemas": { + "A": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {}, + }, + "B": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {}, + }, + "C": { + 
"type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {}, + }, + "D": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {}, + }, + "E": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {}, + }, + }, + } + + +@pytest.fixture +def expected_manifest_with_multiple_url_base_normalized() -> Dict[str, Any]: + return { + "type": "DeclarativeSource", + "definitions": {"shared": {"HttpRequester": {"url_base": "https://example.com/v2/"}}}, + "streams": [ + { + "type": "DeclarativeStream", + "name": "A", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "url_base": "https://example.com/v1/", + "path": "A", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + "decoder": {"type": "JsonDecoder"}, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/A"}, + }, + }, + { + "type": "DeclarativeStream", + "name": "B", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "url_base": {"$ref": "#/definitions/shared/HttpRequester/url_base"}, + "path": "B", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + "decoder": {"type": "JsonDecoder"}, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/B"}, + }, + }, + { + "type": "DeclarativeStream", + "name": "C", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "url_base": "https://example.com/v1/", + "path": "C", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", 
"field_path": []}, + }, + "decoder": {"type": "JsonDecoder"}, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/C"}, + }, + }, + { + "type": "DeclarativeStream", + "name": "D", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "url_base": {"$ref": "#/definitions/shared/HttpRequester/url_base"}, + "path": "D", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + "decoder": {"type": "JsonDecoder"}, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/D"}, + }, + }, + { + "type": "DeclarativeStream", + "name": "E", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "url_base": {"$ref": "#/definitions/shared/HttpRequester/url_base"}, + "path": "E", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + "decoder": {"type": "JsonDecoder"}, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/E"}, + }, + }, + ], + "schemas": { + "A": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {}, + }, + "B": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {}, + }, + "C": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {}, + }, + "D": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {}, + }, + "E": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {}, + }, + }, + } + + +@pytest.fixture +def 
manifest_with_url_base_shared_definition() -> Dict[str, Any]: + return { + "type": "DeclarativeSource", + "definitions": { + "shared": {"HttpRequester": {"url_base": "https://example.com/v2/"}}, + "streams": { + "A": { + "type": "DeclarativeStream", + "name": "A", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "$ref": "#/definitions/requester_A", + "path": "A", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + "decoder": {"type": "JsonDecoder"}, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/A"}, + }, + }, + "B": { + "type": "DeclarativeStream", + "name": "B", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "$ref": "#/definitions/requester_B", + "path": "B", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + "decoder": {"type": "JsonDecoder"}, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/B"}, + }, + }, + "C": { + "type": "DeclarativeStream", + "name": "C", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "$ref": "#/definitions/requester_A", + "path": "C", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + "decoder": {"type": "JsonDecoder"}, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/C"}, + }, + }, + "D": { + "type": "DeclarativeStream", + "name": "D", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "$ref": "#/definitions/requester_B", + "path": "D", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + "decoder": {"type": "JsonDecoder"}, + }, + "schema_loader": { + "type": 
"InlineSchemaLoader", + "schema": {"$ref": "#/schemas/D"}, + }, + }, + "E": { + "type": "DeclarativeStream", + "name": "E", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "$ref": "#/definitions/requester_B", + "path": "E", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + "decoder": {"type": "JsonDecoder"}, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/E"}, + }, + }, + }, + # dummy requesters to be resolved and deduplicated + # to the shared `url_base` in the `definitions.shared` section + "requester_A": { + "type": "HttpRequester", + "url_base": "https://example.com/v1/", + }, + "requester_B": { + "type": "HttpRequester", + "url_base": {"$ref": "#/definitions/shared/HttpRequester/url_base"}, + }, + }, + "streams": [ + {"$ref": "#/definitions/streams/A"}, + {"$ref": "#/definitions/streams/B"}, + {"$ref": "#/definitions/streams/C"}, + {"$ref": "#/definitions/streams/D"}, + {"$ref": "#/definitions/streams/E"}, + ], + "schemas": { + "A": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {}, + }, + "B": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {}, + }, + "C": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {}, + }, + "D": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {}, + }, + "E": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {}, + }, + }, + } + + +@pytest.fixture +def expected_manifest_with_url_base_shared_definition_normalized() -> Dict[str, Any]: + return { + "type": "DeclarativeSource", + "definitions": {"shared": 
{"HttpRequester": {"url_base": "https://example.com/v2/"}}}, + "streams": [ + { + "type": "DeclarativeStream", + "name": "A", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "url_base": "https://example.com/v1/", + "path": "A", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + "decoder": {"type": "JsonDecoder"}, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/A"}, + }, + }, + { + "type": "DeclarativeStream", + "name": "B", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "url_base": {"$ref": "#/definitions/shared/HttpRequester/url_base"}, + "path": "B", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + "decoder": {"type": "JsonDecoder"}, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/B"}, + }, + }, + { + "type": "DeclarativeStream", + "name": "C", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "url_base": "https://example.com/v1/", + "path": "C", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + "decoder": {"type": "JsonDecoder"}, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/C"}, + }, + }, + { + "type": "DeclarativeStream", + "name": "D", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "url_base": {"$ref": "#/definitions/shared/HttpRequester/url_base"}, + "path": "D", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + "decoder": {"type": "JsonDecoder"}, + }, + "schema_loader": { + 
"type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/D"}, + }, + }, + { + "type": "DeclarativeStream", + "name": "E", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "url_base": {"$ref": "#/definitions/shared/HttpRequester/url_base"}, + "path": "E", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + "decoder": {"type": "JsonDecoder"}, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/E"}, + }, + }, + ], + "schemas": { + "A": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {}, + }, + "B": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {}, + }, + "C": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {}, + }, + "D": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {}, + }, + "E": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {}, + }, + }, + } + + +@pytest.fixture +def manifest_with_url_base_to_migrate_to_url() -> Dict[str, Any]: + return { + "version": "0.0.0", + "type": "DeclarativeSource", + "check": { + "type": "CheckStream", + "stream_names": ["A"], + }, + "definitions": { + "streams": { + "A": { + "type": "DeclarativeStream", + "name": "A", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "$ref": "#/definitions/requester_A", + "path": "/path_to_A", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": 
"#/schemas/A"}, + }, + }, + "B": { + "type": "DeclarativeStream", + "name": "B", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "$ref": "#/definitions/requester_A", + "path": "path_to_A", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/B"}, + }, + }, + "C": { + "type": "DeclarativeStream", + "name": "C", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "$ref": "#/definitions/requester_B", + "path": "path_to_B", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/C"}, + }, + }, + "D": { + "type": "DeclarativeStream", + "name": "D", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "$ref": "#/definitions/requester_B", + # ! the double-slash is intentional here for the test. 
+ "path": "//path_to_B", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/D"}, + }, + }, + "E": { + "type": "DeclarativeStream", + "name": "E", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "$ref": "#/definitions/requester_B", + "path": "/path_to_B", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/E"}, + }, + }, + }, + # both requesters have duplicated `url_base`, + # which should be migrated to `url` in the new format + # and the `url_base` and `path` key should be removed + "requester_A": { + "type": "HttpRequester", + "url_base": "https://example.com/v1/", + }, + "requester_B": { + "type": "HttpRequester", + "url_base": "https://example.com/v2/", + }, + }, + "streams": [ + {"$ref": "#/definitions/streams/A"}, + {"$ref": "#/definitions/streams/B"}, + {"$ref": "#/definitions/streams/C"}, + {"$ref": "#/definitions/streams/D"}, + {"$ref": "#/definitions/streams/E"}, + ], + "schemas": { + "A": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": { + "field_a1": { + "type": "string", + }, + }, + }, + "B": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": { + "field_b1": { + "type": "string", + }, + }, + }, + "C": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": { + "field_c1": { + "type": "string", + }, + }, + }, + "D": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": { + "field_d1": 
{ + "type": "string", + }, + }, + }, + "E": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": { + "field_e1": { + "type": "string", + }, + }, + }, + }, + } + + +@pytest.fixture +def expected_manifest_with_url_base_migrated_to_url() -> Dict[str, Any]: + return { + "version": "0.0.0", + "type": "DeclarativeSource", + "check": {"type": "CheckStream", "stream_names": ["A"]}, + "definitions": { + "streams": { + "A": { + "type": "DeclarativeStream", + "name": "A", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v1/path_to_A", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_a1": {"type": "string"}}, + }, + }, + }, + "B": { + "type": "DeclarativeStream", + "name": "B", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v1/path_to_A", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_b1": {"type": "string"}}, + }, + }, + }, + "C": { + "type": "DeclarativeStream", + "name": "C", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v2/path_to_B", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + 
"schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_c1": {"type": "string"}}, + }, + }, + }, + "D": { + "type": "DeclarativeStream", + "name": "D", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v2/path_to_B", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_d1": {"type": "string"}}, + }, + }, + }, + "E": { + "type": "DeclarativeStream", + "name": "E", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v2/path_to_B", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_e1": {"type": "string"}}, + }, + }, + }, + }, + "requester_A": {"type": "HttpRequester", "url": "https://example.com/v1/"}, + "requester_B": {"type": "HttpRequester", "url": "https://example.com/v2/"}, + }, + "streams": [ + { + "type": "DeclarativeStream", + "name": "A", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v1/path_to_A", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": 
"object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_a1": {"type": "string"}}, + }, + }, + }, + { + "type": "DeclarativeStream", + "name": "B", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v1/path_to_A", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_b1": {"type": "string"}}, + }, + }, + }, + { + "type": "DeclarativeStream", + "name": "C", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v2/path_to_B", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_c1": {"type": "string"}}, + }, + }, + }, + { + "type": "DeclarativeStream", + "name": "D", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v2/path_to_B", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_d1": {"type": "string"}}, + }, + }, + }, + { + "type": "DeclarativeStream", + "name": "E", + "retriever": { + "type": "SimpleRetriever", + 
"requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v2/path_to_B", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_e1": {"type": "string"}}, + }, + }, + }, + ], + "schemas": { + "A": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_a1": {"type": "string"}}, + }, + "B": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_b1": {"type": "string"}}, + }, + "C": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_c1": {"type": "string"}}, + }, + "D": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_d1": {"type": "string"}}, + }, + "E": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_e1": {"type": "string"}}, + }, + }, + } + + +@pytest.fixture +def manifest_with_migrated_url_base_and_path_is_joined_to_url() -> Dict[str, Any]: + return { + "version": "0.0.0", + "type": "DeclarativeSource", + "check": {"type": "CheckStream", "stream_names": ["A"]}, + "definitions": {}, + "streams": [ + { + "type": "DeclarativeStream", + "name": "A", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v1/path_to_A", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": 
"InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_a1": {"type": "string"}}, + }, + }, + }, + { + "type": "DeclarativeStream", + "name": "B", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v2/path_to_B", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_b1": {"type": "string"}}, + }, + }, + }, + { + "type": "DeclarativeStream", + "name": "C", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v2/path_to_B", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_c1": {"type": "string"}}, + }, + }, + }, + ], + "schemas": { + "A": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_a1": {"type": "string"}}, + }, + "B": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_b1": {"type": "string"}}, + }, + "C": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_c1": {"type": "string"}}, + }, + }, + } diff --git a/unit_tests/sources/declarative/parsers/test_manifest_migration.py 
b/unit_tests/sources/declarative/parsers/test_manifest_migration.py new file mode 100644 index 000000000..89317def8 --- /dev/null +++ b/unit_tests/sources/declarative/parsers/test_manifest_migration.py @@ -0,0 +1,49 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from airbyte_cdk.sources.declarative.manifest_declarative_source import ( + _get_declarative_component_schema, +) +from airbyte_cdk.sources.declarative.parsers.manifest_migration_handler import ( + ManifestMigrationHandler, +) +from airbyte_cdk.sources.declarative.parsers.manifest_reference_resolver import ( + ManifestReferenceResolver, +) + +resolver = ManifestReferenceResolver() + + +def test_manifest_resolve_migrate( + manifest_with_url_base_to_migrate_to_url, + expected_manifest_with_url_base_migrated_to_url, +) -> None: + """ + This test is to check that the manifest is migrated and normalized + when the `url_base` is migrated to `url` and the `path` is joined to `url`. + """ + schema = _get_declarative_component_schema() + resolved_manifest = resolver.preprocess_manifest(manifest_with_url_base_to_migrate_to_url) + migrated_manifest = ManifestMigrationHandler(resolved_manifest, schema).migrate() + + assert migrated_manifest == expected_manifest_with_url_base_migrated_to_url + + +def test_manifest_resolve_do_not_migrate( + manifest_with_migrated_url_base_and_path_is_joined_to_url, +) -> None: + """ + This test is to check that the manifest remains migrated already, + after the `url_base` and `path` is joined to `url`. 
+ """ + + schema = _get_declarative_component_schema() + resolved_manifest = resolver.preprocess_manifest( + manifest_with_migrated_url_base_and_path_is_joined_to_url + ) + migrated_manifest = ManifestMigrationHandler(resolved_manifest, schema).migrate() + + # it's expected that the manifest is the same after the processing + assert migrated_manifest == manifest_with_migrated_url_base_and_path_is_joined_to_url From a73d162dbffdcece49b7c363c35546d72d633d36 Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Wed, 9 Apr 2025 15:16:22 +0300 Subject: [PATCH 02/18] add stack of migrations --- .../declarative_component_schema.yaml | 16 ++-- .../parsers/manifest_migration_handler.py | 78 ++++++++++++++----- 2 files changed, 69 insertions(+), 25 deletions(-) diff --git a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index 63d27a8e8..cd00ba0ce 100644 --- a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -1863,13 +1863,15 @@ definitions: description: The `url_base` has been deprecated, in favor of the `url` field. original_key: url_base replacement_key: url - - type: handle_url_parts - description: The `path` has been deprecated, in favor of the `url` field. The value from the `path` field will be joined to the `url` field. - original_key: path - replacement_key: url - - type: remove_field - description: The `path` has been deprecated, in favor of the `url` field. - original_key: path + - type: stack + migrations: + - type: handle_url_parts + description: The `path` has been deprecated, in favor of the `url` field. The value from the `path` field will be joined to the `url` field. + original_key: path + replacement_key: url + - type: remove_field + description: The `path` has been deprecated, in favor of the `url` field. 
+ original_key: path title: HTTP Requester description: Requester submitting HTTP requests and extracting records from the response. type: object diff --git a/airbyte_cdk/sources/declarative/parsers/manifest_migration_handler.py b/airbyte_cdk/sources/declarative/parsers/manifest_migration_handler.py index 204480540..1639b9d32 100644 --- a/airbyte_cdk/sources/declarative/parsers/manifest_migration_handler.py +++ b/airbyte_cdk/sources/declarative/parsers/manifest_migration_handler.py @@ -12,9 +12,9 @@ # Type definitions for better readability ManifestType = Dict[str, Any] DefinitionsType = Dict[str, Any] -MigrationsType = List[Tuple[str, str, Optional[str]]] MigrationType = Tuple[str, str, Optional[str]] -MigratedTagsType = Dict[str, List[Tuple[str, str, Optional[str]]]] +MigrationsType = List[MigrationType] +MigratedTagsType = Dict[str, MigrationsType] MigrationFunctionType = Callable[[Any, MigrationType], None] @@ -22,6 +22,7 @@ TYPE_TAG = "type" DEF_TAG = "definitions" MIGRATIONS_TAG = "migrations" +MIGRATIONS_STACK_TAG = "stack" ORIGINAL_KEY = "original_key" REPLACEMENT_KEY = "replacement_key" @@ -67,18 +68,28 @@ def __init__( ) -> None: self._manifest = manifest self._declarative_schema = declarative_schema - self._migrated_manifest: ManifestType = copy.deepcopy(self._manifest) # get the declared migrations from schema self._migration_tags = self._get_migration_schema_tags(self._declarative_schema) def migrate(self) -> ManifestType: + """ + Migrates the manifest by applying configured migrations to different component types. + + This method iterates through all registered component types and their associated + migrations in `_migration_tags`, applying them sequentially by calling + `_handle_migrations` for each component type. + + Returns: + ManifestType: The migrated manifest if migration succeeds, or the original + manifest if a ManifestMigrationException occurs during migration. 
+ """ try: for component_type, migrations in self._migration_tags.items(): self._handle_migrations(component_type, migrations) return self._migrated_manifest - except ManifestMigrationException as e: - # if any errors occurs we return the original resolved manifest + except ManifestMigrationException: + # if any errors occur we return the original resolved manifest return self._manifest def _get_migration_schema_tags(self, schema: DefinitionsType) -> MigratedTagsType: @@ -99,23 +110,37 @@ def _get_migration_schema_tags(self, schema: DefinitionsType) -> MigratedTagsTyp for component_name, component_declaration in schema_definitions.items(): if MIGRATIONS_TAG in component_declaration.keys(): - # create the placeholder for the migrations migrations_tags[component_name] = [] - # iterate over the migrations + for migration in component_declaration[MIGRATIONS_TAG]: - migrations_tags[component_name].append( - ( - # type of migration - migration.get(TYPE_TAG), - # what is the migrated key - migration.get(ORIGINAL_KEY), - # (optional) what is the new key to be used - migration.get(REPLACEMENT_KEY), - ), - ) + # register each migration of the stack (own loop name, so the outer `migration` is not rebound) + if migration[TYPE_TAG] == MIGRATIONS_STACK_TAG: + for stacked_migration in migration[MIGRATIONS_TAG]: + self._register_migration(migrations_tags, component_name, stacked_migration) + # register a single migration + else: + self._register_migration(migrations_tags, component_name, migration) return migrations_tags + def _register_migration( + self, + migrations_tags: MigratedTagsType, + component_name: str, + migration: Dict[str, Any], + ) -> None: + """ + Registers the migration in the migrations_tags dictionary.
+ """ + + migrations_tags[component_name].append( + ( + migration[TYPE_TAG], # type of migration + migration[ORIGINAL_KEY], # what is the migrated key + migration.get(REPLACEMENT_KEY), # (optional) what is the new key to be used + ), + ) + def _handle_migrations( self, component_type: str, @@ -138,6 +163,21 @@ def _handle_migrations( raise ManifestMigrationException(f"Failed to migrate the manifest: {e}") from e def _process_migration(self, obj: Any, component_type: str, migration: MigrationType) -> None: + """ + Process a migration rule by recursively traversing through a nested data structure. + + This method applies migrations to components of a specified type that contain the migrated key. + It recursively processes dictionaries and lists, looking for components that match the criteria. + Migration is skipped for component types listed in NON_MIGRATABLE_TYPES. + + Args: + obj: The object to process, which can be a dictionary, list, or other type. + component_type: The type of component to apply the migration to. + migration: A tuple containing migration type, migrated key, and additional migration info. 
+ + Returns: + None + """ migration_type, migrated_key, _ = migration if isinstance(obj, dict): @@ -146,9 +186,11 @@ def _process_migration(self, obj: Any, component_type: str, migration: Migration # check for component type match the designed migration if TYPE_TAG in obj_keys: obj_type = obj[TYPE_TAG] - # do not migrate if the type is not in the list of migratable types + + # do not migrate if the particular type is in the list of non-migratable types if obj_type in NON_MIGRATABLE_TYPES: return + if obj_type == component_type and migrated_key in obj_keys: if migration_type in self._migration_type_mapping.keys(): # Call the appropriate function based on the migration type From b358c0210908a1412da97b0c086b84ada0d55b28 Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Thu, 10 Apr 2025 16:28:38 +0300 Subject: [PATCH 03/18] add deprecation warnings --- .../declarative_component_schema.yaml | 2 + .../models/base_model_with_deprecations.py | 71 +++++++++++++++++++ .../models/declarative_component_schema.py | 8 ++- bin/generate_component_manifest_files.py | 68 +++++++++++++++++- 4 files changed, 147 insertions(+), 2 deletions(-) create mode 100644 airbyte_cdk/sources/declarative/models/base_model_with_deprecations.py diff --git a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index cd00ba0ce..f871bd594 100644 --- a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -1883,6 +1883,7 @@ definitions: enum: [HttpRequester] url_base: deprecated: true + deprecation_message: "Use `url` field instead." sharable: true title: API Base URL description: Deprecated, use the `url` instead. Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this. 
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.

# THIS IS A STATIC CLASS MODEL USED TO DISPLAY DEPRECATION WARNINGS
# WHEN DEPRECATED FIELDS ARE ACCESSED

import warnings
from typing import Any

from pydantic.v1 import BaseModel

# format the warning message
warnings.formatwarning = (
    lambda message, category, *args, **kwargs: f"{category.__name__}: {message}"
)

FIELDS_TAG = "__fields__"
DEPRECATED = "deprecated"
DEPRECATION_MESSAGE = "deprecation_message"


class BaseModelWithDeprecations(BaseModel):
    """
    Pydantic BaseModel that warns when deprecated fields are set or read.

    A field counts as deprecated when its `Field(...)` declaration carries the extra key
    `deprecated=True`; the optional extra key `deprecation_message` is appended to the warning.
    """

    def _deprecated_warning(self, field_name: str, message: str) -> None:
        """
        Emit a `DeprecationWarning` for a deprecated field.

        Args:
            field_name (str): Name of the deprecated field.
            message (str): Extra guidance appended to the warning text.
        """

        warnings.warn(
            f"Component type: `{self.__class__.__name__}`. Field '{field_name}' is deprecated. {message}",
            DeprecationWarning,
        )

    def __init__(self, **data: Any) -> None:
        """
        Show warnings for deprecated fields during component initialization.

        Only keys of `data` that match a declared field name are inspected.
        """

        model_fields = self.__fields__

        for field_name in data:
            model_field = model_fields.get(field_name)
            if model_field is None:
                continue
            extra = model_field.field_info.extra
            if extra.get(DEPRECATED, False):
                self._deprecated_warning(field_name, extra.get(DEPRECATION_MESSAGE, ""))

        # Call the parent constructor
        super().__init__(**data)

    def __getattribute__(self, name: str) -> Any:
        """
        Show warnings for deprecated fields during field usage.

        The previous implementation only ran its check when `name == "__fields__"` and then
        looked up a field called `"__fields__"`, so it could never warn. The check now runs
        for regular attribute names and passes the configured message text (not the field
        object) to the warning.
        """

        value = super().__getattribute__(name)

        # Underscore-prefixed names are skipped: this also prevents recursion, because
        # `_deprecated_warning`, `__class__` and `__fields__` are resolved through this hook.
        # A `None` value is skipped so that merely reading an unset deprecated field stays silent.
        if name.startswith("_") or value is None:
            return value

        try:
            model_fields = super().__getattribute__(FIELDS_TAG)
        except AttributeError:
            return value

        model_field = model_fields.get(name)
        if model_field is not None and model_field.field_info.extra.get(DEPRECATED, False):
            self._deprecated_warning(
                name, model_field.field_info.extra.get(DEPRECATION_MESSAGE, "")
            )

        return value
def replace_base_model_for_classes_with_deprecated_fields(post_processed_content: str) -> str:
    """
    Replace the base model for classes with deprecated fields.

    Looks for classes declared as `class X(BaseModel):` whose body (everything up to the next
    `class ...(` header, or the end of the text) contains `deprecated=True`, and rewrites their
    base to `BaseModelWithDeprecations`. The matching import is inserted after the first blank
    line that follows the `from pydantic.v1 import` statement.

    Args:
        post_processed_content: Source text of the generated models module.

    Returns:
        The rewritten source text. The input is returned unchanged when no class has a
        deprecated field, or when the import cannot be placed (no `from pydantic.v1 import`
        anchor followed by a blank line) - rewriting the bases without the import would
        produce a module that references an undefined name.
    """

    class_header = re.compile(r"class (\w+)\(BaseModel\):")
    any_class_header = re.compile(r"class \w+\(")
    deprecated_flag = re.compile(r"deprecated\s*=\s*True")

    # Find classes with deprecated fields
    classes_with_deprecated_fields = set()
    for class_match in class_header.finditer(post_processed_content):
        # The class body runs until the next class definition or the end of the file
        next_class_match = any_class_header.search(post_processed_content, class_match.end())
        class_end = (
            len(post_processed_content) if next_class_match is None else next_class_match.start()
        )

        # Check if any field has deprecated=True
        if deprecated_flag.search(post_processed_content, class_match.start(), class_end):
            classes_with_deprecated_fields.add(class_match.group(1))

    if not classes_with_deprecated_fields:
        return post_processed_content

    # Update the imports to include the new base model with deprecation warnings:
    # insert it after the imports but before the class definitions.
    anchor = post_processed_content.find("from pydantic.v1 import")
    imports_end = post_processed_content.find("\n\n", anchor) if anchor != -1 else -1
    if imports_end == -1:
        return post_processed_content

    post_processed_content = (
        post_processed_content[:imports_end]
        + "\n\n"
        + "from airbyte_cdk.sources.declarative.models.base_model_with_deprecations import (\n"
        + "    BaseModelWithDeprecations,\n"
        + ")"
        + post_processed_content[imports_end:]
    )

    def _swap_base(header_match: "re.Match[str]") -> str:
        # Use the `BaseModelWithDeprecations` base model for the classes with deprecated fields
        class_name = header_match.group(1)
        if class_name in classes_with_deprecated_fields:
            return f"class {class_name}(BaseModelWithDeprecations):"
        return header_match.group(0)

    return class_header.sub(_swap_base, post_processed_content)
["mkdir", "/generated_post_processed"], use_entrypoint=True @@ -41,6 +99,11 @@ async def post_process_codegen(codegen_container: dagger.Container): post_processed_content = original_content.replace( " _parameters:", " parameters:" ).replace("from pydantic", "from pydantic.v1") + + post_processed_content = replace_base_model_for_classes_with_deprecated_fields( + post_processed_content + ) + codegen_container = codegen_container.with_new_file( f"/generated_post_processed/{generated_file}", contents=post_processed_content ) @@ -75,9 +138,12 @@ async def main(): "--set-default-enum-member", "--use-double-quotes", "--remove-special-field-name-prefix", - # account the `deprecated` flag provided for the field. + # allow usage of the extra key such as `deprecated`, etc. "--field-extra-keys", + # account the `deprecated` flag provided for the field. "deprecated", + # account the `deprecation_message` provided for the field. + "deprecation_message", ], use_entrypoint=True, ) From a0bc96e45b691b1630fa2ed56fc6456340a4fa24 Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Fri, 11 Apr 2025 14:40:39 +0300 Subject: [PATCH 04/18] updated the migrations approach --- .../declarative_component_schema.yaml | 14 - .../manifest_declarative_source.py | 11 +- .../migrations/manifest/__init__.py | 0 .../migrations/manifest/manifest_migration.py | 75 +++ .../migrations/manifest/migration_handler.py | 62 ++ .../manifest/migrations/__init__.py | 11 + .../http_requester_path_to_url_migration.py | 36 ++ ...ttp_requester_url_base_to_url_migration.py | 20 + .../manifest/migrations_registry.py | 17 + .../models/base_model_with_deprecations.py | 17 + .../declarative/migrations/conftest.py | 600 ++++++++++++++++++ .../test_manifest_migration.py | 12 +- .../sources/declarative/parsers/conftest.py | 593 ----------------- 13 files changed, 847 insertions(+), 621 deletions(-) create mode 100644 airbyte_cdk/sources/declarative/migrations/manifest/__init__.py create mode 100644 
airbyte_cdk/sources/declarative/migrations/manifest/manifest_migration.py create mode 100644 airbyte_cdk/sources/declarative/migrations/manifest/migration_handler.py create mode 100644 airbyte_cdk/sources/declarative/migrations/manifest/migrations/__init__.py create mode 100644 airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_path_to_url_migration.py create mode 100644 airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_url_base_to_url_migration.py create mode 100644 airbyte_cdk/sources/declarative/migrations/manifest/migrations_registry.py create mode 100644 unit_tests/sources/declarative/migrations/conftest.py rename unit_tests/sources/declarative/{parsers => migrations}/test_manifest_migration.py (73%) diff --git a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index f871bd594..a40d18037 100644 --- a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -1858,20 +1858,6 @@ definitions: type: enum: [Bearer] HttpRequester: - migrations: - - type: replace_field - description: The `url_base` has been deprecated, in favor of the `url` field. - original_key: url_base - replacement_key: url - - type: stack - migrations: - - type: handle_url_parts - description: The `path` has been deprecated, in favor of the `url` field. The value from the `path` field will be joined to the `url` field. - original_key: path - replacement_key: url - - type: remove_field - description: The `path` has been deprecated, in favor of the `url` field. - original_key: path title: HTTP Requester description: Requester submitting HTTP requests and extracting records from the response. 
type: object diff --git a/airbyte_cdk/sources/declarative/manifest_declarative_source.py b/airbyte_cdk/sources/declarative/manifest_declarative_source.py index a8683dd8a..983dae839 100644 --- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py +++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py @@ -26,6 +26,9 @@ from airbyte_cdk.sources.declarative.checks import COMPONENTS_CHECKER_TYPE_MAPPING from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource +from airbyte_cdk.sources.declarative.migrations.manifest.migration_handler import ( + ManifestMigrationHandler, +) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( DeclarativeStream as DeclarativeStreamModel, ) @@ -39,9 +42,6 @@ from airbyte_cdk.sources.declarative.parsers.manifest_component_transformer import ( ManifestComponentTransformer, ) -from airbyte_cdk.sources.declarative.parsers.manifest_migration_handler import ( - ManifestMigrationHandler, -) from airbyte_cdk.sources.declarative.parsers.manifest_reference_resolver import ( ManifestReferenceResolver, ) @@ -114,10 +114,9 @@ def __init__( "", resolved_source_config, {} ) - # migrate definitions to the new format, if any are present migrated_source_config = ManifestMigrationHandler( - propagated_source_config, self._declarative_component_schema - ).migrate() + propagated_source_config + ).apply_migrations() self._source_config = migrated_source_config self._debug = debug diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/__init__.py b/airbyte_cdk/sources/declarative/migrations/manifest/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/manifest_migration.py b/airbyte_cdk/sources/declarative/migrations/manifest/manifest_migration.py new file mode 100644 index 000000000..5d70dfb83 --- /dev/null +++ 
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

from abc import ABC, abstractmethod
from typing import Any, Dict

ManifestType = Dict[str, Any]


TYPE_TAG = "type"

# Kept as a list on purpose: the membership test below must tolerate unhashable `type`
# values (a set would raise `TypeError` for e.g. a list-valued `type`).
NON_MIGRATABLE_TYPES = [
    "DynamicDeclarativeStream",
]


class ManifestMigration(ABC):
    """
    Base class for a single manifest migration.

    Subclasses implement `should_migrate` and `migrate`; `_process_manifest` walks a manifest
    and applies them in place. Deriving from `ABC` makes the `@abstractmethod` markers
    effective, so an incomplete subclass fails at instantiation.
    """

    @abstractmethod
    def should_migrate(self, manifest: ManifestType) -> bool:
        """
        Check if the manifest should be migrated.

        :param manifest: The manifest to potentially migrate

        :return: true if the manifest is of the expected format and should be migrated. False otherwise.
        """

    @abstractmethod
    def migrate(self, manifest: ManifestType) -> None:
        """
        Migrate the manifest. Assumes should_migrate(manifest) returned True.

        :param manifest: The manifest to migrate (modified in place)
        """

    def _process_manifest(self, obj: Any) -> None:
        """
        Recursively processes a manifest object, migrating components that match the migration criteria.

        This method traverses the entire manifest structure (dictionaries and lists) and applies
        migrations to components that:
        1. Have a type tag
        2. Are not in the list of non-migratable types
        3. Meet the conditions defined in the should_migrate method

        Parameters:
            obj (Any): The object to process, which can be a dictionary, list, or any other type.
                Dictionary objects are checked for component type tags and potentially migrated.
                List objects have each of their items processed recursively.
                Other types are ignored.

        Returns:
            None, since we process the manifest in place.
        """
        if isinstance(obj, dict):
            if TYPE_TAG in obj:
                # a non-migratable component is skipped together with everything nested in it
                if obj[TYPE_TAG] in NON_MIGRATABLE_TYPES:
                    return

                if self.should_migrate(obj):
                    self.migrate(obj)

            # Process all values in the dictionary; iterate over a snapshot because
            # nested migrations mutate dictionaries in place.
            for value in list(obj.values()):
                self._process_manifest(value)
        elif isinstance(obj, list):
            # Process all items in the list
            for item in obj:
                self._process_manifest(item)
+ + Returns: + ManifestType: The migrated manifest if all migrations succeeded, or the original + manifest if any migration failed. + """ + try: + for migration_class in migrations_registry: + self._handle_migration(migration_class) + return self._migrated_manifest + except ManifestMigrationException: + # if any errors occur we return the original resolved manifest + return self._manifest + + def _handle_migration(self, migration_class: Type[ManifestMigration]) -> None: + """ + Handles a single manifest migration by instantiating the migration class and processing the manifest. + + Args: + migration_class (Type[ManifestMigration]): The migration class to apply to the manifest. + + Raises: + ManifestMigrationException: If the migration process encounters any errors. + """ + try: + migration_class()._process_manifest(self._migrated_manifest) + except Exception as e: + raise ManifestMigrationException(f"Failed to migrate the manifest: {e}") from e diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/__init__.py b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/__init__.py new file mode 100644 index 000000000..0b8631018 --- /dev/null +++ b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/__init__.py @@ -0,0 +1,11 @@ +from airbyte_cdk.sources.declarative.migrations.manifest.migrations.http_requester_path_to_url_migration import ( + HttpRequesterPathToUrlMigration, +) +from airbyte_cdk.sources.declarative.migrations.manifest.migrations.http_requester_url_base_to_url_migration import ( + HttpRequesterUrlBaseToUrlMigration, +) + +__all__ = [ + "HttpRequesterUrlBaseToUrlMigration", + "HttpRequesterPathToUrlMigration", +] diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_path_to_url_migration.py b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_path_to_url_migration.py new file mode 100644 index 000000000..b7abb7c15 --- /dev/null +++ 
from urllib.parse import urljoin

from airbyte_cdk.sources.declarative.migrations.manifest.manifest_migration import (
    TYPE_TAG,
    ManifestMigration,
    ManifestType,
)
from airbyte_cdk.sources.types import EmptyString


class HttpRequesterPathToUrlMigration(ManifestMigration):
    """
    Fold the deprecated `path` field of an `HttpRequester` component into its `url` field.

    The `path` key is removed and its value, stripped of leading slashes, is appended to `url`.
    """

    component_type = "HttpRequester"
    original_key = "path"
    replacement_key = "url"

    def should_migrate(self, manifest: ManifestType) -> bool:
        """
        Return True for an `HttpRequester` that has both a `path` and a `url` to join it to.

        A component without `url` is left untouched: previously it raised `KeyError` inside
        `migrate`, which made the whole migration run fail.
        """
        return (
            manifest[TYPE_TAG] == self.component_type
            and self.original_key in manifest
            and self.replacement_key in manifest
        )

    def migrate(self, manifest: ManifestType) -> None:
        """
        Join `path` onto `url` in place and drop the `path` key.

        An empty or `None` path leaves `url` unchanged.
        """
        url = manifest[self.replacement_key]
        raw_path = manifest.pop(self.original_key, None)

        # `None` is handled before `.lstrip()`; the old `is None` check ran after it and was dead code
        path = raw_path.lstrip("/") if raw_path else EmptyString
        if path == EmptyString:
            # nothing to append, the url is already the full url
            return

        manifest[self.replacement_key] = self._join_url_and_path(url, path)

    @staticmethod
    def _join_url_and_path(url: str, path: str) -> str:
        """Append `path` (no leading slash) to `url`, adding the separating slash if needed."""
        # the url might not have a trailing slash, so we add it to join the parts correctly
        if not url.endswith("/"):
            url += "/"

        # Interpolated values are joined verbatim: `urljoin` parses its input as a URL, so a
        # template such as "{{ config['base_url'] or 'https://host' }}/api" loses the empty
        # segment of its embedded "//", and brackets inside the host part can make the parser
        # raise `ValueError`.
        if "{{" in url or "{{" in path:
            return url + path

        return urljoin(url, path)
HttpRequesterUrlBaseToUrlMigration(ManifestMigration): + component_type = "HttpRequester" + original_key = "url_base" + replacement_key = "url" + + def should_migrate(self, manifest: ManifestType) -> bool: + return manifest[TYPE_TAG] == self.component_type and self.original_key in list( + manifest.keys() + ) + + def migrate(self, manifest: ManifestType) -> None: + manifest[self.replacement_key] = manifest[self.original_key] + manifest.pop(self.original_key, None) diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/migrations_registry.py b/airbyte_cdk/sources/declarative/migrations/manifest/migrations_registry.py new file mode 100644 index 000000000..40a741e82 --- /dev/null +++ b/airbyte_cdk/sources/declarative/migrations/manifest/migrations_registry.py @@ -0,0 +1,17 @@ +from typing import List, Type + +from airbyte_cdk.sources.declarative.migrations.manifest.manifest_migration import ( + ManifestMigration, +) +from airbyte_cdk.sources.declarative.migrations.manifest.migrations import ( + HttpRequesterPathToUrlMigration, + HttpRequesterUrlBaseToUrlMigration, +) + +# This is the registry of all the migrations that are available. +# Add new migrations to the bottom of the list, +# ( ! ) make sure the order of the migrations is correct. 
+migrations_registry: List[Type[ManifestMigration]] = [ + HttpRequesterUrlBaseToUrlMigration, + HttpRequesterPathToUrlMigration, +] diff --git a/airbyte_cdk/sources/declarative/models/base_model_with_deprecations.py b/airbyte_cdk/sources/declarative/models/base_model_with_deprecations.py index 152e26aa8..50adc38a8 100644 --- a/airbyte_cdk/sources/declarative/models/base_model_with_deprecations.py +++ b/airbyte_cdk/sources/declarative/models/base_model_with_deprecations.py @@ -8,6 +8,13 @@ from pydantic.v1 import BaseModel +from airbyte_cdk.models import ( + AirbyteLogMessage, + AirbyteMessage, + Level, + Type, +) + # format the warning message warnings.formatwarning = ( lambda message, category, *args, **kwargs: f"{category.__name__}: {message}" @@ -36,6 +43,16 @@ def _deprecated_warning(self, field_name: str, message: str) -> None: DeprecationWarning, ) + # print( + # AirbyteMessage( + # type=Type.LOG, + # log=AirbyteLogMessage( + # level=Level.WARN, + # message=f"Component type: `{self.__class__.__name__}`. Field '{field_name}' is deprecated. {message}", + # ), + # ) + # ) + def __init__(self, **data: Any) -> None: """ Show warnings for deprecated fields during component initialization. diff --git a/unit_tests/sources/declarative/migrations/conftest.py b/unit_tests/sources/declarative/migrations/conftest.py new file mode 100644 index 000000000..38cdbdfb1 --- /dev/null +++ b/unit_tests/sources/declarative/migrations/conftest.py @@ -0,0 +1,600 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + +from typing import Any, Dict + +import pytest + + +@pytest.fixture +def manifest_with_url_base_to_migrate_to_url() -> Dict[str, Any]: + return { + "version": "0.0.0", + "type": "DeclarativeSource", + "check": { + "type": "CheckStream", + "stream_names": ["A"], + }, + "definitions": { + "streams": { + "A": { + "type": "DeclarativeStream", + "name": "A", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "$ref": "#/definitions/requester_A", + "path": "/path_to_A", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/A"}, + }, + }, + "B": { + "type": "DeclarativeStream", + "name": "B", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "$ref": "#/definitions/requester_A", + "path": "path_to_A", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/B"}, + }, + }, + "C": { + "type": "DeclarativeStream", + "name": "C", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "$ref": "#/definitions/requester_B", + "path": "path_to_B", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/C"}, + }, + }, + "D": { + "type": "DeclarativeStream", + "name": "D", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "$ref": "#/definitions/requester_B", + # ! the double-slash is intentional here for the test. 
+ "path": "//path_to_B", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/D"}, + }, + }, + "E": { + "type": "DeclarativeStream", + "name": "E", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "$ref": "#/definitions/requester_B", + "path": "/path_to_B", + "http_method": "GET", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": {"$ref": "#/schemas/E"}, + }, + }, + }, + # both requesters have duplicated `url_base`, + # which should be migrated to `url` in the new format + # and the `url_base` and `path` key should be removed + "requester_A": { + "type": "HttpRequester", + "url_base": "https://example.com/v1/", + }, + "requester_B": { + "type": "HttpRequester", + "url_base": "https://example.com/v2/", + }, + }, + "streams": [ + {"$ref": "#/definitions/streams/A"}, + {"$ref": "#/definitions/streams/B"}, + {"$ref": "#/definitions/streams/C"}, + {"$ref": "#/definitions/streams/D"}, + {"$ref": "#/definitions/streams/E"}, + ], + "schemas": { + "A": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": { + "field_a1": { + "type": "string", + }, + }, + }, + "B": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": { + "field_b1": { + "type": "string", + }, + }, + }, + "C": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": { + "field_c1": { + "type": "string", + }, + }, + }, + "D": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": { + "field_d1": 
{ + "type": "string", + }, + }, + }, + "E": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": { + "field_e1": { + "type": "string", + }, + }, + }, + }, + } + + +@pytest.fixture +def expected_manifest_with_url_base_migrated_to_url() -> Dict[str, Any]: + return { + "version": "0.0.0", + "type": "DeclarativeSource", + "check": {"type": "CheckStream", "stream_names": ["A"]}, + "definitions": { + "streams": { + "A": { + "type": "DeclarativeStream", + "name": "A", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v1/path_to_A", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_a1": {"type": "string"}}, + }, + }, + }, + "B": { + "type": "DeclarativeStream", + "name": "B", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v1/path_to_A", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_b1": {"type": "string"}}, + }, + }, + }, + "C": { + "type": "DeclarativeStream", + "name": "C", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v2/path_to_B", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + 
"schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_c1": {"type": "string"}}, + }, + }, + }, + "D": { + "type": "DeclarativeStream", + "name": "D", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v2/path_to_B", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_d1": {"type": "string"}}, + }, + }, + }, + "E": { + "type": "DeclarativeStream", + "name": "E", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v2/path_to_B", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_e1": {"type": "string"}}, + }, + }, + }, + }, + "requester_A": {"type": "HttpRequester", "url": "https://example.com/v1/"}, + "requester_B": {"type": "HttpRequester", "url": "https://example.com/v2/"}, + }, + "streams": [ + { + "type": "DeclarativeStream", + "name": "A", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v1/path_to_A", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": 
"object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_a1": {"type": "string"}}, + }, + }, + }, + { + "type": "DeclarativeStream", + "name": "B", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v1/path_to_A", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_b1": {"type": "string"}}, + }, + }, + }, + { + "type": "DeclarativeStream", + "name": "C", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v2/path_to_B", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_c1": {"type": "string"}}, + }, + }, + }, + { + "type": "DeclarativeStream", + "name": "D", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v2/path_to_B", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_d1": {"type": "string"}}, + }, + }, + }, + { + "type": "DeclarativeStream", + "name": "E", + "retriever": { + "type": "SimpleRetriever", + 
"requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v2/path_to_B", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_e1": {"type": "string"}}, + }, + }, + }, + ], + "schemas": { + "A": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_a1": {"type": "string"}}, + }, + "B": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_b1": {"type": "string"}}, + }, + "C": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_c1": {"type": "string"}}, + }, + "D": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_d1": {"type": "string"}}, + }, + "E": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_e1": {"type": "string"}}, + }, + }, + } + + +@pytest.fixture +def manifest_with_migrated_url_base_and_path_is_joined_to_url() -> Dict[str, Any]: + return { + "version": "0.0.0", + "type": "DeclarativeSource", + "check": {"type": "CheckStream", "stream_names": ["A"]}, + "definitions": {}, + "streams": [ + { + "type": "DeclarativeStream", + "name": "A", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v1/path_to_A", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": 
"InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_a1": {"type": "string"}}, + }, + }, + }, + { + "type": "DeclarativeStream", + "name": "B", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v2/path_to_B", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_b1": {"type": "string"}}, + }, + }, + }, + { + "type": "DeclarativeStream", + "name": "C", + "retriever": { + "type": "SimpleRetriever", + "requester": { + "type": "HttpRequester", + "http_method": "GET", + "url": "https://example.com/v2/path_to_B", + }, + "record_selector": { + "type": "RecordSelector", + "extractor": {"type": "DpathExtractor", "field_path": []}, + }, + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_c1": {"type": "string"}}, + }, + }, + }, + ], + "schemas": { + "A": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_a1": {"type": "string"}}, + }, + "B": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_b1": {"type": "string"}}, + }, + "C": { + "type": "object", + "$schema": "http://json-schema.org/draft-07/schema#", + "additionalProperties": True, + "properties": {"field_c1": {"type": "string"}}, + }, + }, + } diff --git a/unit_tests/sources/declarative/parsers/test_manifest_migration.py 
b/unit_tests/sources/declarative/migrations/test_manifest_migration.py similarity index 73% rename from unit_tests/sources/declarative/parsers/test_manifest_migration.py rename to unit_tests/sources/declarative/migrations/test_manifest_migration.py index 89317def8..6490ae7b7 100644 --- a/unit_tests/sources/declarative/parsers/test_manifest_migration.py +++ b/unit_tests/sources/declarative/migrations/test_manifest_migration.py @@ -3,10 +3,7 @@ # -from airbyte_cdk.sources.declarative.manifest_declarative_source import ( - _get_declarative_component_schema, -) -from airbyte_cdk.sources.declarative.parsers.manifest_migration_handler import ( +from airbyte_cdk.sources.declarative.migrations.manifest.migration_handler import ( ManifestMigrationHandler, ) from airbyte_cdk.sources.declarative.parsers.manifest_reference_resolver import ( @@ -24,9 +21,9 @@ def test_manifest_resolve_migrate( This test is to check that the manifest is migrated and normalized when the `url_base` is migrated to `url` and the `path` is joined to `url`. """ - schema = _get_declarative_component_schema() + resolved_manifest = resolver.preprocess_manifest(manifest_with_url_base_to_migrate_to_url) - migrated_manifest = ManifestMigrationHandler(resolved_manifest, schema).migrate() + migrated_manifest = ManifestMigrationHandler(dict(resolved_manifest)).apply_migrations() assert migrated_manifest == expected_manifest_with_url_base_migrated_to_url @@ -39,11 +36,10 @@ def test_manifest_resolve_do_not_migrate( after the `url_base` and `path` is joined to `url`. 
""" - schema = _get_declarative_component_schema() resolved_manifest = resolver.preprocess_manifest( manifest_with_migrated_url_base_and_path_is_joined_to_url ) - migrated_manifest = ManifestMigrationHandler(resolved_manifest, schema).migrate() + migrated_manifest = ManifestMigrationHandler(dict(resolved_manifest)).apply_migrations() # it's expected that the manifest is the same after the processing assert migrated_manifest == manifest_with_migrated_url_base_and_path_is_joined_to_url diff --git a/unit_tests/sources/declarative/parsers/conftest.py b/unit_tests/sources/declarative/parsers/conftest.py index 6de10170e..4b3312b2f 100644 --- a/unit_tests/sources/declarative/parsers/conftest.py +++ b/unit_tests/sources/declarative/parsers/conftest.py @@ -640,596 +640,3 @@ def expected_manifest_with_url_base_shared_definition_normalized() -> Dict[str, }, }, } - - -@pytest.fixture -def manifest_with_url_base_to_migrate_to_url() -> Dict[str, Any]: - return { - "version": "0.0.0", - "type": "DeclarativeSource", - "check": { - "type": "CheckStream", - "stream_names": ["A"], - }, - "definitions": { - "streams": { - "A": { - "type": "DeclarativeStream", - "name": "A", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "$ref": "#/definitions/requester_A", - "path": "/path_to_A", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/A"}, - }, - }, - "B": { - "type": "DeclarativeStream", - "name": "B", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "$ref": "#/definitions/requester_A", - "path": "path_to_A", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/B"}, - }, - }, - "C": { - "type": 
"DeclarativeStream", - "name": "C", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "$ref": "#/definitions/requester_B", - "path": "path_to_B", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/C"}, - }, - }, - "D": { - "type": "DeclarativeStream", - "name": "D", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "$ref": "#/definitions/requester_B", - # ! the double-slash is intentional here for the test. - "path": "//path_to_B", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/D"}, - }, - }, - "E": { - "type": "DeclarativeStream", - "name": "E", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "$ref": "#/definitions/requester_B", - "path": "/path_to_B", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/E"}, - }, - }, - }, - # both requesters have duplicated `url_base`, - # which should be migrated to `url` in the new format - # and the `url_base` and `path` key should be removed - "requester_A": { - "type": "HttpRequester", - "url_base": "https://example.com/v1/", - }, - "requester_B": { - "type": "HttpRequester", - "url_base": "https://example.com/v2/", - }, - }, - "streams": [ - {"$ref": "#/definitions/streams/A"}, - {"$ref": "#/definitions/streams/B"}, - {"$ref": "#/definitions/streams/C"}, - {"$ref": "#/definitions/streams/D"}, - {"$ref": "#/definitions/streams/E"}, - ], - "schemas": { - "A": { - "type": "object", - "$schema": 
"http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": { - "field_a1": { - "type": "string", - }, - }, - }, - "B": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": { - "field_b1": { - "type": "string", - }, - }, - }, - "C": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": { - "field_c1": { - "type": "string", - }, - }, - }, - "D": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": { - "field_d1": { - "type": "string", - }, - }, - }, - "E": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": { - "field_e1": { - "type": "string", - }, - }, - }, - }, - } - - -@pytest.fixture -def expected_manifest_with_url_base_migrated_to_url() -> Dict[str, Any]: - return { - "version": "0.0.0", - "type": "DeclarativeSource", - "check": {"type": "CheckStream", "stream_names": ["A"]}, - "definitions": { - "streams": { - "A": { - "type": "DeclarativeStream", - "name": "A", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "http_method": "GET", - "url": "https://example.com/v1/path_to_A", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {"field_a1": {"type": "string"}}, - }, - }, - }, - "B": { - "type": "DeclarativeStream", - "name": "B", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "http_method": "GET", - "url": "https://example.com/v1/path_to_A", - }, - "record_selector": { - "type": "RecordSelector", 
- "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {"field_b1": {"type": "string"}}, - }, - }, - }, - "C": { - "type": "DeclarativeStream", - "name": "C", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "http_method": "GET", - "url": "https://example.com/v2/path_to_B", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {"field_c1": {"type": "string"}}, - }, - }, - }, - "D": { - "type": "DeclarativeStream", - "name": "D", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "http_method": "GET", - "url": "https://example.com/v2/path_to_B", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {"field_d1": {"type": "string"}}, - }, - }, - }, - "E": { - "type": "DeclarativeStream", - "name": "E", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "http_method": "GET", - "url": "https://example.com/v2/path_to_B", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - 
"properties": {"field_e1": {"type": "string"}}, - }, - }, - }, - }, - "requester_A": {"type": "HttpRequester", "url": "https://example.com/v1/"}, - "requester_B": {"type": "HttpRequester", "url": "https://example.com/v2/"}, - }, - "streams": [ - { - "type": "DeclarativeStream", - "name": "A", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "http_method": "GET", - "url": "https://example.com/v1/path_to_A", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {"field_a1": {"type": "string"}}, - }, - }, - }, - { - "type": "DeclarativeStream", - "name": "B", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "http_method": "GET", - "url": "https://example.com/v1/path_to_A", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {"field_b1": {"type": "string"}}, - }, - }, - }, - { - "type": "DeclarativeStream", - "name": "C", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "http_method": "GET", - "url": "https://example.com/v2/path_to_B", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {"field_c1": {"type": "string"}}, - }, - }, - }, - { - "type": 
"DeclarativeStream", - "name": "D", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "http_method": "GET", - "url": "https://example.com/v2/path_to_B", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {"field_d1": {"type": "string"}}, - }, - }, - }, - { - "type": "DeclarativeStream", - "name": "E", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "http_method": "GET", - "url": "https://example.com/v2/path_to_B", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {"field_e1": {"type": "string"}}, - }, - }, - }, - ], - "schemas": { - "A": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {"field_a1": {"type": "string"}}, - }, - "B": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {"field_b1": {"type": "string"}}, - }, - "C": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {"field_c1": {"type": "string"}}, - }, - "D": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {"field_d1": {"type": "string"}}, - }, - "E": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {"field_e1": 
{"type": "string"}}, - }, - }, - } - - -@pytest.fixture -def manifest_with_migrated_url_base_and_path_is_joined_to_url() -> Dict[str, Any]: - return { - "version": "0.0.0", - "type": "DeclarativeSource", - "check": {"type": "CheckStream", "stream_names": ["A"]}, - "definitions": {}, - "streams": [ - { - "type": "DeclarativeStream", - "name": "A", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "http_method": "GET", - "url": "https://example.com/v1/path_to_A", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {"field_a1": {"type": "string"}}, - }, - }, - }, - { - "type": "DeclarativeStream", - "name": "B", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "http_method": "GET", - "url": "https://example.com/v2/path_to_B", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {"field_b1": {"type": "string"}}, - }, - }, - }, - { - "type": "DeclarativeStream", - "name": "C", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "http_method": "GET", - "url": "https://example.com/v2/path_to_B", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {"field_c1": {"type": 
"string"}}, - }, - }, - }, - ], - "schemas": { - "A": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {"field_a1": {"type": "string"}}, - }, - "B": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {"field_b1": {"type": "string"}}, - }, - "C": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {"field_c1": {"type": "string"}}, - }, - }, - } From 424ddbcf9adb83c322b782f2f50450733e1de5a0 Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Fri, 11 Apr 2025 14:49:53 +0300 Subject: [PATCH 05/18] formatted, removed old implementation --- .../migrations/manifest/exceptions.py | 12 + .../migrations/manifest/migration_handler.py | 6 +- .../declarative/parsers/custom_exceptions.py | 9 - .../parsers/manifest_migration_handler.py | 259 ------------------ 4 files changed, 15 insertions(+), 271 deletions(-) create mode 100644 airbyte_cdk/sources/declarative/migrations/manifest/exceptions.py delete mode 100644 airbyte_cdk/sources/declarative/parsers/manifest_migration_handler.py diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/exceptions.py b/airbyte_cdk/sources/declarative/migrations/manifest/exceptions.py new file mode 100644 index 000000000..7a140706f --- /dev/null +++ b/airbyte_cdk/sources/declarative/migrations/manifest/exceptions.py @@ -0,0 +1,12 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +class ManifestMigrationException(Exception): + """ + Raised when a migration error occurs in the manifest. 
+ """ + + def __init__(self, message: str) -> None: + super().__init__(f"Failed to migrate the manifest: {message}") diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/migration_handler.py b/airbyte_cdk/sources/declarative/migrations/manifest/migration_handler.py index 664c02681..0b64eaa7c 100644 --- a/airbyte_cdk/sources/declarative/migrations/manifest/migration_handler.py +++ b/airbyte_cdk/sources/declarative/migrations/manifest/migration_handler.py @@ -5,6 +5,9 @@ import copy from typing import Type +from airbyte_cdk.sources.declarative.migrations.manifest.exceptions import ( + ManifestMigrationException, +) from airbyte_cdk.sources.declarative.migrations.manifest.manifest_migration import ( ManifestMigration, ManifestType, @@ -12,9 +15,6 @@ from airbyte_cdk.sources.declarative.migrations.manifest.migrations_registry import ( migrations_registry, ) -from airbyte_cdk.sources.declarative.parsers.custom_exceptions import ( - ManifestMigrationException, -) class ManifestMigrationHandler: diff --git a/airbyte_cdk/sources/declarative/parsers/custom_exceptions.py b/airbyte_cdk/sources/declarative/parsers/custom_exceptions.py index 79700e269..6c5847d3f 100644 --- a/airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +++ b/airbyte_cdk/sources/declarative/parsers/custom_exceptions.py @@ -28,12 +28,3 @@ class ManifestNormalizationException(Exception): def __init__(self, message: str) -> None: super().__init__(f"Failed to deduplicate manifest: {message}") - - -class ManifestMigrationException(Exception): - """ - Raised when a migration error occurs in the manifest. 
- """ - - def __init__(self, message: str) -> None: - super().__init__(f"Failed to migrate the manifest: {message}") diff --git a/airbyte_cdk/sources/declarative/parsers/manifest_migration_handler.py b/airbyte_cdk/sources/declarative/parsers/manifest_migration_handler.py deleted file mode 100644 index 1639b9d32..000000000 --- a/airbyte_cdk/sources/declarative/parsers/manifest_migration_handler.py +++ /dev/null @@ -1,259 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - -import copy -from typing import Any, Callable, Dict, List, Optional, Tuple -from urllib.parse import urljoin - -from airbyte_cdk.sources.declarative.parsers.custom_exceptions import ManifestMigrationException -from airbyte_cdk.sources.types import EmptyString - -# Type definitions for better readability -ManifestType = Dict[str, Any] -DefinitionsType = Dict[str, Any] -MigrationType = Tuple[str, str, Optional[str]] -MigrationsType = List[MigrationType] -MigratedTagsType = Dict[str, MigrationsType] -MigrationFunctionType = Callable[[Any, MigrationType], None] - - -# Configuration constants -TYPE_TAG = "type" -DEF_TAG = "definitions" -MIGRATIONS_TAG = "migrations" -MIGRATIONS_STACK_TAG = "stack" -ORIGINAL_KEY = "original_key" -REPLACEMENT_KEY = "replacement_key" - - -# disable migrations for these types -NON_MIGRATABLE_TYPES = [ - "DynamicDeclarativeStream", -] - - -class ManifestMigrationHandler: - """ - This class is responsible for handling migrations in the manifest. - It provides methods to migrate migrated fields and values to their new equivalents. - """ - - @property - def _migration_type_mapping(self) -> Dict[str, MigrationFunctionType]: - """ - Returns a mapping of migration types to their handler functions. - - This method defines how different types of migrations should be handled by mapping - migration type identifiers to their corresponding handler methods. 
- - Returns: - Dict[str, MigrationFunctionType]: A dictionary mapping migration types to handler functions: - - "replace_field": Handler for replacing migrated fields with new ones - - "remove_field": Handler for removing migrated fields - - "handle_url_parts": Handler for processing URL parts - """ - - return { - "replace_field": self._replace_migrated_field, - "remove_field": self._remove_migrated_field, - # the specific type of migration, handles the url parts and verifies the url is correct - "handle_url_parts": self._handle_url_parts, - } - - def __init__( - self, - manifest: ManifestType, - declarative_schema: DefinitionsType, - ) -> None: - self._manifest = manifest - self._declarative_schema = declarative_schema - self._migrated_manifest: ManifestType = copy.deepcopy(self._manifest) - # get the declared migrations from schema - self._migration_tags = self._get_migration_schema_tags(self._declarative_schema) - - def migrate(self) -> ManifestType: - """ - Migrates the manifest by applying configured migrations to different component types. - - This method iterates through all registered component types and their associated - migrations in `_migration_tags`, applying them sequentially by calling - `_handle_migrations` for each component type. - - Returns: - ManifestType: The migrated manifest if migration succeeds, or the original - manifest if a ManifestMigrationException occurs during migration. - """ - try: - for component_type, migrations in self._migration_tags.items(): - self._handle_migrations(component_type, migrations) - return self._migrated_manifest - except ManifestMigrationException: - # if any errors occur we return the original resolved manifest - return self._manifest - - def _get_migration_schema_tags(self, schema: DefinitionsType) -> MigratedTagsType: - """ - Extracts sharable tags from schema definitions. - This function identifies properties within a schema's definitions that have the `migrations` object. 
- - Args: - schema (DefinitionsType): The schema definition dictionary to process - - Returns: - migrations_tags: A set of migrated tags found in the schema definitions. - """ - - # the migrated tags scope: ['definitions.*'] - schema_definitions = schema.get(DEF_TAG, {}) - migrations_tags: MigratedTagsType = {} - - for component_name, component_declaration in schema_definitions.items(): - if MIGRATIONS_TAG in component_declaration.keys(): - migrations_tags[component_name] = [] - - for migration in component_declaration[MIGRATIONS_TAG]: - # register the stack of migrations - if migration[TYPE_TAG] == MIGRATIONS_STACK_TAG: - for migration in migration[MIGRATIONS_TAG]: - self._register_migration(migrations_tags, component_name, migration) - # register a single migration - else: - self._register_migration(migrations_tags, component_name, migration) - - return migrations_tags - - def _register_migration( - self, - migrations_tags: MigratedTagsType, - component_name: str, - migration: Dict[str, Any], - ) -> None: - """ - Registers the migration in the migrations_tags dictionary. - """ - - migrations_tags[component_name].append( - ( - migration[TYPE_TAG], # type of migration - migration[ORIGINAL_KEY], # what is the migrated key - migration.get(REPLACEMENT_KEY), # (optional) what is the new key to be used - ), - ) - - def _handle_migrations( - self, - component_type: str, - migrations: MigrationsType, - ) -> None: - """ - Recursively replaces all occurrences of migrated_key with new_key in the normalized manifest. 
- - The structure of the `migration` Tuple is: - ( - migration[TYPE_TAG] -- type of migration, - migration["original_key"] -- what is the migrated key, - migration["replacement_key"] -- what is the new key to be used, - ) - """ - try: - for migration in migrations: - self._process_migration(self._migrated_manifest, component_type, migration) - except Exception as e: - raise ManifestMigrationException(f"Failed to migrate the manifest: {e}") from e - - def _process_migration(self, obj: Any, component_type: str, migration: MigrationType) -> None: - """ - Process a migration rule by recursively traversing through a nested data structure. - - This method applies migrations to components of a specified type that contain the migrated key. - It recursively processes dictionaries and lists, looking for components that match the criteria. - Migration is skipped for component types listed in NON_MIGRATABLE_TYPES. - - Args: - obj: The object to process, which can be a dictionary, list, or other type. - component_type: The type of component to apply the migration to. - migration: A tuple containing migration type, migrated key, and additional migration info. 
- - Returns: - None - """ - migration_type, migrated_key, _ = migration - - if isinstance(obj, dict): - obj_keys = obj.keys() - - # check for component type match the designed migration - if TYPE_TAG in obj_keys: - obj_type = obj[TYPE_TAG] - - # do not migrate if the particular type is in the list of non-migratable types - if obj_type in NON_MIGRATABLE_TYPES: - return - - if obj_type == component_type and migrated_key in obj_keys: - if migration_type in self._migration_type_mapping.keys(): - # Call the appropriate function based on the migration type - self._migration_type_mapping[migration_type](obj, migration) - - # Process all values in the dictionary - for v in list(obj.values()): - self._process_migration(v, component_type, migration) - - elif isinstance(obj, list): - # Process all items in the list - for item in obj: - self._process_migration(item, component_type, migration) - - ## Migration Functions - def _replace_migrated_field( - self, - obj: Any, - migration: MigrationType, - ) -> None: - """ - Replaces the migrated field with the new field in the object. - The value of the migrated field is copied to the new field. - """ - _, original_key, replacement_key = migration - - obj[replacement_key] = obj[original_key] - obj.pop(original_key, None) - - def _handle_url_parts( - self, - obj: Any, - migration: MigrationType, - ) -> None: - """ - Handles the migration of URL parts by joining the original key with the replacement key. - The value of the original key is joined with the replacement key to form a full URL. 
- """ - _, original_key, replacement_key = migration - - original_key_value = obj[original_key].lstrip("/") - replacement_key_value = obj[replacement_key] - - # return a full-url if provided directly from interpolation context - if original_key_value == EmptyString or original_key_value is None: - obj[replacement_key] = replacement_key_value - else: - # since we didn't provide a full-url, the url_base might not have a trailing slash - # so we join the url_base and path correctly - if not replacement_key_value.endswith("/"): - replacement_key_value += "/" - - obj[replacement_key] = urljoin(replacement_key_value, original_key_value) - - def _remove_migrated_field( - self, - obj: Any, - migration: MigrationType, - ) -> None: - """ - Removes the migrated field from the object. - The value of the migrated field is neglected. - """ - _, original_key, _ = migration - - obj.pop(original_key, None) From 475ea83947ec781a2bcb2f7d393e221a4173eefd Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Fri, 11 Apr 2025 14:51:54 +0300 Subject: [PATCH 06/18] removed custom non-related exception --- .../sources/declarative/parsers/custom_exceptions.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/custom_exceptions.py b/airbyte_cdk/sources/declarative/parsers/custom_exceptions.py index 6c5847d3f..d6fdee695 100644 --- a/airbyte_cdk/sources/declarative/parsers/custom_exceptions.py +++ b/airbyte_cdk/sources/declarative/parsers/custom_exceptions.py @@ -19,12 +19,3 @@ class UndefinedReferenceException(Exception): def __init__(self, path: str, reference: str) -> None: super().__init__(f"Undefined reference {reference} from {path}") - - -class ManifestNormalizationException(Exception): - """ - Raised when a circular reference is detected in a manifest. 
- """ - - def __init__(self, message: str) -> None: - super().__init__(f"Failed to deduplicate manifest: {message}") From fd5d69624db7d7544b915fffacd20f904b5d5d51 Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Fri, 11 Apr 2025 14:54:59 +0300 Subject: [PATCH 07/18] removed unused conftest --- .../sources/declarative/parsers/conftest.py | 642 ------------------ 1 file changed, 642 deletions(-) delete mode 100644 unit_tests/sources/declarative/parsers/conftest.py diff --git a/unit_tests/sources/declarative/parsers/conftest.py b/unit_tests/sources/declarative/parsers/conftest.py deleted file mode 100644 index 4b3312b2f..000000000 --- a/unit_tests/sources/declarative/parsers/conftest.py +++ /dev/null @@ -1,642 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - -from typing import Any, Dict - -import pytest - - -@pytest.fixture -def manifest_with_multiple_url_base() -> Dict[str, Any]: - return { - "type": "DeclarativeSource", - "definitions": { - "streams": { - "A": { - "type": "DeclarativeStream", - "name": "A", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "$ref": "#/definitions/requester_A", - "path": "A", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - "decoder": {"type": "JsonDecoder"}, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/A"}, - }, - }, - "B": { - "type": "DeclarativeStream", - "name": "B", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "$ref": "#/definitions/requester_B", - "path": "B", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - "decoder": {"type": "JsonDecoder"}, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/B"}, - }, - }, - "C": { - "type": "DeclarativeStream", - "name": "C", - "retriever": { - 
"type": "SimpleRetriever", - "requester": { - "$ref": "#/definitions/requester_A", - "path": "C", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - "decoder": {"type": "JsonDecoder"}, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/C"}, - }, - }, - "D": { - "type": "DeclarativeStream", - "name": "D", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "$ref": "#/definitions/requester_B", - "path": "D", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - "decoder": {"type": "JsonDecoder"}, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/D"}, - }, - }, - "E": { - "type": "DeclarativeStream", - "name": "E", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "$ref": "#/definitions/requester_B", - "path": "E", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - "decoder": {"type": "JsonDecoder"}, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/E"}, - }, - }, - }, - # dummy requesters to be resolved and deduplicated - # to the shared `url_base` in the `definitions.shared` section - "requester_A": { - "type": "HttpRequester", - "url_base": "https://example.com/v1/", - }, - "requester_B": { - "type": "HttpRequester", - "url_base": "https://example.com/v2/", - }, - }, - "streams": [ - {"$ref": "#/definitions/streams/A"}, - {"$ref": "#/definitions/streams/B"}, - {"$ref": "#/definitions/streams/C"}, - {"$ref": "#/definitions/streams/D"}, - {"$ref": "#/definitions/streams/E"}, - ], - "schemas": { - "A": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {}, - }, - "B": 
{ - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {}, - }, - "C": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {}, - }, - "D": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {}, - }, - "E": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {}, - }, - }, - } - - -@pytest.fixture -def expected_manifest_with_multiple_url_base_normalized() -> Dict[str, Any]: - return { - "type": "DeclarativeSource", - "definitions": {"shared": {"HttpRequester": {"url_base": "https://example.com/v2/"}}}, - "streams": [ - { - "type": "DeclarativeStream", - "name": "A", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "url_base": "https://example.com/v1/", - "path": "A", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - "decoder": {"type": "JsonDecoder"}, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/A"}, - }, - }, - { - "type": "DeclarativeStream", - "name": "B", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "url_base": {"$ref": "#/definitions/shared/HttpRequester/url_base"}, - "path": "B", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - "decoder": {"type": "JsonDecoder"}, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/B"}, - }, - }, - { - "type": "DeclarativeStream", - "name": "C", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "url_base": "https://example.com/v1/", - 
"path": "C", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - "decoder": {"type": "JsonDecoder"}, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/C"}, - }, - }, - { - "type": "DeclarativeStream", - "name": "D", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "url_base": {"$ref": "#/definitions/shared/HttpRequester/url_base"}, - "path": "D", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - "decoder": {"type": "JsonDecoder"}, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/D"}, - }, - }, - { - "type": "DeclarativeStream", - "name": "E", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "url_base": {"$ref": "#/definitions/shared/HttpRequester/url_base"}, - "path": "E", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - "decoder": {"type": "JsonDecoder"}, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/E"}, - }, - }, - ], - "schemas": { - "A": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {}, - }, - "B": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {}, - }, - "C": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {}, - }, - "D": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {}, - }, - "E": { - "type": "object", - "$schema": 
"http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {}, - }, - }, - } - - -@pytest.fixture -def manifest_with_url_base_shared_definition() -> Dict[str, Any]: - return { - "type": "DeclarativeSource", - "definitions": { - "shared": {"HttpRequester": {"url_base": "https://example.com/v2/"}}, - "streams": { - "A": { - "type": "DeclarativeStream", - "name": "A", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "$ref": "#/definitions/requester_A", - "path": "A", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - "decoder": {"type": "JsonDecoder"}, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/A"}, - }, - }, - "B": { - "type": "DeclarativeStream", - "name": "B", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "$ref": "#/definitions/requester_B", - "path": "B", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - "decoder": {"type": "JsonDecoder"}, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/B"}, - }, - }, - "C": { - "type": "DeclarativeStream", - "name": "C", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "$ref": "#/definitions/requester_A", - "path": "C", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - "decoder": {"type": "JsonDecoder"}, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/C"}, - }, - }, - "D": { - "type": "DeclarativeStream", - "name": "D", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "$ref": "#/definitions/requester_B", - "path": "D", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": 
{"type": "DpathExtractor", "field_path": []}, - }, - "decoder": {"type": "JsonDecoder"}, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/D"}, - }, - }, - "E": { - "type": "DeclarativeStream", - "name": "E", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "$ref": "#/definitions/requester_B", - "path": "E", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - "decoder": {"type": "JsonDecoder"}, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/E"}, - }, - }, - }, - # dummy requesters to be resolved and deduplicated - # to the shared `url_base` in the `definitions.shared` section - "requester_A": { - "type": "HttpRequester", - "url_base": "https://example.com/v1/", - }, - "requester_B": { - "type": "HttpRequester", - "url_base": {"$ref": "#/definitions/shared/HttpRequester/url_base"}, - }, - }, - "streams": [ - {"$ref": "#/definitions/streams/A"}, - {"$ref": "#/definitions/streams/B"}, - {"$ref": "#/definitions/streams/C"}, - {"$ref": "#/definitions/streams/D"}, - {"$ref": "#/definitions/streams/E"}, - ], - "schemas": { - "A": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {}, - }, - "B": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {}, - }, - "C": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {}, - }, - "D": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {}, - }, - "E": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {}, - }, - }, - } - - -@pytest.fixture -def 
expected_manifest_with_url_base_shared_definition_normalized() -> Dict[str, Any]: - return { - "type": "DeclarativeSource", - "definitions": {"shared": {"HttpRequester": {"url_base": "https://example.com/v2/"}}}, - "streams": [ - { - "type": "DeclarativeStream", - "name": "A", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "url_base": "https://example.com/v1/", - "path": "A", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - "decoder": {"type": "JsonDecoder"}, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/A"}, - }, - }, - { - "type": "DeclarativeStream", - "name": "B", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "url_base": {"$ref": "#/definitions/shared/HttpRequester/url_base"}, - "path": "B", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - "decoder": {"type": "JsonDecoder"}, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/B"}, - }, - }, - { - "type": "DeclarativeStream", - "name": "C", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "url_base": "https://example.com/v1/", - "path": "C", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - "decoder": {"type": "JsonDecoder"}, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/C"}, - }, - }, - { - "type": "DeclarativeStream", - "name": "D", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "url_base": {"$ref": "#/definitions/shared/HttpRequester/url_base"}, - "path": "D", - "http_method": "GET", - }, - "record_selector": { - 
"type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - "decoder": {"type": "JsonDecoder"}, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/D"}, - }, - }, - { - "type": "DeclarativeStream", - "name": "E", - "retriever": { - "type": "SimpleRetriever", - "requester": { - "type": "HttpRequester", - "url_base": {"$ref": "#/definitions/shared/HttpRequester/url_base"}, - "path": "E", - "http_method": "GET", - }, - "record_selector": { - "type": "RecordSelector", - "extractor": {"type": "DpathExtractor", "field_path": []}, - }, - "decoder": {"type": "JsonDecoder"}, - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": {"$ref": "#/schemas/E"}, - }, - }, - ], - "schemas": { - "A": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {}, - }, - "B": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {}, - }, - "C": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {}, - }, - "D": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {}, - }, - "E": { - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "additionalProperties": True, - "properties": {}, - }, - }, - } From 403a41ff633e650a15902f8532168852e9ef97f0 Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Fri, 11 Apr 2025 17:24:48 +0300 Subject: [PATCH 08/18] formatted --- .../migrations/manifest/manifest_migration.py | 36 ++++++++++++++----- .../http_requester_path_to_url_migration.py | 6 ++++ ...ttp_requester_url_base_to_url_migration.py | 6 ++++ 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/manifest_migration.py 
b/airbyte_cdk/sources/declarative/migrations/manifest/manifest_migration.py index 5d70dfb83..963b17364 100644 --- a/airbyte_cdk/sources/declarative/migrations/manifest/manifest_migration.py +++ b/airbyte_cdk/sources/declarative/migrations/manifest/manifest_migration.py @@ -34,6 +34,24 @@ def migrate(self, manifest: ManifestType) -> None: :param kwargs: Additional arguments for migration """ + def _is_component(self, obj: Dict[str, Any]) -> bool: + """ + Check if the object is a component. + + :param obj: The object to check + :return: True if the object is a component, False otherwise + """ + return TYPE_TAG in obj.keys() + + def _is_migratable(self, obj: Dict[str, Any]) -> bool: + """ + Check if the object is a migratable component. + + :param obj: The object to check + :return: True if the object is a migratable component, False otherwise + """ + return obj[TYPE_TAG] not in NON_MIGRATABLE_TYPES + def _process_manifest(self, obj: Any) -> None: """ Recursively processes a manifest object, migrating components that match the migration criteria. @@ -54,21 +72,21 @@ def _process_manifest(self, obj: Any) -> None: None, since we process the manifest in place. 
""" if isinstance(obj, dict): - obj_keys = obj.keys() - # check for component type match the designed migration - if TYPE_TAG in obj_keys: - obj_type = obj[TYPE_TAG] - - # do not migrate if the particular type is in the list of non-migratable types - if obj_type in NON_MIGRATABLE_TYPES: + # Check if the object is a component + if self._is_component(obj): + # Check if the object is allowed to be migrated + if not self._is_migratable(obj): return + # Check if the object should be migrated if self.should_migrate(obj): + # Perform the migration, if needed self.migrate(obj) # Process all values in the dictionary - for v in list(obj.values()): - self._process_manifest(v) + for value in list(obj.values()): + self._process_manifest(value) + elif isinstance(obj, list): # Process all items in the list for item in obj: diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_path_to_url_migration.py b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_path_to_url_migration.py index b7abb7c15..cebaa4e35 100644 --- a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_path_to_url_migration.py +++ b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_path_to_url_migration.py @@ -9,6 +9,12 @@ class HttpRequesterPathToUrlMigration(ManifestMigration): + """ + This migration is responsible for migrating the `path` key to `url` in the HttpRequester component. + The `path` key is expected to be a relative path, and the `url` key is expected to be a full URL. + The migration will concatenate the `url_base` and `path` to form a full URL. 
+ """ + component_type = "HttpRequester" original_key = "path" replacement_key = "url" diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_url_base_to_url_migration.py b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_url_base_to_url_migration.py index 93a171176..2455dc198 100644 --- a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_url_base_to_url_migration.py +++ b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_url_base_to_url_migration.py @@ -6,6 +6,12 @@ class HttpRequesterUrlBaseToUrlMigration(ManifestMigration): + """ + This migration is responsible for migrating the `url_base` key to `url` in the HttpRequester component. + The `url_base` key is expected to be a base URL, and the `url` key is expected to be a full URL. + The migration will copy the value of `url_base` to `url`. + """ + component_type = "HttpRequester" original_key = "url_base" replacement_key = "url" From 046c308b5e467d9a9f7c3213c144a630b25630ec Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Mon, 14 Apr 2025 12:32:36 +0300 Subject: [PATCH 09/18] cleaned up --- .../parsers/model_to_component_factory.py | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index e87caff5d..7730a2857 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -2862,6 +2862,25 @@ def create_simple_retriever( use_cache: Optional[bool] = None, **kwargs: Any, ) -> SimpleRetriever: + def _get_url() -> str: + """ + Closure to get the URL from the requester. This is used to get the URL in the case of a lazy retriever. + This is needed because the URL is not set until the requester is created. 
+ """ + + _url = ( + model.requester.url + if hasattr(model.requester, "url") and model.requester.url is not None + else requester.get_url() + ) + _url_base = ( + model.requester.url_base + if hasattr(model.requester, "url_base") and model.requester.url_base is not None + else requester.get_url_base() + ) + + return _url or _url_base + decoder = ( self._create_component_from_model(model=model.decoder, config=config) if model.decoder @@ -2926,17 +2945,6 @@ def create_simple_retriever( use_cache=use_cache, config=config, ) - _url = ( - model.requester.url - if hasattr(model.requester, "url") and model.requester.url is not None - else requester.get_url() - ) - _url_base = ( - model.requester.url_base - if hasattr(model.requester, "url_base") and model.requester.url_base is not None - else requester.get_url_base() - ) - url_base = _url or _url_base # Define cursor only if per partition or common incremental support is needed cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None @@ -2960,7 +2968,7 @@ def create_simple_retriever( self._create_component_from_model( model=model.paginator, config=config, - url_base=url_base, + url_base=_get_url(), decoder=decoder, cursor_used_for_stop_condition=cursor_used_for_stop_condition, ) From 54bf3640e6e8881ab3791cc336329a759bb8540c Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Mon, 14 Apr 2025 21:08:14 +0300 Subject: [PATCH 10/18] emit deprecation warnings in Connector Builder --- .../connector_builder/test_reader/reader.py | 25 ++++- .../sources/declarative/declarative_source.py | 11 ++- .../manifest_declarative_source.py | 23 +---- .../models/base_model_with_deprecations.py | 91 ++++++++++++------- .../parsers/model_to_component_factory.py | 26 +++++- .../test_connector_builder_handler.py | 5 +- 6 files changed, 124 insertions(+), 57 deletions(-) diff --git a/airbyte_cdk/connector_builder/test_reader/reader.py b/airbyte_cdk/connector_builder/test_reader/reader.py index b776811eb..2b56524ed 
100644 --- a/airbyte_cdk/connector_builder/test_reader/reader.py +++ b/airbyte_cdk/connector_builder/test_reader/reader.py @@ -110,11 +110,16 @@ def run_test_read( record_limit = self._check_record_limit(record_limit) # The connector builder currently only supports reading from a single stream at a time stream = source.streams(config)[0] + + # get any deprecation warnings during the component creation + deprecation_warnings: List[AirbyteLogMessage] = source.deprecation_warnings() + schema_inferrer = SchemaInferrer( self._pk_to_nested_and_composite_field(stream.primary_key), self._cursor_field_to_nested_and_composite_field(stream.cursor_field), ) datetime_format_inferrer = DatetimeFormatInferrer() + message_group = get_message_groups( self._read_stream(source, config, configured_catalog, state), schema_inferrer, @@ -123,7 +128,7 @@ def run_test_read( ) slices, log_messages, auxiliary_requests, latest_config_update = self._categorise_groups( - message_group + message_group, deprecation_warnings ) schema, log_messages = self._get_infered_schema( configured_catalog, schema_inferrer, log_messages @@ -236,7 +241,11 @@ def _check_record_limit(self, record_limit: Optional[int] = None) -> int: return record_limit - def _categorise_groups(self, message_groups: MESSAGE_GROUPS) -> GROUPED_MESSAGES: + def _categorise_groups( + self, + message_groups: MESSAGE_GROUPS, + deprecation_warnings: Optional[List[Any]] = None, + ) -> GROUPED_MESSAGES: """ Categorizes a sequence of message groups into slices, log messages, auxiliary requests, and the latest configuration update. 
@@ -267,6 +276,7 @@ def _categorise_groups(self, message_groups: MESSAGE_GROUPS) -> GROUPED_MESSAGES auxiliary_requests = [] latest_config_update: Optional[AirbyteControlMessage] = None + # process the message groups first for message_group in message_groups: match message_group: case AirbyteLogMessage(): @@ -296,6 +306,17 @@ def _categorise_groups(self, message_groups: MESSAGE_GROUPS) -> GROUPED_MESSAGES case _: raise ValueError(f"Unknown message group type: {type(message_group)}") + # process deprecation warnings, if present + if deprecation_warnings is not None: + for deprecation in deprecation_warnings: + match deprecation: + case AirbyteLogMessage(): + log_messages.append( + LogMessage(message=deprecation.message, level=deprecation.level.value) + ) + case _: + raise ValueError(f"Unknown message group type: {type(deprecation)}") + return slices, log_messages, auxiliary_requests, latest_config_update def _get_infered_schema( diff --git a/airbyte_cdk/sources/declarative/declarative_source.py b/airbyte_cdk/sources/declarative/declarative_source.py index 77bf427a1..55f425e50 100644 --- a/airbyte_cdk/sources/declarative/declarative_source.py +++ b/airbyte_cdk/sources/declarative/declarative_source.py @@ -4,8 +4,11 @@ import logging from abc import abstractmethod -from typing import Any, Mapping, Tuple +from typing import Any, List, Mapping, Tuple +from airbyte_cdk.models import ( + AirbyteLogMessage, +) from airbyte_cdk.sources.abstract_source import AbstractSource from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker @@ -34,3 +37,9 @@ def check_connection( The error object will be cast to string to display the problem to the user. """ return self.connection_checker.check_connection(self, logger, config) + + def deprecation_warnings(self) -> List[AirbyteLogMessage]: + """ + Returns a list of deprecation warnings for the source. 
+ """ + return [] diff --git a/airbyte_cdk/sources/declarative/manifest_declarative_source.py b/airbyte_cdk/sources/declarative/manifest_declarative_source.py index 983dae839..1e54ede11 100644 --- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py +++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py @@ -17,6 +17,7 @@ from airbyte_cdk.models import ( AirbyteConnectionStatus, + AirbyteLogMessage, AirbyteMessage, AirbyteStateMessage, ConfiguredAirbyteCatalog, @@ -60,24 +61,6 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException -def _get_declarative_component_schema() -> Dict[str, Any]: - try: - raw_component_schema = pkgutil.get_data( - "airbyte_cdk", "sources/declarative/declarative_component_schema.yaml" - ) - if raw_component_schema is not None: - declarative_component_schema = yaml.load(raw_component_schema, Loader=yaml.SafeLoader) - return declarative_component_schema # type: ignore - else: - raise RuntimeError( - "Failed to read manifest component json schema required for deduplication" - ) - except FileNotFoundError as e: - raise FileNotFoundError( - f"Failed to read manifest component json schema required for deduplication: {e}" - ) - - class ManifestDeclarativeSource(DeclarativeSource): """Declarative source defined by a manifest of low-code components that define source connector behavior""" @@ -100,7 +83,6 @@ def __init__( """ self.logger = logging.getLogger(f"airbyte.{self.name}") - self._declarative_component_schema = _get_declarative_component_schema() # For ease of use we don't require the type to be specified at the top level manifest, but it should be included during processing manifest = dict(source_config) if "type" not in manifest: @@ -151,6 +133,9 @@ def dynamic_streams(self) -> List[Dict[str, Any]]: manifest=self._source_config, config=self._config, with_dynamic_stream_name=True ) + def deprecation_warnings(self) -> List[AirbyteLogMessage]: + return self._constructor.get_model_deprecations() or [] 
+ @property def connection_checker(self) -> ConnectionChecker: check = self._source_config["check"] diff --git a/airbyte_cdk/sources/declarative/models/base_model_with_deprecations.py b/airbyte_cdk/sources/declarative/models/base_model_with_deprecations.py index 50adc38a8..a0fde745b 100644 --- a/airbyte_cdk/sources/declarative/models/base_model_with_deprecations.py +++ b/airbyte_cdk/sources/declarative/models/base_model_with_deprecations.py @@ -4,15 +4,13 @@ # WHEN DEPRECATED FIELDS ARE ACCESSED import warnings -from typing import Any +from typing import Any, List from pydantic.v1 import BaseModel from airbyte_cdk.models import ( AirbyteLogMessage, - AirbyteMessage, Level, - Type, ) # format the warning message @@ -23,48 +21,49 @@ FIELDS_TAG = "__fields__" DEPRECATED = "deprecated" DEPRECATION_MESSAGE = "deprecation_message" +DEPRECATION_LOGS_TAG = "_deprecation_logs" class BaseModelWithDeprecations(BaseModel): """ Pydantic BaseModel that warns when deprecated fields are accessed. + The deprecation message is stored in the field's extra attributes. + This class is used to create models that can have deprecated fields + and show warnings when those fields are accessed or initialized. + + The `_deprecation_logs` attribute is stored in the model itself. + The collected deprecation warnings are further propagated to the Airbyte log messages, + during the component creation process, in `model_to_component._collect_model_deprecations()`. + + The component implementation is not responsible for handling the deprecation warnings, + since the deprecation warnings are already handled in the model itself. """ - def _deprecated_warning(self, field_name: str, message: str) -> None: + class Config: """ - Show a warning message for deprecated fields (to stdout). - Args: - field_name (str): Name of the deprecated field. - message (str): Warning message to be displayed. + Allow extra fields in the model. In case the model restricts extra fields.
""" - warnings.warn( - f"Component type: `{self.__class__.__name__}`. Field '{field_name}' is deprecated. {message}", - DeprecationWarning, - ) + extra = "allow" - # print( - # AirbyteMessage( - # type=Type.LOG, - # log=AirbyteLogMessage( - # level=Level.WARN, - # message=f"Component type: `{self.__class__.__name__}`. Field '{field_name}' is deprecated. {message}", - # ), - # ) - # ) + _deprecation_logs: List[AirbyteLogMessage] = [] def __init__(self, **data: Any) -> None: """ Show warnings for deprecated fields during component initialization. """ - model_fields = self.__fields__ for field_name in data: if field_name in model_fields: - if model_fields[field_name].field_info.extra.get(DEPRECATED, False): - message = model_fields[field_name].field_info.extra.get(DEPRECATION_MESSAGE, "") - self._deprecated_warning(field_name, message) + is_deprecated_field = model_fields[field_name].field_info.extra.get( + DEPRECATED, False + ) + if is_deprecated_field: + deprecation_message = model_fields[field_name].field_info.extra.get( + DEPRECATION_MESSAGE, "" + ) + self._deprecated_warning(field_name, deprecation_message) # Call the parent constructor super().__init__(**data) @@ -76,13 +75,39 @@ def __getattribute__(self, name: str) -> Any: value = super().__getattribute__(name) - if name == FIELDS_TAG: - try: - model_fields = super().__getattribute__(FIELDS_TAG) - field_info = model_fields.get(name) - if field_info and field_info.field_info.extra.get(DEPRECATED): - self._deprecated_warning(name, field_info) - except (AttributeError, KeyError): - pass + try: + model_fields = super().__getattribute__(FIELDS_TAG) + field_info = model_fields.get(name) + is_deprecated_field = ( + field_info.field_info.extra.get(DEPRECATED, False) if field_info else False + ) + if is_deprecated_field: + deprecation_message = field_info.extra.get(DEPRECATION_MESSAGE, "") + self._deprecated_warning(name, deprecation_message) + except (AttributeError, KeyError): + pass return value + + def 
_deprecated_warning(self, field_name: str, message: str) -> None: + """ + Show a warning message for deprecated fields (to stdout). + Args: + field_name (str): Name of the deprecated field. + message (str): Warning message to be displayed. + """ + + # Emit a warning message for deprecated fields (to stdout) (Python Default behavior) + warnings.warn( + f"Component type: `{self.__class__.__name__}`. Field '{field_name}' is deprecated. {message}", + DeprecationWarning, + ) + + # Add the deprecation message to the Airbyte log messages, + # this logs are displayed in the Connector Builder. + self._deprecation_logs.append( + AirbyteLogMessage( + level=Level.WARN, + message=f"Component type: `{self.__class__.__name__}`. Field '{field_name}' is deprecated. {message}", + ), + ) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 7730a2857..958ea6055 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -27,7 +27,7 @@ from isodate import parse_duration from pydantic.v1 import BaseModel -from airbyte_cdk.models import FailureType, Level +from airbyte_cdk.models import AirbyteLogMessage, FailureType, Level from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator from airbyte_cdk.sources.declarative.async_job.job_tracker import JobTracker @@ -108,6 +108,10 @@ CustomStateMigration, GzipDecoder, ) +from airbyte_cdk.sources.declarative.models.base_model_with_deprecations import ( + DEPRECATION_LOGS_TAG, + BaseModelWithDeprecations, +) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( AddedFieldDefinition as AddedFieldDefinitionModel, ) @@ -583,6 +587,8 @@ def __init__( self._connector_state_manager = connector_state_manager 
or ConnectorStateManager() self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None self._job_tracker: JobTracker = JobTracker(max_concurrent_async_job_count or 1) + # placeholder for deprecation warnings + self._deprecation_logs: List[AirbyteLogMessage] = [] def _init_mappings(self) -> None: self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = { @@ -729,8 +735,26 @@ def _create_component_from_model(self, model: BaseModel, config: Config, **kwarg component_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(model.__class__) if not component_constructor: raise ValueError(f"Could not find constructor for {model.__class__}") + + # collect deprecation warnings for supported models. + if isinstance(model, BaseModelWithDeprecations): + self._collect_model_deprecations(model) + return component_constructor(model=model, config=config, **kwargs) + def get_model_deprecations(self) -> List[Any]: + """ + Returns the deprecation warnings that were collected during the creation of components. + """ + return self._deprecation_logs + + def _collect_model_deprecations(self, model: BaseModelWithDeprecations) -> None: + if hasattr(model, DEPRECATION_LOGS_TAG) and model._deprecation_logs is not None: + for log in model._deprecation_logs: + # avoid duplicates for deprecation logs observed. 
+ if log not in self._deprecation_logs: + self._deprecation_logs.append(log) + @staticmethod def create_added_field_definition( model: AddedFieldDefinitionModel, config: Config, **kwargs: Any diff --git a/unit_tests/connector_builder/test_connector_builder_handler.py b/unit_tests/connector_builder/test_connector_builder_handler.py index 3af646597..54dc403b6 100644 --- a/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/unit_tests/connector_builder/test_connector_builder_handler.py @@ -7,7 +7,7 @@ import json import logging import os -from typing import Literal +from typing import List, Literal from unittest import mock from unittest.mock import MagicMock, patch @@ -817,6 +817,9 @@ def spec(self, logger: logging.Logger) -> ConnectorSpecification: connector_specification.connectionSpecification = {} return connector_specification + def deprecation_warnings(self) -> List[AirbyteLogMessage]: + return [] + @property def check_config_against_spec(self) -> Literal[False]: return False From b2a49d35c2d9239262fa226cda98944594a5074d Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Tue, 15 Apr 2025 14:35:29 +0300 Subject: [PATCH 11/18] added version checks for manifest migrations. 
Added auto-import for all migrations available --- .../migrations/manifest/manifest_migration.py | 48 ++++++++++++++- .../migrations/manifest/migration_handler.py | 6 +- ...tp_requester_url_base_to_url_migration.py} | 2 +- ...2_http_requester_path_to_url_migration.py} | 2 +- .../manifest/migrations/__init__.py | 11 ---- .../manifest/migrations_registry.py | 58 +++++++++++++++---- 6 files changed, 98 insertions(+), 29 deletions(-) rename airbyte_cdk/sources/declarative/migrations/manifest/migrations/{http_requester_url_base_to_url_migration.py => 0_v6_45_2_http_requester_url_base_to_url_migration.py} (91%) rename airbyte_cdk/sources/declarative/migrations/manifest/migrations/{http_requester_path_to_url_migration.py => 1_v6_45_2_http_requester_path_to_url_migration.py} (95%) diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/manifest_migration.py b/airbyte_cdk/sources/declarative/migrations/manifest/manifest_migration.py index 963b17364..3f51a9df5 100644 --- a/airbyte_cdk/sources/declarative/migrations/manifest/manifest_migration.py +++ b/airbyte_cdk/sources/declarative/migrations/manifest/manifest_migration.py @@ -1,5 +1,6 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. +import re from abc import abstractmethod from typing import Any, Dict @@ -34,6 +35,15 @@ def migrate(self, manifest: ManifestType) -> None: :param kwargs: Additional arguments for migration """ + @property + def migration_version(self) -> str: + """ + Get the migration version. + + :return: The migration version as a string + """ + return self._get_migration_version() + def _is_component(self, obj: Dict[str, Any]) -> bool: """ Check if the object is a component. @@ -45,12 +55,16 @@ def _is_component(self, obj: Dict[str, Any]) -> bool: def _is_migratable(self, obj: Dict[str, Any]) -> bool: """ - Check if the object is a migratable component. + Check if the object is a migratable component, + based on the Type of the component and the migration version. 
:param obj: The object to check :return: True if the object is a migratable component, False otherwise """ - return obj[TYPE_TAG] not in NON_MIGRATABLE_TYPES + return ( + obj[TYPE_TAG] not in NON_MIGRATABLE_TYPES + and self._get_manifest_version(obj) <= self.migration_version + ) def _process_manifest(self, obj: Any) -> None: """ @@ -91,3 +105,33 @@ def _process_manifest(self, obj: Any) -> None: # Process all items in the list for item in obj: self._process_manifest(item) + + def _get_manifest_version(self, manifest: ManifestType) -> str: + """ + Get the manifest version from the manifest. + + :param manifest: The manifest to get the version from + :return: The manifest version + """ + return manifest.get("version", "0.0.0") + + def _get_migration_version(self) -> str: + """ + Get the migration version from the class name. + The migration version is extracted from the class name using a regular expression. + The expected format is "V_<major>_<minor>_<patch>_<migration_name>". + + For example, "V_6_45_2_ManifestMigration_HttpRequesterPathToUrl" -> "6.45.2" + + :return: The migration version as a string in the format "major.minor.patch" + :raises ValueError: If the class name does not match the expected format + """ + + class_name = self.__class__.__name__ + migration_version = re.search(r"V_(\d+_\d+_\d+)", class_name) + if migration_version: + return migration_version.group(1).replace("_", ".") + else: + raise ValueError( + f"Invalid migration class name, make sure the class name has the version (e.g `V_0_0_0_`): {class_name}" + ) diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/migration_handler.py b/airbyte_cdk/sources/declarative/migrations/manifest/migration_handler.py index 0b64eaa7c..501672183 100644 --- a/airbyte_cdk/sources/declarative/migrations/manifest/migration_handler.py +++ b/airbyte_cdk/sources/declarative/migrations/manifest/migration_handler.py @@ -13,7 +13,7 @@ ManifestType, ) from airbyte_cdk.sources.declarative.migrations.manifest.migrations_registry import ( -
migrations_registry, + MIGRATIONS, ) @@ -39,8 +39,8 @@ def apply_migrations(self) -> ManifestType: manifest if any migration failed. """ try: - for migration_class in migrations_registry: - self._handle_migration(migration_class) + for migration_cls in MIGRATIONS: + self._handle_migration(migration_cls) return self._migrated_manifest except ManifestMigrationException: # if any errors occur we return the original resolved manifest diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_url_base_to_url_migration.py b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/0_v6_45_2_http_requester_url_base_to_url_migration.py similarity index 91% rename from airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_url_base_to_url_migration.py rename to airbyte_cdk/sources/declarative/migrations/manifest/migrations/0_v6_45_2_http_requester_url_base_to_url_migration.py index 2455dc198..118c382f0 100644 --- a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_url_base_to_url_migration.py +++ b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/0_v6_45_2_http_requester_url_base_to_url_migration.py @@ -5,7 +5,7 @@ ) -class HttpRequesterUrlBaseToUrlMigration(ManifestMigration): +class V_6_45_2_ManifestMigration_HttpRequesterUrlBaseToUrl(ManifestMigration): """ This migration is responsible for migrating the `url_base` key to `url` in the HttpRequester component. The `url_base` key is expected to be a base URL, and the `url` key is expected to be a full URL. 
diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_path_to_url_migration.py b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/1_v6_45_2_http_requester_path_to_url_migration.py similarity index 95% rename from airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_path_to_url_migration.py rename to airbyte_cdk/sources/declarative/migrations/manifest/migrations/1_v6_45_2_http_requester_path_to_url_migration.py index cebaa4e35..eb96e9a6f 100644 --- a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_path_to_url_migration.py +++ b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/1_v6_45_2_http_requester_path_to_url_migration.py @@ -8,7 +8,7 @@ from airbyte_cdk.sources.types import EmptyString -class HttpRequesterPathToUrlMigration(ManifestMigration): +class V_6_45_2_ManifestMigration_HttpRequesterPathToUrl(ManifestMigration): """ This migration is responsible for migrating the `path` key to `url` in the HttpRequester component. The `path` key is expected to be a relative path, and the `url` key is expected to be a full URL. 
diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/__init__.py b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/__init__.py index 0b8631018..e69de29bb 100644 --- a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/__init__.py +++ b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/__init__.py @@ -1,11 +0,0 @@ -from airbyte_cdk.sources.declarative.migrations.manifest.migrations.http_requester_path_to_url_migration import ( - HttpRequesterPathToUrlMigration, -) -from airbyte_cdk.sources.declarative.migrations.manifest.migrations.http_requester_url_base_to_url_migration import ( - HttpRequesterUrlBaseToUrlMigration, -) - -__all__ = [ - "HttpRequesterUrlBaseToUrlMigration", - "HttpRequesterPathToUrlMigration", -] diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/migrations_registry.py b/airbyte_cdk/sources/declarative/migrations/manifest/migrations_registry.py index 40a741e82..7c7d25748 100644 --- a/airbyte_cdk/sources/declarative/migrations/manifest/migrations_registry.py +++ b/airbyte_cdk/sources/declarative/migrations/manifest/migrations_registry.py @@ -1,17 +1,53 @@ +import importlib +import inspect +import pkgutil +import re +import sys from typing import List, Type +import airbyte_cdk.sources.declarative.migrations.manifest.migrations as migrations_pkg from airbyte_cdk.sources.declarative.migrations.manifest.manifest_migration import ( ManifestMigration, ) -from airbyte_cdk.sources.declarative.migrations.manifest.migrations import ( - HttpRequesterPathToUrlMigration, - HttpRequesterUrlBaseToUrlMigration, -) -# This is the registry of all the migrations that are available. -# Add new migrations to the bottom of the list, -# ( ! ) make sure the order of the migrations is correct. 
-migrations_registry: List[Type[ManifestMigration]] = [ - HttpRequesterUrlBaseToUrlMigration, - HttpRequesterPathToUrlMigration, -] +# Dynamically import all modules in the migrations package +for _, module_name, is_pkg in pkgutil.iter_modules(migrations_pkg.__path__): + if not is_pkg: + importlib.import_module(f"{migrations_pkg.__name__}.{module_name}") + + +def _migration_order_key(cls): + # Extract the migration order from the module name, e.g., 0_v6_45_2_http_requester_url_base_to_url_migration + # The order is the integer at the start of the module name, before the first underscore + module_name = cls.__module__.split(".")[-1] + match = re.match(r"(\d+)_", module_name) + return int(match.group(1)) if match else float("inf") + + +def _discover_migrations() -> List[Type[ManifestMigration]]: + migration_classes = [] + for name, obj in inspect.getmembers(sys.modules[migrations_pkg.__name__], inspect.isclass): + if ( + issubclass(obj, ManifestMigration) + and obj is not ManifestMigration + and obj not in migration_classes + ): + migration_classes.append(obj) + + for _, module_name, _ in pkgutil.iter_modules(migrations_pkg.__path__): + module = sys.modules.get(f"{migrations_pkg.__name__}.{module_name}") + if module: + for name, obj in inspect.getmembers(module, inspect.isclass): + if ( + issubclass(obj, ManifestMigration) + and obj is not ManifestMigration + and obj not in migration_classes + ): + migration_classes.append(obj) + + # Sort by migration order key + migration_classes.sort(key=_migration_order_key) + return migration_classes + + +MIGRATIONS: List[Type[ManifestMigration]] = _discover_migrations() From 7fcb29dd15dab2005b65ff23bb95007950d65b57 Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Tue, 15 Apr 2025 15:02:52 +0300 Subject: [PATCH 12/18] formatted --- .../declarative/migrations/manifest/manifest_migration.py | 2 +- .../declarative/migrations/manifest/migrations_registry.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git 
a/airbyte_cdk/sources/declarative/migrations/manifest/manifest_migration.py b/airbyte_cdk/sources/declarative/migrations/manifest/manifest_migration.py index 3f51a9df5..6e4b3bb2f 100644 --- a/airbyte_cdk/sources/declarative/migrations/manifest/manifest_migration.py +++ b/airbyte_cdk/sources/declarative/migrations/manifest/manifest_migration.py @@ -113,7 +113,7 @@ def _get_manifest_version(self, manifest: ManifestType) -> str: :param manifest: The manifest to get the version from :return: The manifest version """ - return manifest.get("version", "0.0.0") + return str(manifest.get("version", "0.0.0")) def _get_migration_version(self) -> str: """ diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/migrations_registry.py b/airbyte_cdk/sources/declarative/migrations/manifest/migrations_registry.py index 7c7d25748..a5febf308 100644 --- a/airbyte_cdk/sources/declarative/migrations/manifest/migrations_registry.py +++ b/airbyte_cdk/sources/declarative/migrations/manifest/migrations_registry.py @@ -16,12 +16,12 @@ importlib.import_module(f"{migrations_pkg.__name__}.{module_name}") -def _migration_order_key(cls): +def _migration_order_key(cls: object) -> int: # Extract the migration order from the module name, e.g., 0_v6_45_2_http_requester_url_base_to_url_migration # The order is the integer at the start of the module name, before the first underscore module_name = cls.__module__.split(".")[-1] match = re.match(r"(\d+)_", module_name) - return int(match.group(1)) if match else float("inf") + return int(match.group(1)) if match else 0 def _discover_migrations() -> List[Type[ManifestMigration]]: From b912e827bdb417e2a8a564bc43160c18ee7c7b18 Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Tue, 15 Apr 2025 15:07:51 +0300 Subject: [PATCH 13/18] updated --- ..._migration.py => 0_v6_45_2_http_requester_url_base_to_url.py} | 0 ..._url_migration.py => 1_v6_45_2_http_requester_path_to_url.py} | 0 .../declarative/migrations/manifest/migrations/__init__.py | 1 + 3 
files changed, 1 insertion(+) rename airbyte_cdk/sources/declarative/migrations/manifest/migrations/{0_v6_45_2_http_requester_url_base_to_url_migration.py => 0_v6_45_2_http_requester_url_base_to_url.py} (100%) rename airbyte_cdk/sources/declarative/migrations/manifest/migrations/{1_v6_45_2_http_requester_path_to_url_migration.py => 1_v6_45_2_http_requester_path_to_url.py} (100%) diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/0_v6_45_2_http_requester_url_base_to_url_migration.py b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/0_v6_45_2_http_requester_url_base_to_url.py similarity index 100% rename from airbyte_cdk/sources/declarative/migrations/manifest/migrations/0_v6_45_2_http_requester_url_base_to_url_migration.py rename to airbyte_cdk/sources/declarative/migrations/manifest/migrations/0_v6_45_2_http_requester_url_base_to_url.py diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/1_v6_45_2_http_requester_path_to_url_migration.py b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/1_v6_45_2_http_requester_path_to_url.py similarity index 100% rename from airbyte_cdk/sources/declarative/migrations/manifest/migrations/1_v6_45_2_http_requester_path_to_url_migration.py rename to airbyte_cdk/sources/declarative/migrations/manifest/migrations/1_v6_45_2_http_requester_path_to_url.py diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/__init__.py b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/__init__.py index e69de29bb..184d57e91 100644 --- a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/__init__.py +++ b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/__init__.py @@ -0,0 +1 @@ +from airbyte_cdk.sources.declarative.migrations.manifest.migrations import * \ No newline at end of file From 4085c8ebd4db30f53805f7a6778b6a7223d89bac Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Tue, 15 Apr 2025 15:22:44 +0300 
Subject: [PATCH 14/18] changed the naming structure for the migrations --- .../declarative/migrations/manifest/migrations/__init__.py | 1 - ...h_to_url.py => http_requester_path_to_url_v6_45_2__1.py} | 0 ..._url.py => http_requester_url_base_to_url_v6_45_2__0.py} | 0 .../declarative/migrations/manifest/migrations_registry.py | 6 +++--- 4 files changed, 3 insertions(+), 4 deletions(-) rename airbyte_cdk/sources/declarative/migrations/manifest/migrations/{1_v6_45_2_http_requester_path_to_url.py => http_requester_path_to_url_v6_45_2__1.py} (100%) rename airbyte_cdk/sources/declarative/migrations/manifest/migrations/{0_v6_45_2_http_requester_url_base_to_url.py => http_requester_url_base_to_url_v6_45_2__0.py} (100%) diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/__init__.py b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/__init__.py index 184d57e91..e69de29bb 100644 --- a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/__init__.py +++ b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/__init__.py @@ -1 +0,0 @@ -from airbyte_cdk.sources.declarative.migrations.manifest.migrations import * \ No newline at end of file diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/1_v6_45_2_http_requester_path_to_url.py b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_path_to_url_v6_45_2__1.py similarity index 100% rename from airbyte_cdk/sources/declarative/migrations/manifest/migrations/1_v6_45_2_http_requester_path_to_url.py rename to airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_path_to_url_v6_45_2__1.py diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/0_v6_45_2_http_requester_url_base_to_url.py b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_url_base_to_url_v6_45_2__0.py similarity index 100% rename from 
airbyte_cdk/sources/declarative/migrations/manifest/migrations/0_v6_45_2_http_requester_url_base_to_url.py rename to airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_url_base_to_url_v6_45_2__0.py diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/migrations_registry.py b/airbyte_cdk/sources/declarative/migrations/manifest/migrations_registry.py index a5febf308..1b0bb1aea 100644 --- a/airbyte_cdk/sources/declarative/migrations/manifest/migrations_registry.py +++ b/airbyte_cdk/sources/declarative/migrations/manifest/migrations_registry.py @@ -17,10 +17,10 @@ def _migration_order_key(cls: object) -> int: - # Extract the migration order from the module name, e.g., 0_v6_45_2_http_requester_url_base_to_url_migration - # The order is the integer at the start of the module name, before the first underscore + # Extract the migration order from the module name, e.g., http_requester_url_base_to_url_v6_45_2__0 + # The order is the integer after the double underscore at the end of the module name module_name = cls.__module__.split(".")[-1] - match = re.match(r"(\d+)_", module_name) + match = re.search(r"__(\d+)$", module_name) return int(match.group(1)) if match else 0 From a3679a3b2f2fbbbc01dbc61214d6297d3df63384 Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Tue, 15 Apr 2025 15:30:12 +0300 Subject: [PATCH 15/18] add README.md --- .../declarative/migrations/manifest/README.md | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 airbyte_cdk/sources/declarative/migrations/manifest/README.md diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/README.md b/airbyte_cdk/sources/declarative/migrations/manifest/README.md new file mode 100644 index 000000000..79ddd4b19 --- /dev/null +++ b/airbyte_cdk/sources/declarative/migrations/manifest/README.md @@ -0,0 +1,66 @@ +# Manifest Migrations + +This directory contains the logic and registry for manifest migrations in the Airbyte CDK. 
Migrations are used to update or transform manifest components to newer formats or schemas as the CDK evolves. + +## Adding a New Migration + +1. **Create a Migration File:** + - Add a new Python file in the `migrations/` subdirectory. + - Name the file using the pattern: `<description>_v<major>_<minor>_<patch>__<order>.py`. + - Example: `http_requester_url_base_to_url_v6_45_2__0.py` + - The `<order>` integer is used to determine the order of migrations for the same version. + +2. **Define the Migration Class:** + - The migration class must inherit from `ManifestMigration`. + - Name the class using the pattern: `V_<major>_<minor>_<patch>_ManifestMigration_<Description>`. + - Example: `V_6_45_2_ManifestMigration_HttpRequesterUrlBaseToUrl` + - Implement the following methods: + - `should_migrate(self, manifest: ManifestType) -> bool`: Return `True` if the migration should be applied to the given manifest. + - `migrate(self, manifest: ManifestType) -> None`: Perform the migration in-place. + +3. **Migration Versioning:** + - The migration version is extracted from the class name and used to determine applicability. + - Only manifests with a version less than or equal to the migration version will be migrated. + +4. **Component Type:** + - Use the `TYPE_TAG` constant to check the component type in your migration logic. + +5. **Examples:** + - See `migrations/http_requester_url_base_to_url_v6_45_2__0.py` and `migrations/http_requester_path_to_url_v6_45_2__1.py` for reference implementations. + +## Migration Registry + +- All migration classes in the `migrations/` folder are automatically discovered and registered in `migrations_registry.py`. +- Migrations are applied in order, determined by the `<order>` suffix in the filename. + +## Testing + +- Ensure your migration is covered by unit tests. +- Tests should verify both `should_migrate` and `migrate` behaviors.
+ +## Example Migration Skeleton + +```python +from airbyte_cdk.sources.declarative.migrations.manifest.manifest_migration import TYPE_TAG, ManifestMigration, ManifestType + +class V_1_2_3_ManifestMigration_Example(ManifestMigration): + component_type = "ExampleComponent" + original_key = "old_key" + replacement_key = "new_key" + + def should_migrate(self, manifest: ManifestType) -> bool: + return manifest[TYPE_TAG] == self.component_type and self.original_key in manifest + + def migrate(self, manifest: ManifestType) -> None: + manifest[self.replacement_key] = manifest[self.original_key] + manifest.pop(self.original_key, None) +``` + +## Additional Notes + +- Do not modify the migration registry manually; it will pick up all valid migration classes automatically. +- If you need to skip certain component types, use the `NON_MIGRATABLE_TYPES` list in `manifest_migration.py`. + +--- + +For more details, see the docstrings in `manifest_migration.py` and the examples in the `migrations/` folder. 
From 7b16aab7faac63e80cb98d29065c2c0be7b37b88 Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Tue, 15 Apr 2025 17:56:25 +0300 Subject: [PATCH 16/18] add __requires_migration: bool flag to manifest config --- .../connector_builder_handler.py | 11 +++++++++++ .../declarative/manifest_declarative_source.py | 17 ++++++++++------- .../test_connector_builder_handler.py | 6 ++++-- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/airbyte_cdk/connector_builder/connector_builder_handler.py b/airbyte_cdk/connector_builder/connector_builder_handler.py index 6b6b31111..1c334ecea 100644 --- a/airbyte_cdk/connector_builder/connector_builder_handler.py +++ b/airbyte_cdk/connector_builder/connector_builder_handler.py @@ -56,12 +56,23 @@ def get_limits(config: Mapping[str, Any]) -> TestLimits: return TestLimits(max_records, max_pages_per_slice, max_slices, max_streams) +def requires_migration(config: Mapping[str, Any]) -> bool: + """ + Check if the manifest requires migration. + + :param config: The config to check + :return: True if the manifest requires migration, False otherwise + """ + return config.get("__requires_migration", False) + + def create_source(config: Mapping[str, Any], limits: TestLimits) -> ManifestDeclarativeSource: manifest = config["__injected_declarative_manifest"] return ManifestDeclarativeSource( config=config, emit_connector_builder_messages=True, source_config=manifest, + migrate_manifest=requires_migration(config), component_factory=ModelToComponentFactory( emit_connector_builder_messages=True, limit_pages_fetched_per_slice=limits.max_pages_per_slice, diff --git a/airbyte_cdk/sources/declarative/manifest_declarative_source.py b/airbyte_cdk/sources/declarative/manifest_declarative_source.py index 1e54ede11..d03066220 100644 --- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py +++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py @@ -72,14 +72,16 @@ def __init__( debug: bool = False, 
emit_connector_builder_messages: bool = False, component_factory: Optional[ModelToComponentFactory] = None, + migrate_manifest: Optional[bool] = False, ) -> None: """ Args: config: The provided config dict. source_config: The manifest of low-code components that describe the source connector. - debug: True if debug mode is enabled. - emit_connector_builder_messages: True if messages should be emitted to the connector builder. - component_factory: optional factory if ModelToComponentFactory's default behavior needs to be tweaked. + debug: bool True if debug mode is enabled. + emit_connector_builder_messages: Optional[bool] True if messages should be emitted to the connector builder. + component_factory: Optional factory if ModelToComponentFactory's default behavior needs to be tweaked. + migrate_manifest: Optional[bool] if the manifest should be migrated to pick up the latest declarative component schema changes at runtime. """ self.logger = logging.getLogger(f"airbyte.{self.name}") @@ -96,11 +98,12 @@ def __init__( "", resolved_source_config, {} ) - migrated_source_config = ManifestMigrationHandler( - propagated_source_config - ).apply_migrations() + if migrate_manifest: + propagated_source_config = ManifestMigrationHandler( + propagated_source_config + ).apply_migrations() - self._source_config = migrated_source_config + self._source_config = propagated_source_config self._debug = debug self._emit_connector_builder_messages = emit_connector_builder_messages self._constructor = ( diff --git a/unit_tests/connector_builder/test_connector_builder_handler.py b/unit_tests/connector_builder/test_connector_builder_handler.py index 00c62a070..4abff8f19 100644 --- a/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/unit_tests/connector_builder/test_connector_builder_handler.py @@ -606,6 +606,7 @@ def test_resolve_manifest(valid_resolve_manifest_config_file): }, "requester": { "type": "HttpRequester", + "path": "/v3/marketing/lists", "authenticator": { 
"type": "BearerAuthenticator", "api_token": "{{ config.apikey }}", @@ -617,7 +618,7 @@ def test_resolve_manifest(valid_resolve_manifest_config_file): "request_parameters": {"a_param": "10"}, "name": _stream_name, "primary_key": _stream_primary_key, - "url": _stream_url_base + "/v3/marketing/lists", + "url_base": _stream_url_base, "$parameters": _stream_options, }, "partition_router": { @@ -1495,6 +1496,7 @@ def test_full_resolve_manifest(valid_resolve_manifest_config_file): }, }, "requester": { + "path": "/v3/marketing/lists", "authenticator": { "type": "BearerAuthenticator", "api_token": "{{ config.apikey }}", @@ -1511,7 +1513,7 @@ def test_full_resolve_manifest(valid_resolve_manifest_config_file): "type": "HttpRequester", "name": "stream_with_custom_requester", "primary_key": "id", - "url": "https://10.0.27.27/api/v1/v3/marketing/lists", + "url_base": "https://10.0.27.27/api/v1/", "$parameters": { "name": "stream_with_custom_requester", "primary_key": "id", From 4a051c2e5b735f16021c90df8b0444f5729fffa2 Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Wed, 16 Apr 2025 17:44:56 +0300 Subject: [PATCH 17/18] dummy change line --- .../sources/declarative/models/base_model_with_deprecations.py | 1 + 1 file changed, 1 insertion(+) diff --git a/airbyte_cdk/sources/declarative/models/base_model_with_deprecations.py b/airbyte_cdk/sources/declarative/models/base_model_with_deprecations.py index a0fde745b..93300a1c8 100644 --- a/airbyte_cdk/sources/declarative/models/base_model_with_deprecations.py +++ b/airbyte_cdk/sources/declarative/models/base_model_with_deprecations.py @@ -111,3 +111,4 @@ def _deprecated_warning(self, field_name: str, message: str) -> None: message=f"Component type: `{self.__class__.__name__}`. Field '{field_name}' is deprecated. {message}", ), ) + # dummy change to trigger CI tests. 
From 6814f089c31183f35e2cd688dcbba7fbed8b02c8 Mon Sep 17 00:00:00 2001 From: Oleksandr Bazarnov Date: Tue, 22 Apr 2025 11:17:37 +0300 Subject: [PATCH 18/18] updated --- airbyte_cdk/connector_builder/connector_builder_handler.py | 6 +++--- .../migrations/http_requester_path_to_url_v6_45_2__1.py | 2 +- .../migrations/http_requester_url_base_to_url_v6_45_2__0.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/airbyte_cdk/connector_builder/connector_builder_handler.py b/airbyte_cdk/connector_builder/connector_builder_handler.py index 1c334ecea..28e0fa578 100644 --- a/airbyte_cdk/connector_builder/connector_builder_handler.py +++ b/airbyte_cdk/connector_builder/connector_builder_handler.py @@ -56,14 +56,14 @@ def get_limits(config: Mapping[str, Any]) -> TestLimits: return TestLimits(max_records, max_pages_per_slice, max_slices, max_streams) -def requires_migration(config: Mapping[str, Any]) -> bool: +def should_migrate_manifest(config: Mapping[str, Any]) -> bool: """ Check if the manifest requires migration. 
:param config: The config to check :return: True if the manifest requires migration, False otherwise """ - return config.get("__requires_migration", False) + return config.get("__should_migrate", False) def create_source(config: Mapping[str, Any], limits: TestLimits) -> ManifestDeclarativeSource: @@ -72,7 +72,7 @@ def create_source(config: Mapping[str, Any], limits: TestLimits) -> ManifestDecl config=config, emit_connector_builder_messages=True, source_config=manifest, - migrate_manifest=requires_migration(config), + migrate_manifest=should_migrate_manifest(config), component_factory=ModelToComponentFactory( emit_connector_builder_messages=True, limit_pages_fetched_per_slice=limits.max_pages_per_slice, diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_path_to_url_v6_45_2__1.py b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_path_to_url_v6_45_2__1.py index eb96e9a6f..4ad67cfa4 100644 --- a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_path_to_url_v6_45_2__1.py +++ b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_path_to_url_v6_45_2__1.py @@ -8,7 +8,7 @@ from airbyte_cdk.sources.types import EmptyString -class V_6_45_2_ManifestMigration_HttpRequesterPathToUrl(ManifestMigration): +class V_6_45_2_HttpRequesterPathToUrl(ManifestMigration): """ This migration is responsible for migrating the `path` key to `url` in the HttpRequester component. The `path` key is expected to be a relative path, and the `url` key is expected to be a full URL. 
diff --git a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_url_base_to_url_v6_45_2__0.py b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_url_base_to_url_v6_45_2__0.py index 118c382f0..c41b44371 100644 --- a/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_url_base_to_url_v6_45_2__0.py +++ b/airbyte_cdk/sources/declarative/migrations/manifest/migrations/http_requester_url_base_to_url_v6_45_2__0.py @@ -5,7 +5,7 @@ ) -class V_6_45_2_ManifestMigration_HttpRequesterUrlBaseToUrl(ManifestMigration): +class V_6_45_2_HttpRequesterUrlBaseToUrl(ManifestMigration): """ This migration is responsible for migrating the `url_base` key to `url` in the HttpRequester component. The `url_base` key is expected to be a base URL, and the `url` key is expected to be a full URL.