Skip to content

fix: handle unstructured FileType.from_extension(None) + update unstr…

76c7faf
Select commit
Loading
Failed to load commit list.
Sign in for the full log view
Draft

fix(cdk): upgrade unstructured from 0.10.27 to 0.18.18 to fix CVE-2025-64712 #907

fix: handle unstructured FileType.from_extension(None) + update unstr…
76c7faf
Select commit
Loading
Failed to load commit list.
GitHub Actions / PyTest Results (Fast) failed Feb 17, 2026 in 0s

1 fail, 11 skipped, 578 pass in 3m 15s

590 tests   - 1 626   578 ✅  - 1 626   3m 15s ⏱️ - 3m 9s
  1 suites ±    0    11 💤 ±    0 
  1 files   ±    0     1 ❌ ±    0 

Results for commit 76c7faf. ± Comparison against earlier commit e011d9b.

Annotations

Check warning on line 0 in unit_tests.sources.declarative.test_concurrent_declarative_source

See this annotation in the file changed.

@github-actions github-actions / PyTest Results (Fast)

test_read_concurrent_declarative_source[test_no_pagination_with_partition_router-manifest4-pages4-expected_records4-expected_calls4] (unit_tests.sources.declarative.test_concurrent_declarative_source) failed

build/test-results/pytest-results.xml [took 1s]
Raw output
AssertionError: assert [{'ABC': 2, '...artition': 0}] == [{'ABC': 0, '...artition': 1}]
  At index 0 diff: {'ABC': 2, 'partition': 1} != {'ABC': 0, 'partition': 0}
  Full diff:
    [
  +  {'ABC': 2, 'partition': 1},
     {'ABC': 0, 'partition': 0},
     {'AED': 1, 'partition': 0},
  -  {'ABC': 2, 'partition': 1},
    ]
test_name = 'test_no_pagination_with_partition_router'
manifest = {'check': {'stream_names': ['Rates'], 'type': 'CheckStream'}, 'spec': {'connection_specification': {'$schema': 'http:/... {...}, 'partition': {...}}, 'type': 'object'}, 'type': 'InlineSchemaLoader'}, ...}], 'type': 'DeclarativeSource', ...}
pages = (<Response [200]>, <Response [200]>)
expected_records = [{'ABC': 0, 'partition': 0}, {'AED': 1, 'partition': 0}, {'ABC': 2, 'partition': 1}]
expected_calls = [call({'partition': '0'}, None), call({'partition': '1'}, None)]

    @pytest.mark.parametrize(
        "test_name, manifest, pages, expected_records, expected_calls",
        [
            (
                "test_read_manifest_no_pagination_no_partitions",
                {
                    "version": "0.34.2",
                    "type": "DeclarativeSource",
                    "check": {"type": "CheckStream", "stream_names": ["Rates"]},
                    "streams": [
                        {
                            "type": "DeclarativeStream",
                            "name": "Rates",
                            "primary_key": [],
                            "schema_loader": {
                                "type": "InlineSchemaLoader",
                                "schema": {
                                    "$schema": "http://json-schema.org/schema#",
                                    "properties": {
                                        "ABC": {"type": "number"},
                                        "AED": {"type": "number"},
                                    },
                                    "type": "object",
                                },
                            },
                            "retriever": {
                                "type": "SimpleRetriever",
                                "requester": {
                                    "type": "HttpRequester",
                                    "url_base": "https://api.apilayer.com",
                                    "path": "/exchangerates_data/latest",
                                    "http_method": "GET",
                                    "request_parameters": {},
                                    "request_headers": {},
                                    "request_body_json": {},
                                    "authenticator": {
                                        "type": "ApiKeyAuthenticator",
                                        "header": "apikey",
                                        "api_token": "{{ config['api_key'] }}",
                                    },
                                },
                                "record_selector": {
                                    "type": "RecordSelector",
                                    "extractor": {"type": "DpathExtractor", "field_path": ["rates"]},
                                },
                                "paginator": {"type": "NoPagination"},
                            },
                        }
                    ],
                    "spec": {
                        "connection_specification": {
                            "$schema": "http://json-schema.org/draft-07/schema#",
                            "type": "object",
                            "required": ["api_key"],
                            "properties": {
                                "api_key": {
                                    "type": "string",
                                    "title": "API Key",
                                    "airbyte_secret": True,
                                }
                            },
                            "additionalProperties": True,
                        },
                        "documentation_url": "https://example.org",
                        "type": "Spec",
                    },
                },
                (
                    _create_page({"rates": [{"ABC": 0}, {"AED": 1}], "_metadata": {"next": "next"}}),
                    _create_page({"rates": [{"USD": 2}], "_metadata": {"next": "next"}}),
                )
                * 10,
                [{"ABC": 0}, {"AED": 1}],
                [call({}, None)],
            ),
            (
                "test_read_manifest_with_added_fields",
                {
                    "version": "0.34.2",
                    "type": "DeclarativeSource",
                    "check": {"type": "CheckStream", "stream_names": ["Rates"]},
                    "streams": [
                        {
                            "type": "DeclarativeStream",
                            "name": "Rates",
                            "primary_key": [],
                            "schema_loader": {
                                "type": "InlineSchemaLoader",
                                "schema": {
                                    "$schema": "http://json-schema.org/schema#",
                                    "properties": {
                                        "ABC": {"type": "number"},
                                        "AED": {"type": "number"},
                                    },
                                    "type": "object",
                                },
                            },
                            "transformations": [
                                {
                                    "type": "AddFields",
                                    "fields": [
                                        {
                                            "type": "AddedFieldDefinition",
                                            "path": ["added_field_key"],
                                            "value": "added_field_value",
                                        }
                                    ],
                                }
                            ],
                            "retriever": {
                                "type": "SimpleRetriever",
                                "requester": {
                                    "type": "HttpRequester",
                                    "url_base": "https://api.apilayer.com",
                                    "path": "/exchangerates_data/latest",
                                    "http_method": "GET",
                                    "request_parameters": {},
                                    "request_headers": {},
                                    "request_body_json": {},
                                    "authenticator": {
                                        "type": "ApiKeyAuthenticator",
                                        "header": "apikey",
                                        "api_token": "{{ config['api_key'] }}",
                                    },
                                },
                                "record_selector": {
                                    "type": "RecordSelector",
                                    "extractor": {"type": "DpathExtractor", "field_path": ["rates"]},
                                },
                                "paginator": {"type": "NoPagination"},
                            },
                        }
                    ],
                    "spec": {
                        "connection_specification": {
                            "$schema": "http://json-schema.org/draft-07/schema#",
                            "type": "object",
                            "required": ["api_key"],
                            "properties": {
                                "api_key": {
                                    "type": "string",
                                    "title": "API Key",
                                    "airbyte_secret": True,
                                }
                            },
                            "additionalProperties": True,
                        },
                        "documentation_url": "https://example.org",
                        "type": "Spec",
                    },
                },
                (
                    _create_page({"rates": [{"ABC": 0}, {"AED": 1}], "_metadata": {"next": "next"}}),
                    _create_page({"rates": [{"USD": 2}], "_metadata": {"next": "next"}}),
                )
                * 10,
                [
                    {"ABC": 0, "added_field_key": "added_field_value"},
                    {"AED": 1, "added_field_key": "added_field_value"},
                ],
                [call({}, None)],
            ),
            (
                "test_read_manifest_with_flatten_fields",
                {
                    "version": "0.34.2",
                    "type": "DeclarativeSource",
                    "check": {"type": "CheckStream", "stream_names": ["Rates"]},
                    "streams": [
                        {
                            "type": "DeclarativeStream",
                            "name": "Rates",
                            "primary_key": [],
                            "schema_loader": {
                                "type": "InlineSchemaLoader",
                                "schema": {
                                    "$schema": "http://json-schema.org/schema#",
                                    "properties": {
                                        "ABC": {"type": "number"},
                                        "AED": {"type": "number"},
                                    },
                                    "type": "object",
                                },
                            },
                            "transformations": [{"type": "FlattenFields"}],
                            "retriever": {
                                "type": "SimpleRetriever",
                                "requester": {
                                    "type": "HttpRequester",
                                    "url_base": "https://api.apilayer.com",
                                    "path": "/exchangerates_data/latest",
                                    "http_method": "GET",
                                    "request_parameters": {},
                                    "request_headers": {},
                                    "request_body_json": {},
                                    "authenticator": {
                                        "type": "ApiKeyAuthenticator",
                                        "header": "apikey",
                                        "api_token": "{{ config['api_key'] }}",
                                    },
                                },
                                "record_selector": {
                                    "type": "RecordSelector",
                                    "extractor": {"type": "DpathExtractor", "field_path": ["rates"]},
                                },
                                "paginator": {"type": "NoPagination"},
                            },
                        }
                    ],
                    "spec": {
                        "connection_specification": {
                            "$schema": "http://json-schema.org/draft-07/schema#",
                            "type": "object",
                            "required": ["api_key"],
                            "properties": {
                                "api_key": {
                                    "type": "string",
                                    "title": "API Key",
                                    "airbyte_secret": True,
                                }
                            },
                            "additionalProperties": True,
                        },
                        "documentation_url": "https://example.org",
                        "type": "Spec",
                    },
                },
                (
                    _create_page(
                        {
                            "rates": [
                                {"nested_fields": {"ABC": 0}, "id": 1},
                                {"nested_fields": {"AED": 1}, "id": 2},
                            ],
                            "_metadata": {"next": "next"},
                        }
                    ),
                    _create_page({"rates": [{"USD": 2}], "_metadata": {"next": "next"}}),
                )
                * 10,
                [
                    {"ABC": 0, "id": 1},
                    {"AED": 1, "id": 2},
                ],
                [call({}, None)],
            ),
            (
                "test_read_with_pagination_no_partitions",
                {
                    "version": "0.34.2",
                    "type": "DeclarativeSource",
                    "check": {"type": "CheckStream", "stream_names": ["Rates"]},
                    "streams": [
                        {
                            "type": "DeclarativeStream",
                            "name": "Rates",
                            "primary_key": [],
                            "schema_loader": {
                                "type": "InlineSchemaLoader",
                                "schema": {
                                    "$schema": "http://json-schema.org/schema#",
                                    "properties": {
                                        "ABC": {"type": "number"},
                                        "AED": {"type": "number"},
                                        "USD": {"type": "number"},
                                    },
                                    "type": "object",
                                },
                            },
                            "retriever": {
                                "type": "SimpleRetriever",
                                "requester": {
                                    "type": "HttpRequester",
                                    "url_base": "https://api.apilayer.com",
                                    "path": "/exchangerates_data/latest",
                                    "http_method": "GET",
                                    "request_parameters": {},
                                    "request_headers": {},
                                    "request_body_json": {},
                                    "authenticator": {
                                        "type": "ApiKeyAuthenticator",
                                        "header": "apikey",
                                        "api_token": "{{ config['api_key'] }}",
                                    },
                                },
                                "record_selector": {
                                    "type": "RecordSelector",
                                    "extractor": {"type": "DpathExtractor", "field_path": ["rates"]},
                                },
                                "paginator": {
                                    "type": "DefaultPaginator",
                                    "page_size": 2,
                                    "page_size_option": {
                                        "inject_into": "request_parameter",
                                        "field_name": "page_size",
                                    },
                                    "page_token_option": {"inject_into": "path", "type": "RequestPath"},
                                    "pagination_strategy": {
                                        "type": "CursorPagination",
                                        "cursor_value": "{{ response._metadata.next }}",
                                        "page_size": 2,
                                    },
                                },
                            },
                        }
                    ],
                    "spec": {
                        "connection_specification": {
                            "$schema": "http://json-schema.org/draft-07/schema#",
                            "type": "object",
                            "required": ["api_key"],
                            "properties": {
                                "api_key": {
                                    "type": "string",
                                    "title": "API Key",
                                    "airbyte_secret": True,
                                }
                            },
                            "additionalProperties": True,
                        },
                        "documentation_url": "https://example.org",
                        "type": "Spec",
                    },
                },
                (
                    _create_page({"rates": [{"ABC": 0}, {"AED": 1}], "_metadata": {"next": "next"}}),
                    _create_page({"rates": [{"USD": 2}], "_metadata": {}}),
                )
                * 10,
                [{"ABC": 0}, {"AED": 1}, {"USD": 2}],
                [
                    call({}, None),
                    call({}, {"next_page_token": "next"}),
                ],
            ),
            (
                "test_no_pagination_with_partition_router",
                {
                    "version": "0.34.2",
                    "type": "DeclarativeSource",
                    "check": {"type": "CheckStream", "stream_names": ["Rates"]},
                    "streams": [
                        {
                            "type": "DeclarativeStream",
                            "name": "Rates",
                            "primary_key": [],
                            "schema_loader": {
                                "type": "InlineSchemaLoader",
                                "schema": {
                                    "$schema": "http://json-schema.org/schema#",
                                    "properties": {
                                        "ABC": {"type": "number"},
                                        "AED": {"type": "number"},
                                        "partition": {"type": "number"},
                                    },
                                    "type": "object",
                                },
                            },
                            "retriever": {
                                "type": "SimpleRetriever",
                                "requester": {
                                    "type": "HttpRequester",
                                    "url_base": "https://api.apilayer.com",
                                    "path": "/exchangerates_data/latest",
                                    "http_method": "GET",
                                    "request_parameters": {},
                                    "request_headers": {},
                                    "request_body_json": {},
                                    "authenticator": {
                                        "type": "ApiKeyAuthenticator",
                                        "header": "apikey",
                                        "api_token": "{{ config['api_key'] }}",
                                    },
                                },
                                "partition_router": {
                                    "type": "ListPartitionRouter",
                                    "values": ["0", "1"],
                                    "cursor_field": "partition",
                                },
                                "record_selector": {
                                    "type": "RecordSelector",
                                    "extractor": {"type": "DpathExtractor", "field_path": ["rates"]},
                                },
                                "paginator": {"type": "NoPagination"},
                            },
                        }
                    ],
                    "spec": {
                        "connection_specification": {
                            "$schema": "http://json-schema.org/draft-07/schema#",
                            "type": "object",
                            "required": ["api_key"],
                            "properties": {
                                "api_key": {
                                    "type": "string",
                                    "title": "API Key",
                                    "airbyte_secret": True,
                                }
                            },
                            "additionalProperties": True,
                        },
                        "documentation_url": "https://example.org",
                        "type": "Spec",
                    },
                },
                (
                    _create_page(
                        {
                            "rates": [{"ABC": 0, "partition": 0}, {"AED": 1, "partition": 0}],
                            "_metadata": {"next": "next"},
                        }
                    ),
                    _create_page(
                        {"rates": [{"ABC": 2, "partition": 1}], "_metadata": {"next": "next"}}
                    ),
                ),
                [{"ABC": 0, "partition": 0}, {"AED": 1, "partition": 0}, {"ABC": 2, "partition": 1}],
                [
                    call({"partition": "0"}, None),
                    call({"partition": "1"}, None),
                ],
            ),
            (
                "test_with_pagination_and_partition_router",
                {
                    "version": "0.34.2",
                    "type": "DeclarativeSource",
                    "check": {"type": "CheckStream", "stream_names": ["Rates"]},
                    "streams": [
                        {
                            "type": "DeclarativeStream",
                            "name": "Rates",
                            "primary_key": [],
                            "schema_loader": {
                                "type": "InlineSchemaLoader",
                                "schema": {
                                    "$schema": "http://json-schema.org/schema#",
                                    "properties": {
                                        "ABC": {"type": "number"},
                                        "AED": {"type": "number"},
                                        "partition": {"type": "number"},
                                    },
                                    "type": "object",
                                },
                            },
                            "retriever": {
                                "type": "SimpleRetriever",
                                "requester": {
                                    "type": "HttpRequester",
                                    "url_base": "https://api.apilayer.com",
                                    "path": "/exchangerates_data/latest",
                                    "http_method": "GET",
                                    "request_parameters": {},
                                    "request_headers": {},
                                    "request_body_json": {},
                                    "authenticator": {
                                        "type": "ApiKeyAuthenticator",
                                        "header": "apikey",
                                        "api_token": "{{ config['api_key'] }}",
                                    },
                                },
                                "partition_router": {
                                    "type": "ListPartitionRouter",
                                    "values": ["0", "1"],
                                    "cursor_field": "partition",
                                },
                                "record_selector": {
                                    "type": "RecordSelector",
                                    "extractor": {"type": "DpathExtractor", "field_path": ["rates"]},
                                },
                                "paginator": {
                                    "type": "DefaultPaginator",
                                    "page_size": 2,
                                    "page_size_option": {
                                        "inject_into": "request_parameter",
                                        "field_name": "page_size",
                                    },
                                    "page_token_option": {"inject_into": "path", "type": "RequestPath"},
                                    "pagination_strategy": {
                                        "type": "CursorPagination",
                                        "cursor_value": "{{ response._metadata.next }}",
                                        "page_size": 2,
                                    },
                                },
                            },
                        }
                    ],
                    "spec": {
                        "connection_specification": {
                            "$schema": "http://json-schema.org/draft-07/schema#",
                            "type": "object",
                            "required": ["api_key"],
                            "properties": {
                                "api_key": {
                                    "type": "string",
                                    "title": "API Key",
                                    "airbyte_secret": True,
                                }
                            },
                            "additionalProperties": True,
                        },
                        "documentation_url": "https://example.org",
                        "type": "Spec",
                    },
                },
                (
                    _create_page(
                        {
                            "rates": [{"ABC": 0, "partition": 0}, {"AED": 1, "partition": 0}],
                            "_metadata": {"next": "next"},
                        }
                    ),
                    _create_page({"rates": [{"USD": 3, "partition": 0}], "_metadata": {}}),
                    _create_page({"rates": [{"ABC": 2, "partition": 1}], "_metadata": {}}),
                ),
                [
                    {"ABC": 0, "partition": 0},
                    {"AED": 1, "partition": 0},
                    {"USD": 3, "partition": 0},
                    {"ABC": 2, "partition": 1},
                ],
                [
                    call({"partition": "0"}, None),
                    call({"partition": "0"}, {"next_page_token": "next"}),
                    call({"partition": "1"}, None),
                ],
            ),
        ],
    )
    def test_read_concurrent_declarative_source(
        test_name, manifest, pages, expected_records, expected_calls
    ):
        _stream_name = "Rates"
        with patch.object(SimpleRetriever, "_fetch_next_page", side_effect=pages) as mock_retriever:
            output_data = [
                message.record.data for message in _run_read(manifest, _stream_name) if message.record
            ]
>           assert output_data == expected_records
E           AssertionError: assert [{'ABC': 2, '...artition': 0}] == [{'ABC': 0, '...artition': 1}]
E             At index 0 diff: {'ABC': 2, 'partition': 1} != {'ABC': 0, 'partition': 0}
E             Full diff:
E               [
E             +  {'ABC': 2, 'partition': 1},
E                {'ABC': 0, 'partition': 0},
E                {'AED': 1, 'partition': 0},
E             -  {'ABC': 2, 'partition': 1},
E               ]

unit_tests/sources/declarative/test_concurrent_declarative_source.py:3757: AssertionError