44import io
55import logging
66import threading
7- from asyncio import Task
87from collections import Counter
98from concurrent import futures
109from functools import partial
1110from pathlib import Path
11+ from typing import Any , Coroutine
1212from unittest .mock import AsyncMock , MagicMock , patch
1313
1414import httpx
1515import pytest
16- import requests
17- from requests_toolbelt import MultipartDecoder
16+ import requests # type: ignore[import-untyped]
17+ from requests_toolbelt import MultipartDecoder # type: ignore[import-untyped]
1818
1919from unstructured_client ._hooks .custom import form_utils , pdf_utils , request_utils
2020from unstructured_client ._hooks .custom .form_utils import (
@@ -438,7 +438,7 @@ async def _request_mock(
438438@pytest .mark .asyncio
439439async def test_unit_disallow_failed_coroutines (
440440 allow_failed : bool ,
441- tasks : list [Task ],
441+ tasks : list [partial [ Coroutine [ Any , Any , httpx . Response ]] ],
442442 expected_responses : list [str ],
443443):
444444 """Test disallow failed coroutines method properly sets the flag to False."""
@@ -824,6 +824,70 @@ def after_error(self, hook_ctx, response, error): # pragma: no cover - dispatch
824824 assert "Cancellation cleanup cancelled" in caplog .text
825825
826826
@pytest.mark.asyncio
async def test_unit_do_request_async_secondary_cancellation_waits_for_cleanup():
    """A second ``cancel()`` must not end the task before async cleanup finishes.

    The request task is cancelled once while the client is blocked, and again
    while the async after-error hook is still parked. The task has to stay
    pending until the hook is released, and only then surface
    ``asyncio.CancelledError``.
    """
    operation_id = "secondary-cancel-cleanup"

    # Three checkpoints used to observe and steer the cleanup hook.
    hook_entered = asyncio.Event()
    hook_gate = asyncio.Event()
    hook_completed = asyncio.Event()

    class _PreparedRequestHook:
        def before_request(self, hook_ctx, request):
            del hook_ctx, request
            # Swap the outgoing request for one tagged with the operation id,
            # both as a header and as a request extension.
            prepared = httpx.Request(
                "GET",
                "http://localhost:8888/general/docs",
                headers={"operation_id": operation_id},
                extensions={"split_pdf_operation_id": operation_id},
            )
            return prepared

    class _GatedCleanupHook:
        async def after_error_async(self, hook_ctx, response, error):
            del hook_ctx, response, error
            hook_entered.set()
            # Park here until the test opens the gate.
            await hook_gate.wait()
            hook_completed.set()
            return None, None

        def after_error(self, hook_ctx, response, error):  # pragma: no cover - dispatch guard
            raise AssertionError("async hook should be awaited")

    sdk_hooks = SDKHooks()
    sdk_hooks.before_request_hooks = [_PreparedRequestHook()]  # type: ignore[list-item]
    sdk_hooks.after_error_hooks = [_GatedCleanupHook()]  # type: ignore[list-item]

    blocking_client = _BlockingAsyncClient()
    sdk_config = SDKConfiguration(
        client=None,
        client_supplied=False,
        async_client=blocking_client,  # type: ignore[arg-type]
        async_client_supplied=True,
        debug_logger=logging.getLogger("test"),
    )
    # Install the hand-built hooks directly on the configuration instance.
    sdk_config.__dict__["_hooks"] = sdk_hooks
    base_sdk = BaseSDK(sdk_config)

    hook_context = _make_sdk_hook_context()
    outgoing = httpx.Request("POST", "http://localhost:8888/general/v0/general")
    request_task = asyncio.create_task(
        base_sdk.do_request_async(hook_context, outgoing, error_status_codes=[])
    )

    # First cancellation: issued once the client signals `started`
    # (presumably set when the blocking send begins -- confirm in the helper).
    await blocking_client.started.wait()
    request_task.cancel()

    # Second cancellation: issued while the cleanup hook is still parked.
    await hook_entered.wait()
    request_task.cancel()
    # Yield once so the task gets a chance to react to the second cancel.
    await asyncio.sleep(0)

    assert not request_task.done()

    hook_gate.set()
    with pytest.raises(asyncio.CancelledError):
        await request_task

    assert hook_completed.is_set()
890+
827891def test_before_request_returns_dummy_with_timeout_and_operation_id ():
828892 hook , mock_hook_ctx , result = _make_hook_with_split_request ()
829893 operation_id = result .headers ["operation_id" ]
@@ -836,6 +900,17 @@ def test_before_request_returns_dummy_with_timeout_and_operation_id():
836900 assert operation_id in hook .pending_operation_ids
837901
838902
def test_before_request_rejects_reused_operation_id():
    """Building a split request fails when the generated operation id is already tracked.

    The id is registered on the hook up front and ``uuid4`` is patched to
    return that same value, so the setup helper is expected to raise
    ``RuntimeError``.
    """
    taken_operation_id = "reused-operation-id"

    split_hook = SplitPdfHook()
    # Pretend an earlier operation already registered this id.
    split_hook.coroutines_to_execute[taken_operation_id] = []

    forced_uuid = patch(
        "unstructured_client._hooks.custom.split_pdf_hook.uuid.uuid4",
        return_value=taken_operation_id,
    )
    with forced_uuid:
        with pytest.raises(RuntimeError, match="Split PDF operation ID already in use"):
            _make_hook_with_split_request(hook=split_hook)
912+
913+
839914def test_before_request_logs_split_plan (caplog : pytest .LogCaptureFixture ):
840915 caplog .set_level (logging .INFO , logger = "unstructured-client" )
841916
@@ -1784,15 +1859,16 @@ def test_unit_allow_failed_partial_results(caplog: pytest.LogCaptureFixture):
17841859 hook .concurrency_level [operation_id ] = 3
17851860 hook .allow_failed [operation_id ] = True
17861861 hook .cache_tmp_data_feature [operation_id ] = False
1787- hook .executors [operation_id ] = MagicMock ()
1862+ executor = MagicMock ()
1863+ hook .executors [operation_id ] = executor
17881864
17891865 fake_future = MagicMock ()
17901866 fake_future .result .return_value = [
17911867 (1 , _httpx_json_response ([{"page_number" : 1 }])),
17921868 (2 , _httpx_response ("boom" , status_code = 500 )),
17931869 (3 , _httpx_json_response ([{"page_number" : 3 }])),
17941870 ]
1795- hook . executors [ operation_id ] .submit .return_value = fake_future
1871+ executor .submit .return_value = fake_future
17961872
17971873 elements = hook ._await_elements (operation_id )
17981874
0 commit comments