From 4afb54698de9f33e428bf22738938fa5c2c47ece Mon Sep 17 00:00:00 2001 From: Austin Walker Date: Mon, 23 Mar 2026 15:10:40 -0400 Subject: [PATCH 1/4] fix(ibm-watsonx-s3): fail fast on precheck and guard bearer token JSON parsing - Add optional max_retries param to get_catalog() so callers can cap retries - Use max_retries=1 in precheck() to surface connection errors immediately instead of blocking through the full exponential-backoff window - Move response.json() inside try/except in generate_bearer_token() so non-JSON IAM responses are caught and wrapped rather than raised raw Co-Authored-By: Claude Sonnet 4.6 --- CHANGELOG.md | 4 ++++ unstructured_ingest/__version__.py | 2 +- .../connectors/ibm_watsonx/ibm_watsonx_s3.py | 12 ++++++++---- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c6dc1e785..0974d5799 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## [1.4.15] + +* **fix(ibm-watsonx-s3): fail fast on precheck and guard bearer token JSON parsing** + ## [1.4.14] * **fix(teradata): enable Unicode Pass Through on session to prevent Error 6705 on non-BMP characters** diff --git a/unstructured_ingest/__version__.py b/unstructured_ingest/__version__.py index b9997971a..b890013da 100644 --- a/unstructured_ingest/__version__.py +++ b/unstructured_ingest/__version__.py @@ -1 +1 @@ -__version__ = "1.4.14" # pragma: no cover +__version__ = "1.4.15" # pragma: no cover diff --git a/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py b/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py index d418a8568..562163a35 100644 --- a/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +++ b/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py @@ -132,9 +132,9 @@ def generate_bearer_token(self) -> dict[str, Any]: try: response = httpx.post(DEFAULT_IBM_CLOUD_AUTH_URL, headers=headers, data=data) response.raise_for_status() + return response.json() except Exception as e: raise self.wrap_error(e) - return response.json() def get_catalog_config(self) -> dict[str, Any]: return { @@ -155,7 +155,7 @@ def get_catalog_config(self) -> dict[str, Any]: @requires_dependencies(["pyiceberg"], extras="ibm-watsonx-s3") @contextmanager - def get_catalog(self) -> Generator["RestCatalog", None, None]: + def get_catalog(self, max_retries: Optional[int] = None) -> Generator["RestCatalog", None, None]: from pyiceberg.catalog import load_catalog from pyiceberg.exceptions import RESTError from tenacity import ( @@ -166,9 +166,11 @@ def get_catalog(self) -> Generator["RestCatalog", None, None]: wait_exponential, ) + retries = max_retries if max_retries is not None else self.max_retries_connection + # Retry connection in case of a connection error @retry( - stop=stop_after_attempt(self.max_retries_connection), + stop=stop_after_attempt(retries), wait=wait_exponential(exp_base=2, multiplier=1, min=2, max=10), retry=retry_if_exception_type(RESTError), before=before_log(logger, logging.DEBUG), @@ -225,7 +227,9 @@ class IbmWatsonxUploader(SQLUploader): connector_type: str = CONNECTOR_TYPE def precheck(self) -> None: - with self.connection_config.get_catalog() as catalog: + # Use max_retries=1 (no retries) during precheck so a transient provider error + # surfaces quickly rather than blocking for the full exponential-backoff window. + with self.connection_config.get_catalog(max_retries=1) as catalog: if not catalog.namespace_exists(self.upload_config.namespace): raise UserError(f"Namespace '{self.upload_config.namespace}' does not exist") if not catalog.table_exists(self.upload_config.table_identifier): From bb0662310ce1e97f00d53d07493504bea5c4bee2 Mon Sep 17 00:00:00 2001 From: Austin Walker Date: Mon, 23 Mar 2026 16:09:07 -0400 Subject: [PATCH 2/4] test(ibm-watsonx-s3): add unit tests for precheck and bearer token fixes Co-Authored-By: Claude Sonnet 4.6 --- .../ibm_watsonx/test_ibm_watsonx_s3.py | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py b/test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py index b18f950ff..7940ea387 100644 --- a/test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py +++ b/test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py @@ -547,3 +547,54 @@ def test_ibm_watsonx_uploader_delete_cannot_delete( "Table doesn't contain expected record id column test_record_id_key, skipping delete" in caplog.text ) + + +def test_ibm_watsonx_generate_bearer_token_non_json_response( + mocker: MockerFixture, + connection_config: IbmWatsonxConnectionConfig, +): + """response.json() failures are caught and re-raised via wrap_error rather than escaping raw.""" + mock_response = mocker.MagicMock() + mock_response.raise_for_status.return_value = None + mock_response.json.side_effect = ValueError("No JSON object could be decoded") + mocker.patch("httpx.post", return_value=mock_response) + + with pytest.raises(ValueError): + connection_config.generate_bearer_token() + + mock_response.raise_for_status.assert_called_once() + mock_response.json.assert_called_once() + + +def test_ibm_watsonx_connection_config_get_catalog_max_retries_override( + mocker: MockerFixture, + connection_config: IbmWatsonxConnectionConfig, +): + """max_retries param overrides max_retries_connection; max_retries=1 means a single attempt.""" + mock_load_catalog = mocker.patch( + "pyiceberg.catalog.load_catalog", + side_effect=RESTError("Connection error"), + ) + mocker.patch.object( + IbmWatsonxConnectionConfig, + "bearer_token", + new="test_bearer_token", + ) + + with pytest.raises(ProviderError): + with connection_config.get_catalog(max_retries=1): + pass + + # max_retries_connection fixture is 2, but max_retries=1 overrides to a single attempt + assert mock_load_catalog.call_count == 1 + + +def test_ibm_watsonx_uploader_precheck_calls_get_catalog_with_max_retries_1( + mock_get_catalog: MagicMock, + mock_catalog: MagicMock, + uploader: IbmWatsonxUploader, +): + """precheck() passes max_retries=1 so connection errors surface immediately.""" + uploader.precheck() + + mock_get_catalog.assert_called_once_with(uploader.connection_config, max_retries=1) From a2e3540e84e08c04859a3641ec601ac4d7eb4cbe Mon Sep 17 00:00:00 2001 From: Austin Walker Date: Mon, 23 Mar 2026 16:16:26 -0400 Subject: [PATCH 3/4] chore: apply linter fixes Co-Authored-By: Claude Sonnet 4.6 --- test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py | 5 ++--- .../processes/connectors/ibm_watsonx/ibm_watsonx_s3.py | 4 +++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py b/test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py index 7940ea387..6ad3ab82f 100644 --- a/test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py +++ b/test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py @@ -581,9 +581,8 @@ def test_ibm_watsonx_connection_config_get_catalog_max_retries_override( new="test_bearer_token", ) - with pytest.raises(ProviderError): - with connection_config.get_catalog(max_retries=1): - pass + with pytest.raises(ProviderError), connection_config.get_catalog(max_retries=1): + pass # max_retries_connection fixture is 2, but max_retries=1 overrides to a single attempt assert mock_load_catalog.call_count == 1 diff --git a/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py b/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py index 562163a35..68866759b 100644 --- a/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +++ b/unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py @@ -155,7 +155,9 @@ def get_catalog_config(self) -> dict[str, Any]: @requires_dependencies(["pyiceberg"], extras="ibm-watsonx-s3") @contextmanager - def get_catalog(self, max_retries: Optional[int] = None) -> Generator["RestCatalog", None, None]: + def get_catalog( + self, max_retries: Optional[int] = None + ) -> Generator["RestCatalog", None, None]: from pyiceberg.catalog import load_catalog from pyiceberg.exceptions import RESTError from tenacity import ( From fde6e0db5c2c7bc4cd0d2e790b1e4feaea99541a Mon Sep 17 00:00:00 2001 From: Austin Walker Date: Mon, 23 Mar 2026 16:41:01 -0400 Subject: [PATCH 4/4] test(ibm-watsonx-s3): fix bearer token test to verify wrap_error is called Co-Authored-By: Claude Sonnet 4.6 --- test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py b/test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py index 6ad3ab82f..914932661 100644 --- a/test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py +++ b/test/unit/connectors/ibm_watsonx/test_ibm_watsonx_s3.py @@ -553,17 +553,17 @@ def test_ibm_watsonx_generate_bearer_token_non_json_response( mocker: MockerFixture, connection_config: IbmWatsonxConnectionConfig, ): - """response.json() failures are caught and re-raised via wrap_error rather than escaping raw.""" + """response.json() failures are caught and re-raised via wrap_error.""" mock_response = mocker.MagicMock() mock_response.raise_for_status.return_value = None mock_response.json.side_effect = ValueError("No JSON object could be decoded") mocker.patch("httpx.post", return_value=mock_response) + spy_wrap_error = mocker.spy(IbmWatsonxConnectionConfig, "wrap_error") with pytest.raises(ValueError): connection_config.generate_bearer_token() - mock_response.raise_for_status.assert_called_once() - mock_response.json.assert_called_once() + spy_wrap_error.assert_called_once() def test_ibm_watsonx_connection_config_get_catalog_max_retries_override(