From 7d7860ef59e1642daeb40c63c3903afb4429f1ae Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Tue, 23 Sep 2025 22:41:40 -0700 Subject: [PATCH 1/3] chore: sanitize connector builder error messages --- .../connector_builder_handler.py | 10 ++- .../test_connector_builder_handler.py | 66 +++++++++++++++++++ 2 files changed, 73 insertions(+), 3 deletions(-) diff --git a/airbyte_cdk/connector_builder/connector_builder_handler.py b/airbyte_cdk/connector_builder/connector_builder_handler.py index e63c200c1..ea635d53c 100644 --- a/airbyte_cdk/connector_builder/connector_builder_handler.py +++ b/airbyte_cdk/connector_builder/connector_builder_handler.py @@ -117,11 +117,15 @@ def read_stream( ), ) except Exception as exc: + # - message: user-friendly error for display + # - internal_message: technical details for debugging (including config/catalog) error = AirbyteTracedException.from_exception( exc, - message=filter_secrets( - f"Error reading stream with config={config} and catalog={configured_catalog}: {str(exc)}" - ), + message=filter_secrets(f"Error reading stream: {str(exc)}"), + ) + # Override internal_message to include context for debugging + error.internal_message = filter_secrets( + f"Error reading stream with config={config} and catalog={configured_catalog}: {str(exc)}" ) return error.as_airbyte_message() diff --git a/unit_tests/connector_builder/test_connector_builder_handler.py b/unit_tests/connector_builder/test_connector_builder_handler.py index 6f0394407..2e6f12899 100644 --- a/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/unit_tests/connector_builder/test_connector_builder_handler.py @@ -1408,6 +1408,72 @@ def test_read_stream_exception_with_secrets(): assert "super_secret_key" not in response.trace.error.message +def test_read_stream_error_message_does_not_contain_config_and_catalog(): + """ + Test that error messages in read_stream are clean and user-friendly, + without embedding verbose config and catalog information. + + This test verifies that: + 1. The user-facing `message` is clean and doesn't contain config/catalog dumps + 2. The technical `internal_message` still contains full context for debugging + """ + # Create a config and catalog with identifiable content + config = { + "__injected_declarative_manifest": "test_manifest", + "verbose_config_data": "this_should_not_appear_in_user_message", + "api_key": "secret_key_value" + } + catalog = ConfiguredAirbyteCatalog( + streams=[ + ConfiguredAirbyteStream( + stream=AirbyteStream( + name=_stream_name, + json_schema={"properties": {"verbose_catalog_schema": {"type": "string"}}}, + supported_sync_modes=[SyncMode.full_refresh] + ), + sync_mode=SyncMode.full_refresh, + destination_sync_mode=DestinationSyncMode.append, + ) + ] + ) + state = [] + limits = TestLimits() + + # Mock the source + mock_source = MagicMock() + + # Patch the handler to raise a meaningful exception + with patch( + "airbyte_cdk.connector_builder.test_reader.TestReader.run_test_read" + ) as mock_handler: + # Simulate a common error like the datetime parsing error from the user's example + mock_handler.side_effect = ValueError("time data '' does not match format '%Y-%m-%dT%H:%M:%SZ'") + + # Call the read_stream function + response = read_stream(mock_source, config, catalog, state, limits) + + # Verify it's a trace message with an error + assert response.type == Type.TRACE + assert response.trace.type.value == "ERROR" + + # The user-facing message should be clean - no config or catalog dumps + user_message = response.trace.error.message + assert "verbose_config_data" not in user_message + assert "verbose_catalog_schema" not in user_message + assert "__injected_declarative_manifest" not in user_message + + # But it should contain the actual error + assert "time data" in user_message + assert "does not match format" in user_message + + # The internal message should contain technical details for debugging + internal_message = response.trace.error.internal_message + assert "verbose_config_data" in internal_message + assert "verbose_catalog_schema" in internal_message + assert "Error reading stream with config=" in internal_message + assert "and catalog=" in internal_message + + def test_full_resolve_manifest(valid_resolve_manifest_config_file): config = copy.deepcopy(RESOLVE_DYNAMIC_STREAM_MANIFEST_CONFIG) command = config["__command"] From cc11666512ab002a783339144be34ab25667b0e7 Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Tue, 23 Sep 2025 22:50:40 -0700 Subject: [PATCH 2/3] chore: add stream_name to message --- .../connector_builder/connector_builder_handler.py | 4 ++-- .../test_connector_builder_handler.py | 10 ++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/airbyte_cdk/connector_builder/connector_builder_handler.py b/airbyte_cdk/connector_builder/connector_builder_handler.py index ea635d53c..0f41cab1e 100644 --- a/airbyte_cdk/connector_builder/connector_builder_handler.py +++ b/airbyte_cdk/connector_builder/connector_builder_handler.py @@ -121,11 +121,11 @@ def read_stream( # - internal_message: technical details for debugging (including config/catalog) error = AirbyteTracedException.from_exception( exc, - message=filter_secrets(f"Error reading stream: {str(exc)}"), + message=filter_secrets(f"Error reading stream {stream_name}: {str(exc)}"), ) # Override internal_message to include context for debugging error.internal_message = filter_secrets( - f"Error reading stream with config={config} and catalog={configured_catalog}: {str(exc)}" + f"Error reading stream {stream_name} with config={config} and catalog={configured_catalog}: {str(exc)}" ) return error.as_airbyte_message() diff --git a/unit_tests/connector_builder/test_connector_builder_handler.py b/unit_tests/connector_builder/test_connector_builder_handler.py index 2e6f12899..907936a7a 100644 --- a/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/unit_tests/connector_builder/test_connector_builder_handler.py @@ -1442,11 +1442,10 @@ def test_read_stream_error_message_does_not_contain_config_and_catalog(): # Mock the source mock_source = MagicMock() - # Patch the handler to raise a meaningful exception with patch( "airbyte_cdk.connector_builder.test_reader.TestReader.run_test_read" ) as mock_handler: - # Simulate a common error like the datetime parsing error from the user's example + # Simulate a common error like a datetime parsing error mock_handler.side_effect = ValueError("time data '' does not match format '%Y-%m-%dT%H:%M:%SZ'") # Call the read_stream function @@ -1463,15 +1462,14 @@ def test_read_stream_error_message_does_not_contain_config_and_catalog(): assert "__injected_declarative_manifest" not in user_message # But it should contain the actual error - assert "time data" in user_message - assert "does not match format" in user_message + stream_name = catalog.streams[0].stream.name + assert user_message == f"Error reading stream {stream_name}: time data '' does not match format '%Y-%m-%dT%H:%M:%SZ'" # The internal message should contain technical details for debugging internal_message = response.trace.error.internal_message assert "verbose_config_data" in internal_message assert "verbose_catalog_schema" in internal_message - assert "Error reading stream with config=" in internal_message - assert "and catalog=" in internal_message + assert f"Error reading stream {stream_name} with config=" in internal_message def test_full_resolve_manifest(valid_resolve_manifest_config_file): From 4382381f28c039ae6702ef40a94f8be725e82365 Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Wed, 24 Sep 2025 11:33:52 -0700 Subject: [PATCH 3/3] chore: format --- .../test_connector_builder_handler.py | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/unit_tests/connector_builder/test_connector_builder_handler.py b/unit_tests/connector_builder/test_connector_builder_handler.py index 907936a7a..4b716113c 100644 --- a/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/unit_tests/connector_builder/test_connector_builder_handler.py @@ -1412,24 +1412,24 @@ def test_read_stream_error_message_does_not_contain_config_and_catalog(): """ Test that error messages in read_stream are clean and user-friendly, without embedding verbose config and catalog information. - + This test verifies that: 1. The user-facing `message` is clean and doesn't contain config/catalog dumps 2. The technical `internal_message` still contains full context for debugging """ # Create a config and catalog with identifiable content config = { - "__injected_declarative_manifest": "test_manifest", + "__injected_declarative_manifest": "test_manifest", "verbose_config_data": "this_should_not_appear_in_user_message", - "api_key": "secret_key_value" + "api_key": "secret_key_value", } catalog = ConfiguredAirbyteCatalog( streams=[ ConfiguredAirbyteStream( stream=AirbyteStream( - name=_stream_name, - json_schema={"properties": {"verbose_catalog_schema": {"type": "string"}}}, - supported_sync_modes=[SyncMode.full_refresh] + name=_stream_name, + json_schema={"properties": {"verbose_catalog_schema": {"type": "string"}}}, + supported_sync_modes=[SyncMode.full_refresh], ), sync_mode=SyncMode.full_refresh, destination_sync_mode=DestinationSyncMode.append, @@ -1446,7 +1446,9 @@ def test_read_stream_error_message_does_not_contain_config_and_catalog(): "airbyte_cdk.connector_builder.test_reader.TestReader.run_test_read" ) as mock_handler: # Simulate a common error like a datetime parsing error - mock_handler.side_effect = ValueError("time data '' does not match format '%Y-%m-%dT%H:%M:%SZ'") + mock_handler.side_effect = ValueError( + "time data '' does not match format '%Y-%m-%dT%H:%M:%SZ'" + ) # Call the read_stream function response = read_stream(mock_source, config, catalog, state, limits) @@ -1454,17 +1456,20 @@ def test_read_stream_error_message_does_not_contain_config_and_catalog(): # Verify it's a trace message with an error assert response.type == Type.TRACE assert response.trace.type.value == "ERROR" - + # The user-facing message should be clean - no config or catalog dumps user_message = response.trace.error.message assert "verbose_config_data" not in user_message assert "verbose_catalog_schema" not in user_message assert "__injected_declarative_manifest" not in user_message - + # But it should contain the actual error stream_name = catalog.streams[0].stream.name - assert user_message == f"Error reading stream {stream_name}: time data '' does not match format '%Y-%m-%dT%H:%M:%SZ'" - + assert ( + user_message + == f"Error reading stream {stream_name}: time data '' does not match format '%Y-%m-%dT%H:%M:%SZ'" + ) + # The internal message should contain technical details for debugging internal_message = response.trace.error.internal_message assert "verbose_config_data" in internal_message