From 0ec91c7a225fec291830ebe980c8d536735cfb51 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:49:58 +0000 Subject: [PATCH 1/3] fix(source-github): fix silent error swallowing in exception handlers Fix three bugs where exceptions were silently swallowed: 1. ContributorActivity.read_records(): else/raise was paired with outer if instead of inner if, causing non-ACCEPTED status codes to be silently swallowed. 2. GithubStreamABC.read_records(): guard used 'and' instead of 'or', causing AttributeError when _exception is missing and failing to re-raise when response is missing. 3. Releases._extract_database_id_from_node_id(): bare except Exception replaced with specific (ValueError, struct.error, binascii.Error). Co-Authored-By: bot_apk --- .../connectors/source-github/metadata.yaml | 2 +- .../connectors/source-github/pyproject.toml | 2 +- .../source-github/source_github/streams.py | 7 +- .../source-github/unit_tests/test_stream.py | 109 ++++++++++++++++++ docs/integrations/sources/github.md | 1 + 5 files changed, 117 insertions(+), 4 deletions(-) diff --git a/airbyte-integrations/connectors/source-github/metadata.yaml b/airbyte-integrations/connectors/source-github/metadata.yaml index e4ae0f178bac..3d9107cfbb80 100644 --- a/airbyte-integrations/connectors/source-github/metadata.yaml +++ b/airbyte-integrations/connectors/source-github/metadata.yaml @@ -10,7 +10,7 @@ data: connectorSubtype: api connectorType: source definitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e - dockerImageTag: 2.1.17 + dockerImageTag: 2.1.18 dockerRepository: airbyte/source-github documentationUrl: https://docs.airbyte.com/integrations/sources/github erdUrl: https://dbdocs.io/airbyteio/source-github?view=relationships diff --git a/airbyte-integrations/connectors/source-github/pyproject.toml b/airbyte-integrations/connectors/source-github/pyproject.toml index f1dd8aa14ca5..277fc2094658 100644 --- a/airbyte-integrations/connectors/source-github/pyproject.toml +++ b/airbyte-integrations/connectors/source-github/pyproject.toml @@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",] build-backend = "poetry.core.masonry.api" [tool.poetry] -version = "2.1.17" +version = "2.1.18" name = "source-github" description = "Source implementation for GitHub." authors = [ "Airbyte ",] diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index 68202ca64d6f..8327b37e61c1 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -3,6 +3,7 @@ # import base64 +import binascii import re import struct from abc import ABC, abstractmethod @@ -137,7 +138,7 @@ def read_records(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iter # This whole try/except situation in `read_records()` isn't good but right now in `self._send_request()` # function we have `response.raise_for_status()` so we don't have much choice on how to handle errors. # Bocked on https://github.com/airbytehq/airbyte/issues/3514. - if not hasattr(e, "_exception") and not hasattr(e._exception, "response"): + if not hasattr(e, "_exception") or not hasattr(e._exception, "response"): raise e if e._exception.response.status_code == requests.codes.NOT_FOUND: # A lot of streams are not available for repositories owned by a user instead of an organization. @@ -827,7 +828,7 @@ def _extract_database_id_from_node_id(node_id: str) -> Optional[int]: decoded = base64.urlsafe_b64decode(encoded + "==") if len(decoded) >= 4: return struct.unpack(">I", decoded[-4:])[0] - except Exception: + except (ValueError, struct.error, binascii.Error): return None return None @@ -1828,6 +1829,8 @@ def read_records(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iter partition_obj = stream_slice.get("partition") if self.cursor and partition_obj: self.cursor.close_slice(StreamSlice(cursor_slice={}, partition=partition_obj)) + else: + raise e else: raise e diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py index 9350a23fdbf0..4fcb23854083 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py @@ -1763,3 +1763,112 @@ def test_pull_request_stats(requests_mock): list(read_full_refresh(stream)) assert query == expected_query + + +# === Tests for error-swallowing bug fixes (oncall/issues/11907) === + + +@patch("time.sleep") +def test_github_stream_abc_read_records_reraises_when_no_exception_attr(time_mock, requests_mock): + """Bug fix: GithubStreamABC.read_records() should re-raise when AirbyteTracedException + has no _exception attribute. Previously used `and` instead of `or` which would cause + an AttributeError when accessing e._exception on an exception without that attribute.""" + organization_args = {"organizations": ["org_name"]} + stream = Teams(**organization_args) + + requests_mock.get( + "https://api.github.com/orgs/org_name/teams", + status_code=requests.codes.INTERNAL_SERVER_ERROR, + json={"message": "Internal Server Error"}, + ) + + # The exception should be re-raised (not swallowed) + with pytest.raises(AirbyteTracedException): + list(read_full_refresh(stream)) + + +@patch("time.sleep") +def test_github_stream_abc_read_records_reraises_for_unhandled_status(time_mock, requests_mock): + """Bug fix: GithubStreamABC.read_records() correctly handles exceptions with _exception + and response attributes for status codes not explicitly handled (e.g. 422).""" + repository_args = { + "repositories": ["airbytehq/airbyte"], + "page_size_for_large_streams": 20, + } + stream = Branches(**repository_args) + + requests_mock.get( + "https://api.github.com/repos/airbytehq/airbyte/branches", + status_code=422, + json={"message": "Unprocessable Entity"}, + ) + + # Unhandled status codes should propagate up through the else clause on line 188 + with pytest.raises(AirbyteTracedException): + list(read_full_refresh(stream)) + + +@patch("time.sleep") +def test_contributor_activity_reraises_non_accepted_status(time_mock, rate_limit_mock_response, requests_mock): + """Bug fix: ContributorActivity.read_records() should re-raise when the exception has + a valid _exception.response but the status code is NOT 202 ACCEPTED. Previously, the + `else: raise e` was paired with the outer `if` instead of the inner `if`, causing + non-ACCEPTED errors to be silently swallowed.""" + requests_mock.get( + "https://api.github.com/repos/airbytehq/test_airbyte?per_page=100", + json={"full_name": "airbytehq/test_airbyte"}, + status_code=200, + ) + requests_mock.get( + "https://api.github.com/repos/airbytehq/test_airbyte?per_page=100", + json={"full_name": "airbytehq/test_airbyte", "default_branch": "default_branch"}, + status_code=200, + ) + requests_mock.get( + "https://api.github.com/repos/airbytehq/test_airbyte/branches?per_page=100", + json={}, + status_code=200, + ) + requests_mock.get( + "https://api.github.com/repos/airbytehq/test_airbyte/stats/contributors?per_page=100", + json={"message": "Unauthorized"}, + status_code=401, + ) + + source = SourceGithub() + catalog = CatalogBuilder().with_stream(name="contributor_activity", sync_mode=SyncMode.full_refresh).build() + config = {"access_token": "test_token", "repository": "airbytehq/test_airbyte"} + + # The 401 error should be re-raised, not silently swallowed + with pytest.raises(AirbyteTracedException): + list(source.read(config=config, logger=MagicMock(), catalog=catalog, state={})) + + +def test_releases_extract_database_id_does_not_catch_type_error(): + """Bug fix: _extract_database_id_from_node_id() should only catch ValueError, + struct.error, and binascii.Error — not all exceptions. A TypeError (or other + unexpected exception) should propagate instead of being silently swallowed.""" + # Passing a non-string type that has an underscore representation but causes + # TypeError during string operations + class BadNodeId: + """Object that contains underscore but causes TypeError on split.""" + def __contains__(self, item): + return True # "_" in BadNodeId() returns True + + def split(self, *args, **kwargs): + raise TypeError("split not supported") + + with pytest.raises(TypeError): + Releases._extract_database_id_from_node_id(BadNodeId()) + + +@pytest.mark.parametrize( + "node_id,expected_id", + [ + pytest.param("RA_####", None, id="invalid_base64_caught_by_binascii_error"), + pytest.param("RA_ab", None, id="short_decoded_data"), + ], +) +def test_releases_extract_database_id_catches_expected_errors(node_id, expected_id): + """Verify that expected decode/unpack errors still return None after narrowing the except.""" + assert Releases._extract_database_id_from_node_id(node_id) == expected_id diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index e3f789c5eafb..9287ebd7679a 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -229,6 +229,7 @@ Your token should have at least the `repo` scope. Depending on which streams you | Version | Date | Pull Request | Subject | |:-----------|:-----------|:------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 2.1.18 | 2026-04-07 | [76124](https://github.com/airbytehq/airbyte/pull/76124) | Fix silent error swallowing in exception handlers for ContributorActivity, GithubStreamABC, and Releases streams | | 2.1.17 | 2026-04-03 | [76080](https://github.com/airbytehq/airbyte/pull/76080) | Fix remaining NameError references to removed MessageRepresentationAirbyteTracedErrors in ContributorActivity stream | | 2.1.16 | 2026-04-02 | [76038](https://github.com/airbytehq/airbyte/pull/76038) | Replace deprecated MessageRepresentationAirbyteTracedErrors with AirbyteTracedException | | 2.1.15 | 2026-03-27 | [75508](https://github.com/airbytehq/airbyte/pull/75508) | Add declarative OAuth with `oauth_connector_input_specification` and granular scopes | From 5f8d35751d685addce53bb4371308e2c7c0a1bb6 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:54:05 +0000 Subject: [PATCH 2/3] style: fix ruff-format blank line requirements in test file Co-Authored-By: bot_apk --- .../connectors/source-github/unit_tests/test_stream.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py index 4fcb23854083..40375155c751 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py @@ -1848,10 +1848,12 @@ def test_releases_extract_database_id_does_not_catch_type_error(): """Bug fix: _extract_database_id_from_node_id() should only catch ValueError, struct.error, and binascii.Error — not all exceptions. A TypeError (or other unexpected exception) should propagate instead of being silently swallowed.""" + # Passing a non-string type that has an underscore representation but causes # TypeError during string operations class BadNodeId: """Object that contains underscore but causes TypeError on split.""" + def __contains__(self, item): return True # "_" in BadNodeId() returns True From 85d707cc3d7b0fb6a74011d1b8e1e7b33bbc503f Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 7 Apr 2026 20:42:15 +0000 Subject: [PATCH 3/3] =?UTF-8?q?test:=20improve=20Bug=202=20tests=20to=20di?= =?UTF-8?q?rectly=20exercise=20and=E2=86=92or=20guard=20clause=20fix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: bot_apk --- .../source-github/unit_tests/test_stream.py | 59 ++++++++++--------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py index 40375155c751..8288a5abcb24 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py @@ -1769,43 +1769,44 @@ def test_pull_request_stats(requests_mock): @patch("time.sleep") -def test_github_stream_abc_read_records_reraises_when_no_exception_attr(time_mock, requests_mock): - """Bug fix: GithubStreamABC.read_records() should re-raise when AirbyteTracedException - has no _exception attribute. Previously used `and` instead of `or` which would cause - an AttributeError when accessing e._exception on an exception without that attribute.""" +def test_github_stream_abc_read_records_reraises_when_no_exception_attr(time_mock): + """Bug fix: GithubStreamABC.read_records() guard clause uses `or` so that when + AirbyteTracedException has no _exception attribute, the exception is re-raised + immediately. With the old `and`, the second hasattr would raise AttributeError.""" organization_args = {"organizations": ["org_name"]} stream = Teams(**organization_args) - requests_mock.get( - "https://api.github.com/orgs/org_name/teams", - status_code=requests.codes.INTERNAL_SERVER_ERROR, - json={"message": "Internal Server Error"}, - ) + # Construct an AirbyteTracedException WITHOUT _exception attribute + exc = AirbyteTracedException(message="bare error", failure_type=FailureType.system_error) + # CDK sets _exception=None by default; delete it to simulate the case where it's truly absent + delattr(exc, "_exception") + assert not hasattr(exc, "_exception"), "Test precondition: exception must lack _exception attr" - # The exception should be re-raised (not swallowed) - with pytest.raises(AirbyteTracedException): - list(read_full_refresh(stream)) + # Patch HttpStream.read_records (the super() target) to raise our bare exception + with patch("airbyte_cdk.sources.streams.http.http.HttpStream.read_records", side_effect=exc): + with pytest.raises(AirbyteTracedException): + list(stream.read_records(stream_slice={"organization": "org_name"})) @patch("time.sleep") -def test_github_stream_abc_read_records_reraises_for_unhandled_status(time_mock, requests_mock): - """Bug fix: GithubStreamABC.read_records() correctly handles exceptions with _exception - and response attributes for status codes not explicitly handled (e.g. 422).""" - repository_args = { - "repositories": ["airbytehq/airbyte"], - "page_size_for_large_streams": 20, - } - stream = Branches(**repository_args) - - requests_mock.get( - "https://api.github.com/repos/airbytehq/airbyte/branches", - status_code=422, - json={"message": "Unprocessable Entity"}, - ) +def test_github_stream_abc_read_records_reraises_when_no_response_attr(time_mock): + """Bug fix: GithubStreamABC.read_records() guard clause uses `or` so that when + AirbyteTracedException has _exception but _exception lacks response attribute, + the exception is re-raised. With the old `and`, this case was silently swallowed.""" + organization_args = {"organizations": ["org_name"]} + stream = Teams(**organization_args) - # Unhandled status codes should propagate up through the else clause on line 188 - with pytest.raises(AirbyteTracedException): - list(read_full_refresh(stream)) + # Construct an AirbyteTracedException WITH _exception but WITHOUT response + exc = AirbyteTracedException(message="missing response", failure_type=FailureType.system_error) + inner = Exception("inner error") + exc._exception = inner + assert hasattr(exc, "_exception"), "Test precondition: exception must have _exception attr" + assert not hasattr(exc._exception, "response"), "Test precondition: _exception must lack response attr" + + # Patch HttpStream.read_records (the super() target) to raise our exception + with patch("airbyte_cdk.sources.streams.http.http.HttpStream.read_records", side_effect=exc): + with pytest.raises(AirbyteTracedException): + list(stream.read_records(stream_slice={"organization": "org_name"})) @patch("time.sleep")