diff --git a/airbyte-integrations/connectors/source-github/metadata.yaml b/airbyte-integrations/connectors/source-github/metadata.yaml index e4ae0f178bac..3d9107cfbb80 100644 --- a/airbyte-integrations/connectors/source-github/metadata.yaml +++ b/airbyte-integrations/connectors/source-github/metadata.yaml @@ -10,7 +10,7 @@ data: connectorSubtype: api connectorType: source definitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e - dockerImageTag: 2.1.17 + dockerImageTag: 2.1.18 dockerRepository: airbyte/source-github documentationUrl: https://docs.airbyte.com/integrations/sources/github erdUrl: https://dbdocs.io/airbyteio/source-github?view=relationships diff --git a/airbyte-integrations/connectors/source-github/pyproject.toml b/airbyte-integrations/connectors/source-github/pyproject.toml index f1dd8aa14ca5..277fc2094658 100644 --- a/airbyte-integrations/connectors/source-github/pyproject.toml +++ b/airbyte-integrations/connectors/source-github/pyproject.toml @@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",] build-backend = "poetry.core.masonry.api" [tool.poetry] -version = "2.1.17" +version = "2.1.18" name = "source-github" description = "Source implementation for GitHub." authors = [ "Airbyte ",] diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index 68202ca64d6f..8327b37e61c1 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -3,6 +3,7 @@ # import base64 +import binascii import re import struct from abc import ABC, abstractmethod @@ -137,7 +138,7 @@ def read_records(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iter # This whole try/except situation in `read_records()` isn't good but right now in `self._send_request()` # function we have `response.raise_for_status()` so we don't have much choice on how to handle errors. # Bocked on https://github.com/airbytehq/airbyte/issues/3514. - if not hasattr(e, "_exception") and not hasattr(e._exception, "response"): + if not hasattr(e, "_exception") or not hasattr(e._exception, "response"): raise e if e._exception.response.status_code == requests.codes.NOT_FOUND: # A lot of streams are not available for repositories owned by a user instead of an organization. @@ -827,7 +828,7 @@ def _extract_database_id_from_node_id(node_id: str) -> Optional[int]: decoded = base64.urlsafe_b64decode(encoded + "==") if len(decoded) >= 4: return struct.unpack(">I", decoded[-4:])[0] - except Exception: + except (ValueError, struct.error, binascii.Error): return None return None @@ -1828,6 +1829,8 @@ def read_records(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iter partition_obj = stream_slice.get("partition") if self.cursor and partition_obj: self.cursor.close_slice(StreamSlice(cursor_slice={}, partition=partition_obj)) + else: + raise e else: raise e diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py index 9350a23fdbf0..8288a5abcb24 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py @@ -1763,3 +1763,115 @@ def test_pull_request_stats(requests_mock): list(read_full_refresh(stream)) assert query == expected_query + + +# === Tests for error-swallowing bug fixes (oncall/issues/11907) === + + +@patch("time.sleep") +def test_github_stream_abc_read_records_reraises_when_no_exception_attr(time_mock): + """Bug fix: GithubStreamABC.read_records() guard clause uses `or` so that when + AirbyteTracedException has no _exception attribute, the exception is re-raised + immediately. With the old `and`, the second hasattr would raise AttributeError.""" + organization_args = {"organizations": ["org_name"]} + stream = Teams(**organization_args) + + # Construct an AirbyteTracedException WITHOUT _exception attribute + exc = AirbyteTracedException(message="bare error", failure_type=FailureType.system_error) + # CDK sets _exception=None by default; delete it to simulate the case where it's truly absent + delattr(exc, "_exception") + assert not hasattr(exc, "_exception"), "Test precondition: exception must lack _exception attr" + + # Patch HttpStream.read_records (the super() target) to raise our bare exception + with patch("airbyte_cdk.sources.streams.http.http.HttpStream.read_records", side_effect=exc): + with pytest.raises(AirbyteTracedException): + list(stream.read_records(stream_slice={"organization": "org_name"})) + + +@patch("time.sleep") +def test_github_stream_abc_read_records_reraises_when_no_response_attr(time_mock): + """Bug fix: GithubStreamABC.read_records() guard clause uses `or` so that when + AirbyteTracedException has _exception but _exception lacks response attribute, + the exception is re-raised. With the old `and`, this case was silently swallowed.""" + organization_args = {"organizations": ["org_name"]} + stream = Teams(**organization_args) + + # Construct an AirbyteTracedException WITH _exception but WITHOUT response + exc = AirbyteTracedException(message="missing response", failure_type=FailureType.system_error) + inner = Exception("inner error") + exc._exception = inner + assert hasattr(exc, "_exception"), "Test precondition: exception must have _exception attr" + assert not hasattr(exc._exception, "response"), "Test precondition: _exception must lack response attr" + + # Patch HttpStream.read_records (the super() target) to raise our exception + with patch("airbyte_cdk.sources.streams.http.http.HttpStream.read_records", side_effect=exc): + with pytest.raises(AirbyteTracedException): + list(stream.read_records(stream_slice={"organization": "org_name"})) + + +@patch("time.sleep") +def test_contributor_activity_reraises_non_accepted_status(time_mock, rate_limit_mock_response, requests_mock): + """Bug fix: ContributorActivity.read_records() should re-raise when the exception has + a valid _exception.response but the status code is NOT 202 ACCEPTED. Previously, the + `else: raise e` was paired with the outer `if` instead of the inner `if`, causing + non-ACCEPTED errors to be silently swallowed.""" + requests_mock.get( + "https://api.github.com/repos/airbytehq/test_airbyte?per_page=100", + json={"full_name": "airbytehq/test_airbyte"}, + status_code=200, + ) + requests_mock.get( + "https://api.github.com/repos/airbytehq/test_airbyte?per_page=100", + json={"full_name": "airbytehq/test_airbyte", "default_branch": "default_branch"}, + status_code=200, + ) + requests_mock.get( + "https://api.github.com/repos/airbytehq/test_airbyte/branches?per_page=100", + json={}, + status_code=200, + ) + requests_mock.get( + "https://api.github.com/repos/airbytehq/test_airbyte/stats/contributors?per_page=100", + json={"message": "Unauthorized"}, + status_code=401, + ) + + source = SourceGithub() + catalog = CatalogBuilder().with_stream(name="contributor_activity", sync_mode=SyncMode.full_refresh).build() + config = {"access_token": "test_token", "repository": "airbytehq/test_airbyte"} + + # The 401 error should be re-raised, not silently swallowed + with pytest.raises(AirbyteTracedException): + list(source.read(config=config, logger=MagicMock(), catalog=catalog, state={})) + + +def test_releases_extract_database_id_does_not_catch_type_error(): + """Bug fix: _extract_database_id_from_node_id() should only catch ValueError, + struct.error, and binascii.Error — not all exceptions. A TypeError (or other + unexpected exception) should propagate instead of being silently swallowed.""" + + # Passing a non-string type that has an underscore representation but causes + # TypeError during string operations + class BadNodeId: + """Object that contains underscore but causes TypeError on split.""" + + def __contains__(self, item): + return True # "_" in BadNodeId() returns True + + def split(self, *args, **kwargs): + raise TypeError("split not supported") + + with pytest.raises(TypeError): + Releases._extract_database_id_from_node_id(BadNodeId()) + + +@pytest.mark.parametrize( + "node_id,expected_id", + [ + pytest.param("RA_####", None, id="invalid_base64_caught_by_binascii_error"), + pytest.param("RA_ab", None, id="short_decoded_data"), + ], +) +def test_releases_extract_database_id_catches_expected_errors(node_id, expected_id): + """Verify that expected decode/unpack errors still return None after narrowing the except.""" + assert Releases._extract_database_id_from_node_id(node_id) == expected_id diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index e3f789c5eafb..9287ebd7679a 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -229,6 +229,7 @@ Your token should have at least the `repo` scope. Depending on which streams you | Version | Date | Pull Request | Subject | |:-----------|:-----------|:------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 2.1.18 | 2026-04-07 | [76124](https://github.com/airbytehq/airbyte/pull/76124) | Fix silent error swallowing in exception handlers for ContributorActivity, GithubStreamABC, and Releases streams | | 2.1.17 | 2026-04-03 | [76080](https://github.com/airbytehq/airbyte/pull/76080) | Fix remaining NameError references to removed MessageRepresentationAirbyteTracedErrors in ContributorActivity stream | | 2.1.16 | 2026-04-02 | [76038](https://github.com/airbytehq/airbyte/pull/76038) | Replace deprecated MessageRepresentationAirbyteTracedErrors with AirbyteTracedException | | 2.1.15 | 2026-03-27 | [75508](https://github.com/airbytehq/airbyte/pull/75508) | Add declarative OAuth with `oauth_connector_input_specification` and granular scopes |