diff --git a/src/chat_sdk/adapters/github/adapter.py b/src/chat_sdk/adapters/github/adapter.py index 50ad1e3..75eb292 100644 --- a/src/chat_sdk/adapters/github/adapter.py +++ b/src/chat_sdk/adapters/github/adapter.py @@ -120,6 +120,10 @@ def __init__(self, config: GitHubAdapterConfig | None = None) -> None: self._installation_id: int | None = None self._installation_token_cache: dict[int, tuple[str, float]] = {} self._token_lock = asyncio.Lock() + # Serializes concurrent bot-user-ID detection so concurrent webhooks + # don't all race to fetch the (identical) bot identity (see + # ``_detect_bot_user_id``). + self._detect_lock = asyncio.Lock() # Shared aiohttp session for connection pooling self._http_session: Any | None = None @@ -201,21 +205,77 @@ async def initialize(self, chat: ChatInstance) -> None: """Initialize the adapter.""" self._chat = chat - if not self._bot_user_id and self._auth_token: + # Fetch bot user ID if not provided. For multi-tenant mode there is no + # fixed installation yet, so detection happens lazily on the first + # webhook (see ``handle_webhook``) so ``is_me`` checks work for the + # very first reply. + if self._bot_user_id is None and self._auth_token: + await self._detect_bot_user_id() + + self._logger.info("GitHub adapter initialized") + + async def _detect_bot_user_id(self, installation_id: int | None = None) -> None: + """Fetch the bot's user ID from GitHub. Used for self-message detection. + + Best-effort: errors are swallowed so they do not block webhook + processing. The bot identity is the same across all installations of + the same App, so we only detect once and cache the result on + ``self._bot_user_id``. + + Mirrors the upstream TypeScript ``detectBotUserId`` sequence: + try ``users.getAuthenticated`` (``GET /user``) first — this works for + PAT mode and (returns the bot user) for installation tokens too. On + failure, fall back to ``apps.getAuthenticated`` (``GET /app``) and + resolve the bot user via ``users.getByUsername`` (``GET /users/{slug}[bot]``). + """ + # Cache hit: already detected once, don't re-fetch per webhook. This is + # the fast path that avoids taking the lock once detection has succeeded. + if self._bot_user_id is not None: + return + + # Double-checked locking: concurrent webhooks must not all race to fetch + # the (identical) bot identity. Serialize detection and re-check the + # cache inside the lock so only the first caller hits the API. + async with self._detect_lock: + if self._bot_user_id is not None: + return + try: - user_data = await self._github_api_request("GET", "/user") - self._bot_user_id = user_data.get("id") + user = await self._github_api_request("GET", "/user", installation_id=installation_id) + self._bot_user_id = user.get("id") self._logger.info( - "GitHub auth completed", + "GitHub bot user ID auto-detected", { "botUserId": self._bot_user_id, - "login": user_data.get("login"), + "login": user.get("login"), }, ) + return except Exception as error: - self._logger.warn("Could not fetch bot user ID", {"error": str(error)}) + self._logger.debug( + "users.getAuthenticated failed; falling back to apps.getAuthenticated", + {"error": str(error)}, + ) - self._logger.info("GitHub adapter initialized") + try: + # For App-authenticated installation tokens, /user is not + # available; use apps.getAuthenticated (GET /app, authenticated + # with the App JWT) and resolve the bot user via + # /users/{login}[bot]. + app = await self._github_api_request("GET", "/app", installation_id=installation_id) + if app: + login = f"{app['slug']}[bot]" + bot_user = await self._github_api_request("GET", f"/users/{login}", installation_id=installation_id) + self._bot_user_id = bot_user.get("id") + self._logger.info( + "GitHub bot user ID auto-detected via app slug", + { + "botUserId": self._bot_user_id, + "login": login, + }, + ) + except Exception as error: + self._logger.warn("Could not auto-detect GitHub bot user ID", {"error": str(error)}) async def get_user(self, user_id: str) -> UserInfo | None: """Look up a GitHub user by numeric account ID. @@ -306,6 +366,13 @@ async def handle_webhook(self, request: Any, options: WebhookOptions | None = No if owner_login and repo_name: await self._store_installation_id(owner_login, repo_name, installation_id) + # Eagerly resolve the bot user ID before dispatching to handlers, so + # is_me checks work on the very first webhook. Without this, + # multi-tenant adapters can self-reply-loop until detection fires + # lazily elsewhere. Cached after the first detection (once per process). + if self._bot_user_id is None: + await self._detect_bot_user_id(installation_id) + if event_type == "issue_comment": if payload.get("action") == "created": self._handle_issue_comment(payload, installation_id, options) @@ -629,12 +696,10 @@ async def add_reaction(self, thread_id: str, message_id: str, emoji: EmojiValue async def remove_reaction(self, thread_id: str, message_id: str, emoji: EmojiValue | str) -> None: """Remove a reaction from a message.""" - if not self._bot_user_id and self._auth_token: - try: - user_data = await self._github_api_request("GET", "/user") - self._bot_user_id = user_data.get("id") - except Exception: - self._logger.warn("Could not detect bot user ID for reaction removal") + # Multi-tenant mode has no fixed installation, so initialize() can't + # detect _bot_user_id. Detect lazily (cached after first success). + if self._bot_user_id is None: + await self._detect_bot_user_id() decoded = self.decode_thread_id(thread_id) comment_id = int(message_id) @@ -1059,24 +1124,42 @@ async def _get_installation_token(self, installation_id: int) -> str: ) return token - async def _github_api_request(self, method: str, path: str, body: Any = None) -> Any: + async def _github_api_request( + self, method: str, path: str, body: Any = None, *, installation_id: int | None = None + ) -> Any: """Make a request to the GitHub API. Supports PAT auth (``_auth_token``) and GitHub App auth (``_app_credentials`` with JWT -> installation token exchange). + + ``installation_id`` overrides ``self._installation_id`` for App auth. + Multi-tenant mode has no fixed installation on the adapter (it is + extracted per-webhook), so callers operating inside a webhook context + pass the webhook's installation explicitly. """ auth_token = self._auth_token - # GitHub App auth: exchange JWT for installation token + # GitHub App auth. if not auth_token and self._app_credentials: - installation_id = self._installation_id - if not installation_id: - raise RuntimeError( - "Installation ID required for GitHub App authentication. " - "This usually means you're trying to make an API call outside of a webhook context. " - "For proactive messages, use thread IDs from previous webhook interactions." - ) - auth_token = await self._get_installation_token(installation_id) + if path == "/app": + # App-level endpoints (apps.getAuthenticated) REQUIRE app-JWT + # auth; installation tokens get 401/403 here. Authenticate with + # the App JWT directly instead of exchanging for an installation + # token. This is what makes bot-user-ID detection work in the + # GitHub-App/installation case (the /user path 403s on + # installation tokens, so this fallback must succeed). + auth_token = self._generate_app_jwt() + else: + # Installation-scoped endpoints: exchange the JWT for an + # installation token. + resolved_installation_id = installation_id if installation_id is not None else self._installation_id + if not resolved_installation_id: + raise RuntimeError( + "Installation ID required for GitHub App authentication. " + "This usually means you're trying to make an API call outside of a webhook context. " + "For proactive messages, use thread IDs from previous webhook interactions." + ) + auth_token = await self._get_installation_token(resolved_installation_id) headers: dict[str, str] = { "Accept": "application/vnd.github+json", diff --git a/tests/test_github_dispatch.py b/tests/test_github_dispatch.py index 74d788c..baf69ff 100644 --- a/tests/test_github_dispatch.py +++ b/tests/test_github_dispatch.py @@ -7,6 +7,7 @@ from __future__ import annotations +import asyncio import hashlib import hmac import json @@ -281,3 +282,318 @@ async def test_in_reply_to_id_routes_to_root_thread(self): assert decoded.review_comment_id == root_comment_id # The thread_id format should contain :rc:3000 assert f":rc:{root_comment_id}" in thread_id + + +# ============================================================================= +# Tests -- eager bot-user-ID auto-detection (github slice of upstream 9824d33) +# ============================================================================= + + +def _make_multi_tenant_adapter(**overrides: Any) -> GitHubAdapter: + """Create a multi-tenant GitHub App adapter (no installation_id).""" + defaults: dict[str, Any] = { + "webhook_secret": WEBHOOK_SECRET, + "app_id": "12345", + "private_key": "-----BEGIN RSA PRIVATE KEY-----\nfake\n-----END RSA PRIVATE KEY-----", + "logger": ConsoleLogger("error"), + } + defaults.update(overrides) + return GitHubAdapter(defaults) + + +def _make_install_payload(*, sender_id: int = 100, installation_id: int = 54321) -> dict[str, Any]: + """An issue_comment payload carrying an installation id (multi-tenant).""" + payload = _make_issue_comment_payload(sender_id=sender_id, owner="acme", repo="app", pr_number=42) + payload["installation"] = {"id": installation_id, "node_id": "MDIzOk"} + return payload + + +class TestEagerBotUserIdDetection: + """Eager bot-user-ID detection so is_me works and self-reply loops are prevented. + + Mirrors the github slice of upstream commit 9824d33: detection runs on the + first webhook for an installation (so the very first reply has a populated + bot id), is cached so subsequent webhooks don't re-fetch, and falls back + from users.getAuthenticated (GET /user) to apps.getAuthenticated (GET /app) + + users.getByUsername (GET /users/{slug}[bot]) for installation tokens. + """ + + @pytest.mark.asyncio + async def test_webhook_populates_bot_user_id_before_dispatch(self): + """First webhook for a new installation populates _bot_user_id before message handling.""" + adapter = _make_multi_tenant_adapter() + chat = MagicMock() + chat.process_message = MagicMock() + chat.get_state = MagicMock(return_value=MagicMock(set=AsyncMock(), get=AsyncMock(return_value=None))) + adapter._chat = chat + + # Installation tokens: /user is unavailable, so detection falls back to + # /app then /users/{slug}[bot]. + async def fake_api(method: str, path: str, body: Any = None, *, installation_id: int | None = None) -> Any: + if path == "/user": + raise RuntimeError("404 /user not available for installation token") + if path == "/app": + return {"slug": "my-bot", "id": 1} + if path == "/users/my-bot[bot]": + return {"id": 7777, "login": "my-bot[bot]"} + raise AssertionError(f"unexpected path {path}") + + api = AsyncMock(side_effect=fake_api) + adapter._github_api_request = api + + # process_message must observe a populated bot id. Capture it at call time. + observed: dict[str, Any] = {} + chat.process_message.side_effect = lambda *a, **kw: observed.update({"bot_id": adapter._bot_user_id}) + + payload = _make_install_payload(sender_id=100, installation_id=54321) + body = json.dumps(payload) + headers = { + "x-hub-signature-256": _sign(body), + "x-github-event": "issue_comment", + "content-type": "application/json", + } + + result = await adapter.handle_webhook(FakeRequest(body, headers)) + + assert result["status"] == 200 + assert adapter._bot_user_id == 7777 + # Detection used the webhook's installation id for the API calls. + assert api.await_args_list[0].kwargs["installation_id"] == 54321 + # process_message ran, and the bot id was already set when it did. + chat.process_message.assert_called_once() + assert observed["bot_id"] == 7777 + + @pytest.mark.asyncio + async def test_message_from_bot_filtered_as_is_me(self): + """A message authored by the bot itself is filtered (self-reply loop prevented).""" + adapter = _make_multi_tenant_adapter() + chat = MagicMock() + chat.process_message = MagicMock() + chat.get_state = MagicMock(return_value=MagicMock(set=AsyncMock(), get=AsyncMock(return_value=None))) + adapter._chat = chat + + async def fake_api(method: str, path: str, body: Any = None, *, installation_id: int | None = None) -> Any: + if path == "/user": + raise RuntimeError("404") + if path == "/app": + return {"slug": "my-bot"} + if path == "/users/my-bot[bot]": + return {"id": 8888, "login": "my-bot[bot]"} + raise AssertionError(f"unexpected path {path}") + + adapter._github_api_request = AsyncMock(side_effect=fake_api) + + # Sender is the bot itself (id 8888) -> must be ignored. + payload = _make_install_payload(sender_id=8888, installation_id=54321) + body = json.dumps(payload) + headers = { + "x-hub-signature-256": _sign(body), + "x-github-event": "issue_comment", + "content-type": "application/json", + } + + result = await adapter.handle_webhook(FakeRequest(body, headers)) + + assert result["status"] == 200 + assert adapter._bot_user_id == 8888 + # Self-message: process_message NOT called -> no self-reply loop. + chat.process_message.assert_not_called() + + @pytest.mark.asyncio + async def test_pat_path_uses_users_get_authenticated(self): + """PAT path resolves the bot via GET /user (users.getAuthenticated), no /app fallback.""" + adapter = _make_adapter() # token-based (PAT) adapter + chat = MagicMock() + chat.process_message = MagicMock() + chat.get_state = MagicMock(return_value=MagicMock(set=AsyncMock(), get=AsyncMock(return_value=None))) + adapter._chat = chat + + api = AsyncMock(return_value={"id": 4242, "login": "pat-bot"}) + adapter._github_api_request = api + + payload = _make_issue_comment_payload(sender_id=100) + body = json.dumps(payload) + headers = { + "x-hub-signature-256": _sign(body), + "x-github-event": "issue_comment", + "content-type": "application/json", + } + + result = await adapter.handle_webhook(FakeRequest(body, headers)) + + assert result["status"] == 200 + assert adapter._bot_user_id == 4242 + # Only GET /user was used; no apps.getAuthenticated fallback. + called_paths = [c.args[1] for c in api.await_args_list] + assert called_paths == ["/user"] + chat.process_message.assert_called_once() + + @pytest.mark.asyncio + async def test_second_webhook_does_not_refetch(self): + """A second webhook for the same installation does NOT re-fetch (cache hit).""" + adapter = _make_multi_tenant_adapter() + chat = MagicMock() + chat.process_message = MagicMock() + chat.get_state = MagicMock(return_value=MagicMock(set=AsyncMock(), get=AsyncMock(return_value=None))) + adapter._chat = chat + + async def fake_api(method: str, path: str, body: Any = None, *, installation_id: int | None = None) -> Any: + if path == "/user": + raise RuntimeError("404") + if path == "/app": + return {"slug": "my-bot"} + if path == "/users/my-bot[bot]": + return {"id": 9999, "login": "my-bot[bot]"} + raise AssertionError(f"unexpected path {path}") + + api = AsyncMock(side_effect=fake_api) + adapter._github_api_request = api + + headers_for = lambda b: { # noqa: E731 + "x-hub-signature-256": _sign(b), + "x-github-event": "issue_comment", + "content-type": "application/json", + } + + body1 = json.dumps(_make_install_payload(sender_id=100, installation_id=54321)) + await adapter.handle_webhook(FakeRequest(body1, headers_for(body1))) + assert adapter._bot_user_id == 9999 + + detection_calls_after_first = len(api.await_args_list) + + body2 = json.dumps(_make_install_payload(sender_id=101, installation_id=54321)) + await adapter.handle_webhook(FakeRequest(body2, headers_for(body2))) + + # No additional detection API calls on the second webhook (cache hit). + assert len(api.await_args_list) == detection_calls_after_first + assert chat.process_message.call_count == 2 + + +# ============================================================================= +# Fake aiohttp session (exercises the real auth-selection logic in +# _github_api_request: PAT vs App-JWT vs installation-token) +# ============================================================================= + + +class _FakeResponse: + """Minimal aiohttp-style response usable as an async context manager.""" + + def __init__(self, status: int, payload: Any) -> None: + self.status = status + self._payload = payload + + async def __aenter__(self) -> _FakeResponse: + return self + + async def __aexit__(self, *exc: Any) -> None: + return None + + async def json(self) -> Any: + return self._payload + + async def text(self) -> str: + return json.dumps(self._payload) + + +class _FakeSession: + """Routes requests by path, recording the Authorization header used. + + Lets a test assert *which* credential (app JWT vs installation token) was + sent to each GitHub endpoint, which is the load-bearing behaviour for the + /app-needs-JWT fix. + """ + + def __init__(self, handler: Any) -> None: + self._handler = handler + self.closed = False + # path -> Authorization header value used for that path + self.auth_by_path: dict[str, str] = {} + + def request(self, method: str, url: str, **kwargs: Any) -> _FakeResponse: + path = url.replace("https://api.github.com", "") + headers = kwargs.get("headers", {}) + self.auth_by_path[path] = headers.get("Authorization", "") + status, payload = self._handler(method, path) + return _FakeResponse(status, payload) + + +class TestAppEndpointAuthSelection: + """GET /app must authenticate with the App JWT, not an installation token. + + GitHub's apps.getAuthenticated (GET /app) endpoint rejects installation + tokens (401/403). For the GitHub-App/installation case the /user path also + fails on installation tokens, so the /app fallback is what must work -- + and it only works when authenticated with the App JWT directly. + + This test exercises the real auth-selection branch in _github_api_request + (the HTTP layer is faked, not _github_api_request itself). + """ + + @pytest.mark.asyncio + async def test_app_endpoint_uses_jwt_not_installation_token(self): + adapter = _make_multi_tenant_adapter() + adapter._installation_id = 54321 # single resolvable installation + + # Spy the credential minting so we can assert which path used which. + adapter._generate_app_jwt = MagicMock(return_value="APP_JWT") # type: ignore[method-assign] + adapter._get_installation_token = AsyncMock(return_value="INSTALL_TOKEN") # type: ignore[method-assign] + + def handler(method: str, path: str) -> tuple[int, Any]: + if path == "/user": + # Installation token cannot use /user -> 403. + return 403, {"message": "Resource not accessible by integration"} + if path == "/app": + return 200, {"slug": "my-app", "id": 1} + if path == "/users/my-app[bot]": + return 200, {"id": 12345, "login": "my-app[bot]"} + raise AssertionError(f"unexpected path {path}") + + session = _FakeSession(handler) + adapter._get_http_session = AsyncMock(return_value=session) # type: ignore[method-assign] + + await adapter._detect_bot_user_id(installation_id=54321) + + # Detection resolved the bot id via /app -> /users/{slug}[bot]. + assert adapter._bot_user_id == 12345 + + # The /app call was authenticated with the App JWT (not an install token). + assert session.auth_by_path["/app"] == "Bearer APP_JWT" + assert adapter._generate_app_jwt.called + # And /app was NOT sent the installation token. + assert session.auth_by_path["/app"] != "Bearer INSTALL_TOKEN" + + # The /user attempt (which 403s) and the public /users/{slug}[bot] lookup + # both go through the installation-token exchange, as expected. + assert session.auth_by_path["/user"] == "Bearer INSTALL_TOKEN" + assert session.auth_by_path["/users/my-app[bot]"] == "Bearer INSTALL_TOKEN" + + +class TestConcurrentDetectionFetchesOnce: + """Concurrent detection must fetch the bot identity only once (lock works).""" + + @pytest.mark.asyncio + async def test_concurrent_detect_fetches_once(self): + adapter = _make_multi_tenant_adapter() + + call_counts: dict[str, int] = {"/user": 0} + started = asyncio.Event() + + async def fake_api(method: str, path: str, body: Any = None, *, installation_id: int | None = None) -> Any: + if path == "/user": + call_counts["/user"] += 1 + started.set() + # Slow PAT-style success so concurrent callers pile up behind + # the first one if locking is absent. + await asyncio.sleep(0.05) + return {"id": 4242, "login": "the-bot"} + raise AssertionError(f"unexpected path {path}") + + adapter._github_api_request = AsyncMock(side_effect=fake_api) + + # Fire N concurrent detections; only the first should hit the API. + await asyncio.gather(*[adapter._detect_bot_user_id(installation_id=54321) for _ in range(8)]) + + assert adapter._bot_user_id == 4242 + # The lock + double-checked cache means /user is fetched exactly once. + assert call_counts["/user"] == 1 + assert adapter._github_api_request.await_count == 1