From a6cab732931a9f87d9dc56b4fcc1e578b7ef3fed Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Tue, 1 Apr 2025 13:14:14 +0200 Subject: [PATCH 1/3] Optimize `fetch_repository` resolver Previously, the `owner.repository` resolver would unconditionally execute a very complicated query through `fetch_repository`. We can avoid that expensive subquery by inspecting the requested fields as stated in the graphql query. --- .../interactors/fetch_repository.py | 19 +- core/commands/repository/repository.py | 15 +- graphql_api/helpers/requested_fields.py | 50 +++++ graphql_api/tests/test_requested_fields.py | 208 ++++++++++++++++++ graphql_api/types/owner/owner.py | 17 ++ 5 files changed, 284 insertions(+), 25 deletions(-) create mode 100644 graphql_api/helpers/requested_fields.py create mode 100644 graphql_api/tests/test_requested_fields.py diff --git a/core/commands/repository/interactors/fetch_repository.py b/core/commands/repository/interactors/fetch_repository.py index 14a4e0eb92..37292f7bd0 100644 --- a/core/commands/repository/interactors/fetch_repository.py +++ b/core/commands/repository/interactors/fetch_repository.py @@ -15,6 +15,8 @@ def execute( name: str, okta_authenticated_accounts: list[int], exclude_okta_enforced_repos: bool = True, + needs_coverage: bool = True, + needs_commits: bool = True, ) -> Repository | None: queryset = Repository.objects.viewable_repos(self.current_owner) if exclude_okta_enforced_repos: @@ -22,18 +24,11 @@ def execute( okta_authenticated_accounts ) - # TODO(swatinem): We should find a way to avoid these combinators: - # The `with_recent_coverage` combinator is quite expensive. - # We only need that in case we want to query these props via graphql: - # `coverageAnalytics.{percentCovered,commitSha,hits,misses,lines}` - # Similarly, `with_oldest_commit_at` is only needed for `oldestCommitAt`. + if needs_coverage: + queryset = queryset.with_recent_coverage() + if needs_commits: + queryset = queryset.with_oldest_commit_at() - repo = ( - queryset.filter(author=owner, name=name) - .with_recent_coverage() - .with_oldest_commit_at() - .select_related("author") - .first() - ) + repo = queryset.filter(author=owner, name=name).select_related("author").first() return repo diff --git a/core/commands/repository/repository.py b/core/commands/repository/repository.py index 0b1c61a427..26cd73bde6 100644 --- a/core/commands/repository/repository.py +++ b/core/commands/repository/repository.py @@ -21,19 +21,8 @@ class RepositoryCommands(BaseCommand): - def fetch_repository( - self, - owner: Owner, - name: str, - okta_authenticated_accounts: list[int], - exclude_okta_enforced_repos: bool = True, - ) -> Repository | None: - return self.get_interactor(FetchRepositoryInteractor).execute( - owner, - name, - okta_authenticated_accounts, - exclude_okta_enforced_repos=exclude_okta_enforced_repos, - ) + def fetch_repository(self, *args, **kwargs) -> Repository | None: + return self.get_interactor(FetchRepositoryInteractor).execute(*args, **kwargs) def regenerate_repository_upload_token( self, diff --git a/graphql_api/helpers/requested_fields.py b/graphql_api/helpers/requested_fields.py new file mode 100644 index 0000000000..ba762f6002 --- /dev/null +++ b/graphql_api/helpers/requested_fields.py @@ -0,0 +1,50 @@ +# This was adapted from +from collections.abc import Generator, Iterable + +from graphql import GraphQLResolveInfo +from graphql.language import ( + FieldNode, + SelectionNode, + InlineFragmentNode, + FragmentSpreadNode, +) + + +def selected_fields(info: GraphQLResolveInfo) -> set[str]: + names: set[str] = set() + for node in info.field_nodes: + if node.selection_set is None: + continue + names.update(_fields_from_selections(info, node.selection_set.selections)) + return names + + +def _fields_from_selections( + info: GraphQLResolveInfo, selections: Iterable[SelectionNode] +) -> Generator[str, None, None]: + for selection in selections: + match selection: + case FieldNode(): + name = selection.name.value + yield name + + if selection.selection_set is not None: + yield from ( + f"{name}.{field}" + for field in _fields_from_selections( + info, selection.selection_set.selections + ) + ) + + case InlineFragmentNode(): + yield from _fields_from_selections( + info, selection.selection_set.selections + ) + case FragmentSpreadNode(): + fragment = info.fragments[selection.name.value] + yield from _fields_from_selections( + info, fragment.selection_set.selections + ) + + case _: + raise NotImplementedError(f"field type {type(selection)} not supported") diff --git a/graphql_api/tests/test_requested_fields.py b/graphql_api/tests/test_requested_fields.py new file mode 100644 index 0000000000..a7033a56a5 --- /dev/null +++ b/graphql_api/tests/test_requested_fields.py @@ -0,0 +1,208 @@ +from graphql_api.helpers.requested_fields import selected_fields +from graphql.language import parse + +from graphql import GraphQLResolveInfo +from graphql.language import ( + OperationDefinitionNode, + FragmentDefinitionNode, +) + + +def parse_into_resolveinfo(source: str) -> GraphQLResolveInfo: + document = parse(source) + + operation: OperationDefinitionNode | None = None + fragments: dict[str, FragmentDefinitionNode] = {} + + for definition in document.definitions: + if isinstance(definition, OperationDefinitionNode): + operation = definition + elif isinstance(definition, FragmentDefinitionNode): + fragments[definition.name.value] = definition + + assert operation + root_fields = [operation] + + return GraphQLResolveInfo( + "__root__", + root_fields, # list[FieldNode] + None, + None, + None, + None, + fragments, # dict[str, FragmentDefinitionNode] + None, + None, + None, + None, + None, + ) + + +QUERY_CoverageForFile = """ +query CoverageForFile( + $owner: String! + $repo: String! + $ref: String! + $path: String! + $flags: [String] + $components: [String] +) { + owner(username: $owner) { + repository(name: $repo) { + __typename + ... on Repository { + commit(id: $ref) { + ...CoverageForFile + } + branch(name: $ref) { + name + head { + ...CoverageForFile + } + } + } + ... on NotFoundError { + message + } + ... on OwnerNotActivatedError { + message + } + } + } +} + +fragment CoverageForFile on Commit { + commitid + coverageAnalytics { + flagNames + components { + id + name + } + coverageFile(path: $path, flags: $flags, components: $components) { + hashedPath + content + coverage { + line + coverage + } + totals { + percentCovered # Absolute coverage of the commit + } + } + } +} +""" + +QUERY_GetRepoConfigurationStatus = """ +query GetRepoConfigurationStatus($owner: String!, $repo: String!) { + owner(username: $owner) { + plan { + isTeamPlan + } + repository(name:$repo) { + __typename + ... on Repository { + coverageEnabled + bundleAnalysisEnabled + testAnalyticsEnabled + yaml + languages + coverageAnalytics { + flagsCount + componentsCount + } + } + ... on NotFoundError { + message + } + ... on OwnerNotActivatedError { + message + } + } + } +} +""" + +QUERY_ReposForOwner = """ +query ReposForOwner( + $filters: RepositorySetFilters! + $owner: String! + $ordering: RepositoryOrdering! + $direction: OrderingDirection! + $after: String + $first: Int +) { + owner(username: $owner) { + repositories( + filters: $filters + ordering: $ordering + orderingDirection: $direction + first: $first + after: $after + ) { + edges { + node { + name + active + activated + private + coverageAnalytics { + percentCovered + lines + } + updatedAt + latestCommitAt + author { + username + } + coverageEnabled + bundleAnalysisEnabled + repositoryConfig { + indicationRange { + upperRange + lowerRange + } + } + } + } + pageInfo { + hasNextPage + endCursor + } + } + } +} +""" + + +def test_requested_fields(): + info = parse_into_resolveinfo(QUERY_CoverageForFile) + fields = selected_fields(info) + + assert "owner.repository.branch.name" in fields + assert "owner.repository.branch.head.commitid" in fields + assert ( + "owner.repository.branch.head.coverageAnalytics.coverageFile.totals.percentCovered" + in fields + ) + + assert "owner.repository.oldestCommitAt" not in fields + assert "owner.repository.coverageAnalytics.percentCovered" not in fields + assert "owner.repository.coverageAnalytics.commitSha" not in fields + + info = parse_into_resolveinfo(QUERY_GetRepoConfigurationStatus) + fields = selected_fields(info) + + assert "owner.repository.coverageAnalytics" in fields + assert "owner.repository.oldestCommitAt" not in fields + assert "owner.repository.coverageAnalytics.percentCovered" not in fields + + info = parse_into_resolveinfo(QUERY_ReposForOwner) + fields = selected_fields(info) + + assert "owner.repositories.edges.node.latestCommitAt" in fields + assert "owner.repositories.edges.node.oldestCommitAt" not in fields + assert "owner.repositories.edges.node.coverageAnalytics" in fields + assert "owner.repositories.edges.node.coverageAnalytics.percentCovered" in fields diff --git a/graphql_api/types/owner/owner.py b/graphql_api/types/owner/owner.py index 241007e891..32b60b2df8 100644 --- a/graphql_api/types/owner/owner.py +++ b/graphql_api/types/owner/owner.py @@ -2,6 +2,7 @@ from hashlib import sha1 from typing import Any, Coroutine, Iterable, List, Optional +from graphql_api.helpers.requested_fields import selected_fields import sentry_sdk import shared.rate_limits as rate_limits import stripe @@ -155,6 +156,16 @@ def resolve_ownerid(owner: Owner, info: GraphQLResolveInfo) -> int: return owner.ownerid +COVERAGE_FIELDS = { + "coverageAnalytics.percentCovered", + "coverageAnalytics.commitSha", + "coverageAnalytics.hits", + "coverageAnalytics.misses", + "coverageAnalytics.lines", +} +COMMITS_FIELDS = {"oldestCommitAt"} + + @owner_bindable.field("repository") async def resolve_repository( owner: Owner, info: GraphQLResolveInfo, name: str @@ -169,11 +180,17 @@ async def resolve_repository( # This means we do not want to filter out the Okta enforced repos exclude_okta_enforced_repos = not is_impersonation + requested_fields = selected_fields(info) + needs_coverage = not requested_fields.isdisjoint(COVERAGE_FIELDS) + needs_commits = not requested_fields.isdisjoint(COMMITS_FIELDS) + repository: Repository | None = await command.fetch_repository( owner, name, okta_authenticated_accounts, exclude_okta_enforced_repos=exclude_okta_enforced_repos, + needs_coverage=needs_coverage, + needs_commits=needs_commits, ) if repository is None: From 8f73894b8a2de0f3024bb94b478b365ccb77f833 Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Tue, 1 Apr 2025 13:35:09 +0200 Subject: [PATCH 2/3] fix tests --- codecov_auth/tests/unit/test_authentication.py | 10 ++++++++-- core/commands/repository/tests/test_repository.py | 4 +--- graphql_api/helpers/requested_fields.py | 4 ++-- graphql_api/tests/test_requested_fields.py | 8 ++++---- graphql_api/types/owner/owner.py | 2 +- 5 files changed, 16 insertions(+), 12 deletions(-) diff --git a/codecov_auth/tests/unit/test_authentication.py b/codecov_auth/tests/unit/test_authentication.py index 47c898d399..138238d720 100644 --- a/codecov_auth/tests/unit/test_authentication.py +++ b/codecov_auth/tests/unit/test_authentication.py @@ -1,6 +1,6 @@ from datetime import datetime, timedelta from http.cookies import SimpleCookie -from unittest.mock import call, patch +from unittest.mock import AsyncMock, call, patch import pytest from django.conf import settings @@ -198,7 +198,9 @@ def test_impersonation(self): assert res.json()["data"]["me"] == {"user": {"username": "impersonateme"}} @patch("core.commands.repository.repository.RepositoryCommands.fetch_repository") - def test_impersonation_with_okta(self, mock_call_to_fetch_repository): + def test_impersonation_with_okta( + self, mock_call_to_fetch_repository, new_callable=AsyncMock + ): repo = RepositoryFactory(author=self.owner_to_impersonate, private=True) query_repositories = """{ owner(username: "%s") { repository(name: "%s") { ... on Repository { name } } } }""" query = query_repositories % (repo.author.username, repo.name) @@ -228,12 +230,16 @@ def test_impersonation_with_okta(self, mock_call_to_fetch_repository): repo.name, [], exclude_okta_enforced_repos=True, + needs_coverage=False, + needs_commits=False, ), call( self.owner_to_impersonate, repo.name, [], exclude_okta_enforced_repos=False, + needs_coverage=False, + needs_commits=False, ), ] ) diff --git a/core/commands/repository/tests/test_repository.py b/core/commands/repository/tests/test_repository.py index 407e54a5a5..bd956358a7 100644 --- a/core/commands/repository/tests/test_repository.py +++ b/core/commands/repository/tests/test_repository.py @@ -17,9 +17,7 @@ def setUp(self): @patch("core.commands.repository.repository.FetchRepositoryInteractor.execute") def test_fetch_repository_to_interactor(self, interactor_mock): self.command.fetch_repository(self.org, self.repo.name, []) - interactor_mock.assert_called_once_with( - self.org, self.repo.name, [], exclude_okta_enforced_repos=True - ) + interactor_mock.assert_called_once_with(self.org, self.repo.name, []) @patch("core.commands.repository.repository.FetchRepositoryInteractor.execute") def test_fetch_repository_to_interactor_with_enforcing_okta(self, interactor_mock): diff --git a/graphql_api/helpers/requested_fields.py b/graphql_api/helpers/requested_fields.py index ba762f6002..74d3474b34 100644 --- a/graphql_api/helpers/requested_fields.py +++ b/graphql_api/helpers/requested_fields.py @@ -4,9 +4,9 @@ from graphql import GraphQLResolveInfo from graphql.language import ( FieldNode, - SelectionNode, - InlineFragmentNode, FragmentSpreadNode, + InlineFragmentNode, + SelectionNode, ) diff --git a/graphql_api/tests/test_requested_fields.py b/graphql_api/tests/test_requested_fields.py index a7033a56a5..c7ab1b1a54 100644 --- a/graphql_api/tests/test_requested_fields.py +++ b/graphql_api/tests/test_requested_fields.py @@ -1,12 +1,12 @@ -from graphql_api.helpers.requested_fields import selected_fields -from graphql.language import parse - from graphql import GraphQLResolveInfo from graphql.language import ( - OperationDefinitionNode, FragmentDefinitionNode, + OperationDefinitionNode, + parse, ) +from graphql_api.helpers.requested_fields import selected_fields + def parse_into_resolveinfo(source: str) -> GraphQLResolveInfo: document = parse(source) diff --git a/graphql_api/types/owner/owner.py b/graphql_api/types/owner/owner.py index 32b60b2df8..2df429baea 100644 --- a/graphql_api/types/owner/owner.py +++ b/graphql_api/types/owner/owner.py @@ -2,7 +2,6 @@ from hashlib import sha1 from typing import Any, Coroutine, Iterable, List, Optional -from graphql_api.helpers.requested_fields import selected_fields import sentry_sdk import shared.rate_limits as rate_limits import stripe @@ -39,6 +38,7 @@ require_part_of_org, require_shared_account_or_part_of_org, ) +from graphql_api.helpers.requested_fields import selected_fields from graphql_api.types.enums import OrderingDirection, RepositoryOrdering from graphql_api.types.errors.errors import NotFoundError from graphql_api.types.repository.repository import TOKEN_UNAVAILABLE From 1649d07b60d39eb77ed66e666b752ec5565a685c Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Tue, 1 Apr 2025 16:52:45 +0200 Subject: [PATCH 3/3] add comment --- graphql_api/helpers/requested_fields.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/graphql_api/helpers/requested_fields.py b/graphql_api/helpers/requested_fields.py index 74d3474b34..c6a961e149 100644 --- a/graphql_api/helpers/requested_fields.py +++ b/graphql_api/helpers/requested_fields.py @@ -11,6 +11,15 @@ def selected_fields(info: GraphQLResolveInfo) -> set[str]: + """ + Given a GraphQL "sub-query", this recursively collects all the queried fields. + + For example, if the original GraphQL query looks like `owner { repository { name } }`, + this would resolve to `repository` and `repository.name`. + + This function works by traversing the parts of the GraphQL Query AST which + are exposed to each "resolver". + """ names: set[str] = set() for node in info.field_nodes: if node.selection_set is None: