diff --git a/services/apps/git_integration/src/crowdgit/database/crud.py b/services/apps/git_integration/src/crowdgit/database/crud.py index 1a476ac2aa..fb240f81e9 100644 --- a/services/apps/git_integration/src/crowdgit/database/crud.py +++ b/services/apps/git_integration/src/crowdgit/database/crud.py @@ -223,6 +223,23 @@ async def find_github_identity(github_username: str): return result +async def find_maintainer_identity_by_email(email: str): + sql_query = """ + SELECT id + FROM "memberIdentities" + WHERE + platform IN ('github', 'git', 'gitlab') + AND "verified" = TRUE + AND value = $1 + LIMIT 1 + """ + result = await fetchval( + sql_query, + (email,), + ) + return result + + async def upsert_maintainer( repo_id: str, identity_id: str, diff --git a/services/apps/git_integration/src/crowdgit/models/maintainer_info.py b/services/apps/git_integration/src/crowdgit/models/maintainer_info.py index 942dca2c0a..5a420567ae 100644 --- a/services/apps/git_integration/src/crowdgit/models/maintainer_info.py +++ b/services/apps/git_integration/src/crowdgit/models/maintainer_info.py @@ -13,6 +13,7 @@ class MaintainerInfoItem(BaseModel): name: str | None = None title: str | None = None normalized_title: Literal["maintainer", "contributor"] | None = None + email: str | None = None class MaintainerInfo(BaseModel): diff --git a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py index 2db77a993f..1be1a3e8cc 100644 --- a/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py +++ b/services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py @@ -11,6 +11,7 @@ from crowdgit.database.crud import ( find_github_identity, + find_maintainer_identity_by_email, get_maintainers_for_repo, save_service_execution, set_maintainer_end_date, @@ -76,10 +77,16 @@ async def process_maintainer(maintainer: MaintainerInfoItem): 
original_role = self.make_role(maintainer.title) # Find the identity in the database github_username = maintainer.github_username - if github_username == "unknown": - self.logger.warning("github username with value 'unknown' aborting") + email = maintainer.email + + if github_username == "unknown" and email == "unknown": + self.logger.warning("username & email with value 'unknown' aborting") return - identity_id = await find_github_identity(github_username) + identity_id = ( + await find_github_identity(github_username) + if github_username != "unknown" + else await find_maintainer_identity_by_email(email) + ) self.logger.debug( f"Found identity_id for {github_username}: {identity_id} (type: {type(identity_id)})" ) @@ -198,7 +205,7 @@ def get_extraction_prompt(self, filename: str, content_to_analyze: str) -> str: - If maintainers are found, the JSON format must be: `{{"info": [list_of_maintainer_objects]}}` - If no individual maintainers are found, or only teams/groups are mentioned, the JSON format must be: `{{"error": "not_found"}}` - Each object in the "info" list must contain these four fields: + Each object in the "info" list must contain these five fields: 1. `github_username`: - Find using common patterns like `@username`, `github.com/username`, `Name (@username)`, or from emails (`123+user@users.noreply.github.com`). - This is a best-effort search. If no username can be confidently found, use the string "unknown". @@ -210,6 +217,10 @@ def get_extraction_prompt(self, filename: str, content_to_analyze: str) -> str: - Do not include filler words like "repository", "project", or "active". 4. `normalized_title`: - Must be exactly "maintainer" or "contributor". If the role is ambiguous, use the `` as the primary hint. For example, a file named `MAINTAINERS` or `CODEOWNERS` implies "maintainer", while `CONTRIBUTORS` implies "contributor". + 5. `email`: + - Extract the person's email address from the content. 
Look for patterns like `FullName <email@domain>`, `email@domain`, or email addresses in various formats. + - The email must be a valid email address format (containing @ and a domain). + - If no valid email can be found for the individual, use the string "unknown". --- Filename: {filename}