Skip to content

Commit 9b94e63

Browse files
authored
feat: map maintainers by email if username not found (CM-773) (#3598)
1 parent 0c09569 commit 9b94e63

3 files changed

Lines changed: 33 additions & 4 deletions

File tree

services/apps/git_integration/src/crowdgit/database/crud.py

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -223,6 +223,23 @@ async def find_github_identity(github_username: str):
223223
return result
224224

225225

226+
async def find_maintainer_identity_by_email(email: str):
227+
sql_query = """
228+
SELECT id
229+
FROM "memberIdentities"
230+
WHERE
231+
platform IN ('github', 'git', 'gitlab')
232+
AND "verified" = TRUE
233+
AND value = $1
234+
LIMIT 1
235+
"""
236+
result = await fetchval(
237+
sql_query,
238+
(email,),
239+
)
240+
return result
241+
242+
226243
async def upsert_maintainer(
227244
repo_id: str,
228245
identity_id: str,

services/apps/git_integration/src/crowdgit/models/maintainer_info.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ class MaintainerInfoItem(BaseModel):
1313
name: str | None = None
1414
title: str | None = None
1515
normalized_title: Literal["maintainer", "contributor"] | None = None
16+
email: str | None = None
1617

1718

1819
class MaintainerInfo(BaseModel):

services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111

1212
from crowdgit.database.crud import (
1313
find_github_identity,
14+
find_maintainer_identity_by_email,
1415
get_maintainers_for_repo,
1516
save_service_execution,
1617
set_maintainer_end_date,
@@ -76,10 +77,16 @@ async def process_maintainer(maintainer: MaintainerInfoItem):
7677
original_role = self.make_role(maintainer.title)
7778
# Find the identity in the database
7879
github_username = maintainer.github_username
79-
if github_username == "unknown":
80-
self.logger.warning("github username with value 'unknown' aborting")
80+
email = maintainer.email
81+
82+
if github_username == "unknown" and email == "unknown":
83+
self.logger.warning("username & email with value 'unknown' aborting")
8184
return
82-
identity_id = await find_github_identity(github_username)
85+
identity_id = (
86+
await find_github_identity(github_username)
87+
if github_username != "unknown"
88+
else await find_maintainer_identity_by_email(email)
89+
)
8390
self.logger.debug(
8491
f"Found identity_id for {github_username}: {identity_id} (type: {type(identity_id)})"
8592
)
@@ -198,7 +205,7 @@ def get_extraction_prompt(self, filename: str, content_to_analyze: str) -> str:
198205
- If maintainers are found, the JSON format must be: `{{"info": [list_of_maintainer_objects]}}`
199206
- If no individual maintainers are found, or only teams/groups are mentioned, the JSON format must be: `{{"error": "not_found"}}`
200207
201-
Each object in the "info" list must contain these four fields:
208+
Each object in the "info" list must contain these five fields:
202209
1. `github_username`:
203210
- Find using common patterns like `@username`, `github.com/username`, `Name (@username)`, or from emails (`123+user@users.noreply.github.com`).
204211
- This is a best-effort search. If no username can be confidently found, use the string "unknown".
@@ -210,6 +217,10 @@ def get_extraction_prompt(self, filename: str, content_to_analyze: str) -> str:
210217
- Do not include filler words like "repository", "project", or "active".
211218
4. `normalized_title`:
212219
- Must be exactly "maintainer" or "contributor". If the role is ambiguous, use the `<filename>` as the primary hint. For example, a file named `MAINTAINERS` or `CODEOWNERS` implies "maintainer", while `CONTRIBUTORS` implies "contributor".
220+
5. `email`:
221+
- Extract the person's email address from the content. Look for patterns like `FullName <email@domain>`, `email@domain`, or email addresses in various formats.
222+
- The email must be a valid email address format (containing @ and a domain).
223+
- If no valid email can be found for the individual, use the string "unknown".
213224
214225
---
215226
Filename: {filename}

0 commit comments

Comments
 (0)