11import { get as getLevenshteinDistance } from 'fast-levenshtein'
22
3+ import { parseGitHubNoreplyEmail } from '@crowd/common'
34import {
45 IMemberIdentity ,
56 IMemberOpensearch ,
@@ -105,6 +106,11 @@ class MemberSimilarityCalculator {
105106 }
106107 }
107108
109+ // Check if a noreply email on one member resolves to a username on the other
110+ if ( this . hasMatchingUsernameFromNoreplyEmail ( primaryMember , similarMember ) ) {
111+ return 0.95
112+ }
113+
108114 for ( const primaryIdentity of primaryMember . identities . filter ( ( i ) => i . verified ) ) {
109115 // similar member has an unverified identity as one of primary members's verified identity, return score 95
110116 if (
@@ -187,7 +193,9 @@ class MemberSimilarityCalculator {
187193 similarMember : IMemberOpensearch ,
188194 ) : boolean {
189195 if ( member . identities && member . identities . length > 0 ) {
190- for ( const identity of member . identities ) {
196+ for ( const identity of member . identities . filter (
197+ ( i ) => i . type === MemberIdentityType . USERNAME ,
198+ ) ) {
191199 if (
192200 similarMember . nested_identities . some (
193201 ( i ) =>
@@ -204,6 +212,59 @@ class MemberSimilarityCalculator {
204212 return false
205213 }
206214
/**
 * Checks whether a verified GitHub noreply email on one member resolves to a
 * GitHub username identity on the other member.
 *
 * Works bidirectionally: primary email -> similar username, and
 * similar email -> primary username. Usernames are compared case-insensitively.
 *
 * @param primaryMember - member whose `identities` are inspected
 * @param similarMember - candidate member whose `nested_identities` are inspected
 * @returns true when a verified noreply email on either side yields a GitHub
 *          username that exists on the other side, false otherwise
 */
static hasMatchingUsernameFromNoreplyEmail(
  primaryMember: IMemberWithAggregatesForMergeSuggestions,
  similarMember: IMemberOpensearch,
): boolean {
  // Treat a missing identity list as empty instead of throwing.
  const primaryIdentities = primaryMember.identities ?? []
  const similarIdentities = similarMember.nested_identities ?? []

  // Resolves an email value to a lowercased GitHub username, or null when the
  // value is empty or is not recognised by parseGitHubNoreplyEmail.
  // The casing returned by parseGitHubNoreplyEmail is not visible from here, so
  // it is normalised explicitly: the lookup sets below hold lowercased values
  // and Set#has is case-sensitive.
  const toGithubUsername = (email?: string | null): string | null => {
    if (!email) return null
    const parsed = parseGitHubNoreplyEmail(email)
    return parsed ? parsed.toLowerCase() : null
  }

  // Primary member's verified noreply emails -> similar member's GitHub usernames
  const similarGithubUsernames = new Set<string>()
  for (const identity of similarIdentities) {
    if (
      identity.string_platform === PlatformType.GITHUB &&
      identity.keyword_type === MemberIdentityType.USERNAME &&
      identity.string_value
    ) {
      similarGithubUsernames.add(identity.string_value.toLowerCase())
    }
  }

  if (similarGithubUsernames.size > 0) {
    for (const identity of primaryIdentities) {
      if (!identity.verified || identity.type !== MemberIdentityType.EMAIL) continue

      const ghUsername = toGithubUsername(identity.value)
      if (ghUsername && similarGithubUsernames.has(ghUsername)) {
        return true
      }
    }
  }

  // Similar member's verified noreply emails -> primary member's GitHub usernames
  const primaryGithubUsernames = new Set<string>()
  for (const identity of primaryIdentities) {
    if (
      identity.platform === PlatformType.GITHUB &&
      identity.type === MemberIdentityType.USERNAME &&
      identity.value
    ) {
      primaryGithubUsernames.add(identity.value.toLowerCase())
    }
  }

  if (primaryGithubUsernames.size > 0) {
    for (const identity of similarIdentities) {
      if (!identity.bool_verified || identity.keyword_type !== MemberIdentityType.EMAIL) continue

      const ghUsername = toGithubUsername(identity.string_value)
      if (ghUsername && primaryGithubUsernames.has(ghUsername)) {
        return true
      }
    }
  }

  return false
}
267+
207268 static hasEmailAsUsernameIdentityMatch (
208269 primaryIdentity : IMemberIdentity ,
209270 similarMember : IMemberOpensearch ,
0 commit comments