@@ -538,47 +538,22 @@ def create_activities_from_commit(
538538 committer_name = commit ["committer_name" ]
539539 committer_email = commit ["committer_email" ]
540540
541- # Create author activity
542- author = {
543- "username" : author_email ,
544- "displayName" : author_name ,
545- "emails" : [author_email ],
546- }
547- activity = self .create_activity (
548- remote = remote ,
549- commit = commit ,
550- activity_type = "authored-commit" ,
551- member = author ,
552- source_id = commit_hash ,
553- segment_id = segment_id ,
554- re_onboarding_count = re_onboarding_count ,
555- )
556- activity_db , activity_kafka = self .prepare_activity_for_db_and_queue (
557- activity , segment_id , integration_id
558- )
559- activities_db .append (activity_db )
560- activities_queue .append (activity_kafka )
561-
562- # Only create committer activity if author and committer are different
563- if author_name != committer_name or author_email != committer_email :
564- # IMPORTANT: hash_input has a typo in "commited" instead of "committed"
565- # however fixing it requires recalculating sourceId/parentSourceId for ALL git activities in db
566- # so far the typo doesn't have any major effect, since the activity type "committed-commit" is correct
567- hash_input = f"{ commit_hash } commited-commit{ committer_email } "
568- committer_source_id = hashlib .sha1 (hash_input .encode ("utf-8" )).hexdigest ()
569-
570- committer = {
571- "username" : committer_email ,
572- "displayName" : committer_name ,
573- "emails" : [committer_email ],
541+ # Create author activity — skip if email is empty (no identity to attach to)
542+ author_email = author_email .strip () if author_email else ""
543+ if not author_email :
544+ self .logger .warning (f"Skipping authored-commit for { commit_hash } — empty author email" )
545+ else :
546+ author = {
547+ "username" : author_email ,
548+ "displayName" : author_name ,
549+ "emails" : [author_email ],
574550 }
575551 activity = self .create_activity (
576552 remote = remote ,
577553 commit = commit ,
578- activity_type = "committed-commit" ,
579- member = committer ,
580- source_id = committer_source_id ,
581- source_parent_id = commit_hash ,
554+ activity_type = "authored-commit" ,
555+ member = author ,
556+ source_id = commit_hash ,
582557 segment_id = segment_id ,
583558 re_onboarding_count = re_onboarding_count ,
584559 )
@@ -588,23 +563,65 @@ def create_activities_from_commit(
588563 activities_db .append (activity_db )
589564 activities_queue .append (activity_kafka )
590565
566+ # Only create committer activity if author and committer are different
567+ committer_email = committer_email .strip () if committer_email else ""
568+ if author_name != committer_name or author_email != committer_email :
569+ if not committer_email :
570+ self .logger .warning (
571+ f"Skipping committed-commit for { commit_hash } — empty committer email"
572+ )
573+ else :
574+ # IMPORTANT: hash_input has a typo in "commited" instead of "committed"
575+ # however fixing it requires recalculating sourceId/parentSourceId for ALL git activities in db
576+ # so far the typo doesn't have any major effect, since the activity type "committed-commit" is correct
577+ hash_input = f"{ commit_hash } commited-commit{ committer_email } "
578+ committer_source_id = hashlib .sha1 (hash_input .encode ("utf-8" )).hexdigest ()
579+
580+ committer = {
581+ "username" : committer_email ,
582+ "displayName" : committer_name ,
583+ "emails" : [committer_email ],
584+ }
585+ activity = self .create_activity (
586+ remote = remote ,
587+ commit = commit ,
588+ activity_type = "committed-commit" ,
589+ member = committer ,
590+ source_id = committer_source_id ,
591+ source_parent_id = commit_hash ,
592+ segment_id = segment_id ,
593+ re_onboarding_count = re_onboarding_count ,
594+ )
595+ activity_db , activity_kafka = self .prepare_activity_for_db_and_queue (
596+ activity , segment_id , integration_id
597+ )
598+ activities_db .append (activity_db )
599+ activities_queue .append (activity_kafka )
600+
591601 # Process extracted activities from commit message
592602 extracted_activities = self .extract_activities (commit ["message" ])
593603 for extracted_activity in extracted_activities :
594604 activity_type , member_data = list (extracted_activity .items ())[0 ]
595605
606+ trailer_email = (member_data .get ("email" ) or "" ).strip ()
607+ if not trailer_email :
608+ self .logger .warning (
609+ f"Skipping { activity_type } for { commit_hash } — empty email in commit trailer"
610+ )
611+ continue
612+
596613 # Convert activity type to lowercase and add "-commit" suffix
597614 # This matches the legacy behavior: "signed-off-by" -> "signed-off-commit"
598615 activity_type = activity_type .lower ().replace ("-by" , "" ) + "-commit"
599616
600617 member = {
601618 "displayName" : member_data ["name" ],
602- "emails" : [member_data [ "email" ] ],
619+ "emails" : [trailer_email ],
603620 }
604621
605622 # Generate unique source ID for extracted activity
606623 source_id = hashlib .sha1 (
607- (commit_hash + activity_type + member_data [ "email" ] ).encode ("utf-8" )
624+ (commit_hash + activity_type + trailer_email ).encode ("utf-8" )
608625 ).hexdigest ()
609626 activity = self .create_activity (
610627 remote = remote ,
0 commit comments