@@ -624,21 +624,14 @@ async def _filter_parent_repo_activities(
624624 parent_repo : Repository ,
625625 ) -> tuple [list [tuple ], list [dict ], int ]:
626626 """
627- Filter out activities that exist in parent repo ( for fork detection) .
628- Uses full dedup key (timestamp, platform, type, sourceId, channel, segmentId) for optimal index usage .
627+ Filter out activities that exist in parent repo for forked repositories .
628+ Done in post-processing phase using batch lookup to avoid N+1 queries .
629629
630- Args:
631- activities_db: List of activity tuples for database
632- activities_queue: List of activity dicts for Kafka queue
633- parent_repo: Parent repository information
634-
635- Returns:
636- Tuple of (filtered_activities_db, filtered_activities_queue, skipped_activities_count)
630+ Returns: (filtered_activities_db, filtered_activities_queue, skipped_activities_count)
637631 """
638632 if not activities_db :
639633 return activities_db , activities_queue , 0
640634
641- # Extract (timestamp, type, sourceId) for each activity to use full dedup index
642635 activity_keys = []
643636 for act in activities_db :
644637 data = orjson .loads (act [2 ])["data" ]
@@ -754,14 +747,9 @@ async def process_commits_chunk(
754747 activities_db , activities_queue , parent_repo
755748 )
756749
757- if skipped_activities > 0 :
758- self .logger .info (
759- f"Processed { processed_commits } commits, skipped { bad_commits } invalid commits, filtered { skipped_activities } activities from parent repo in { repo_path } "
760- )
761- else :
762- self .logger .info (
763- f"Processed { processed_commits } commits, skipped { bad_commits } invalid commits in { repo_path } "
764- )
750+ self .logger .info (
751+ f"Processed { processed_commits } commits, skipped { bad_commits } invalid commits, filtered { skipped_activities } activities from parent repo in { repo_path } "
752+ )
765753 # Update metrics context
766754 if self ._metrics_context :
767755 self ._metrics_context ["processed_commits" ] += processed_commits
0 commit comments