22
33from crowdgit .database .crud import (
44 acquire_repo_for_processing ,
5+ get_recently_processed_repository_by_url ,
56 mark_repo_as_processed ,
67 release_repo ,
78 update_last_processed_commit ,
@@ -135,14 +136,41 @@ def _reset_all_contexts(self) -> None:
135136 for service in services :
136137 service .reset_logger_context ()
137138
async def _check_parent_repo_validity(self, repository: Repository) -> bool:
    """
    Decide whether a (possibly forked) repository may be processed.

    A fork must not re-process its parent's activities and attribute them to
    the fork, so a fork is accepted only when its parent repo:
    1. is already connected/onboarded, and
    2. was processed successfully since the last run, so its data is up to date.

    The lookup is done by ``get_recently_processed_repository_by_url``
    (presumably it enforces both conditions - confirm against its definition);
    a falsy result rejects the fork.

    Side effect: when a fork's parent is found, ``repository.parent_repo`` is
    set to the returned parent record.

    Returns True for non-forks and for forks with a valid parent, else False.
    """
    parent_url = repository.forked_from
    if not parent_url:
        # Not a fork: there is no parent to validate.
        return True

    logger.info(f"Repo forked from {parent_url}, checking parent repo validity in our system")
    # NOTE(review): fixed 10 s pause before the lookup - its purpose is not
    # evident from this code; confirm it is intentional and not a debug leftover.
    await asyncio.sleep(10)
    parent = await get_recently_processed_repository_by_url(parent_url)

    if parent:
        logger.info("Parent repo is valid and already processed, proceeding with fork processing")
        repository.parent_repo = parent
        return True

    logger.warning(f"Parent repo {parent_url} is not found/valid - Aborting processing")
    return False
161+
138162 async def _process_single_repository (self , repository : Repository ):
139163 """Process a single repository through services with full clone for new repos, incremental for existing"""
140164 logger .info ("Processing repository: {}" , repository .url )
141- processing_state = RepositoryState .PENDING
165+ processing_state = RepositoryState .FAILED
142166
143167 try :
144168 repo_name = get_repo_name (repository .url )
145169 self ._bind_repository_context (repository , repo_name )
170+ valid_parent = await self ._check_parent_repo_validity (repository )
171+ if not valid_parent :
172+ processing_state = RepositoryState .REQUIRES_PARENT
173+ return
146174 async for batch_info in self .clone_service .clone_batches_generator (
147175 repository ,
148176 working_dir_cleanup = True ,
@@ -166,7 +194,6 @@ async def _process_single_repository(self, repository: Repository):
166194 logger .info ("Incremental processing completed successfully" )
167195 processing_state = RepositoryState .COMPLETED
168196 except Exception as e :
169- processing_state = RepositoryState .FAILED
170197 logger .error (f"Processing failed with error: { repr (e )} " )
171198 finally :
172199 # Reset logger context for all services
0 commit comments