Skip to content

Commit c3a14ba

Browse files
committed
chore: refactore _validate_and_get_parent_repo
1 parent 84ae58a commit c3a14ba

3 files changed

Lines changed: 39 additions & 18 deletions

File tree

services/apps/git_integration/src/crowdgit/enums.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ class ErrorCode(str, Enum):
1919
MAINTAINER_ANALYSIS_FAILED = "maintainer-analysis-failed"
2020
MAINTAINER_INTERVAL_NOT_ELAPSED = "maintainer-interval-not-elapsed"
2121
CLEANUP_FAILED = "cleanup-failed"
22+
PARENT_REPO_INVALID = "parent-repo-invalid"
2223

2324

2425
class RepositoryState(str, Enum):

services/apps/git_integration/src/crowdgit/errors.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,3 +101,9 @@ class MaintainerIntervalNotElapsedError(CrowdGitError):
101101
error_message: str = "Maintainer processing interval has not elapsed yet"
102102
error_code: ErrorCode = ErrorCode.MAINTAINER_INTERVAL_NOT_ELAPSED
103103
ai_cost: int = 0
104+
105+
106+
@dataclass
107+
class ParentRepoInvalidError(CrowdGitError):
108+
error_message: str = "Parent repository is not valid or not found"
109+
error_code: ErrorCode = ErrorCode.PARENT_REPO_INVALID

services/apps/git_integration/src/crowdgit/worker/repository_worker.py

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
update_last_processed_commit,
99
)
1010
from crowdgit.enums import RepositoryState
11-
from crowdgit.errors import InternalError
11+
from crowdgit.errors import InternalError, ParentRepoInvalidError
1212

1313
# Import configured loguru logger from crowdgit.logger
1414
from crowdgit.logger import logger
@@ -136,41 +136,51 @@ def _reset_all_contexts(self) -> None:
136136
for service in services:
137137
service.reset_logger_context()
138138

139-
async def _check_parent_repo_validity(self, repository: Repository) -> bool:
139+
async def _validate_and_get_parent_repo(self, repository: Repository) -> Repository | None:
140140
"""
141-
In case of forked repo we need to prevent re-processing activities from parent repo and assigning them to fork, so we need to check:
142-
1. Parent repo already connected/onboarded
143-
2. Parent repo was processed successfully from last run to ensure we have up to date data
144-
also assigns repository.parent_repo if valid
141+
Validate and return the parent repository for forks.
142+
For forked repos, we need to prevent re-processing activities from the parent repo,
143+
so we verify the parent:
144+
1. Is already connected/onboarded in our system
145+
2. Was processed successfully within REPOSITORY_UPDATE_INTERVAL_HOURS
146+
3. Has COMPLETED state
147+
148+
Returns:
149+
Repository | None: Parent repository if this is a valid fork, None if not a fork
150+
Raises:
151+
ParentRepoInvalidError: If this is a fork but the parent repo is invalid or not found
145152
"""
146153
if not repository.forked_from:
147-
return True
154+
return None
155+
148156
logger.info(
149-
f"Repo forked from {repository.forked_from}, checking parent repo validity in our system"
157+
f"Repository {repository.url} is forked from {repository.forked_from}, validating parent repo..."
150158
)
151-
await asyncio.sleep(10)
159+
152160
parent_repo = await get_recently_processed_repository_by_url(repository.forked_from)
153161
if not parent_repo:
154162
logger.warning(
155163
f"Parent repo {repository.forked_from} is not found/valid - Aborting processing"
156164
)
157-
return False
158-
logger.info("Parent repo is valid and already processed, proceeding with fork processing")
159-
repository.parent_repo = parent_repo
160-
return True
165+
raise ParentRepoInvalidError()
166+
167+
logger.info(
168+
f"Parent repo {repository.forked_from} is valid, proceeding with fork processing"
169+
)
170+
return parent_repo
161171

162172
async def _process_single_repository(self, repository: Repository):
163173
"""Process a single repository through services with full clone for new repos, incremental for existing"""
164174
logger.info("Processing repository: {}", repository.url)
165-
processing_state = RepositoryState.FAILED
175+
processing_state = RepositoryState.PENDING
166176

167177
try:
168178
repo_name = get_repo_name(repository.url)
169179
self._bind_repository_context(repository, repo_name)
170-
valid_parent = await self._check_parent_repo_validity(repository)
171-
if not valid_parent:
172-
processing_state = RepositoryState.REQUIRES_PARENT
173-
return
180+
181+
# Validate and get parent repo if this is a fork
182+
repository.parent_repo = await self._validate_and_get_parent_repo(repository)
183+
174184
async for batch_info in self.clone_service.clone_batches_generator(
175185
repository,
176186
working_dir_cleanup=True,
@@ -193,8 +203,12 @@ async def _process_single_repository(self, repository: Repository):
193203

194204
logger.info("Incremental processing completed successfully")
195205
processing_state = RepositoryState.COMPLETED
206+
except ParentRepoInvalidError as e:
207+
logger.error(f"Parent repo validation failed: {repr(e)}")
208+
processing_state = RepositoryState.REQUIRES_PARENT
196209
except Exception as e:
197210
logger.error(f"Processing failed with error: {repr(e)}")
211+
processing_state = RepositoryState.FAILED
198212
finally:
199213
# Reset logger context for all services
200214
self._reset_all_contexts()

0 commit comments

Comments
 (0)