Skip to content

Commit c6f1cbc

Browse files
committed
feat: limit number of concurrent repo onboardings
1 parent a60ab3a commit c6f1cbc

2 files changed

Lines changed: 20 additions & 8 deletions

File tree

services/apps/git_integration/src/crowdgit/database/crud.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from crowdgit.errors import RepoLockingError
99
from crowdgit.models.repository import Repository
1010
from crowdgit.models.service_execution import ServiceExecution
11-
from crowdgit.settings import REPOSITORY_UPDATE_INTERVAL_HOURS
11+
from crowdgit.settings import MAX_CONCURRENT_ONBOARDINGS, REPOSITORY_UPDATE_INTERVAL_HOURS
1212

1313
from .connection import get_db_connection
1414
from .registry import execute, executemany, fetchrow, fetchval, query
@@ -38,24 +38,35 @@ async def get_repository_by_url(url: str) -> dict[str, Any] | None:
3838

3939
async def acquire_onboarding_repo() -> Repository | None:
4040
onboarding_repo_sql_query = """
41+
WITH current_onboarding_count AS (
42+
-- Count repositories currently being onboarded (processing + never processed before)
43+
SELECT COUNT(*) as count
44+
FROM git.repositories
45+
WHERE state = $1
46+
AND "lastProcessedCommit" IS NULL
47+
AND "deletedAt" IS NULL
48+
)
4149
UPDATE git.repositories
4250
SET "lockedAt" = NOW(),
4351
state = $1,
4452
"updatedAt" = NOW()
4553
WHERE id = (
46-
SELECT id
47-
FROM git.repositories
48-
WHERE state = $2
49-
AND "lockedAt" IS NULL
50-
AND "deletedAt" IS NULL
51-
ORDER BY priority ASC, "createdAt" ASC
54+
SELECT r.id
55+
FROM git.repositories r
56+
CROSS JOIN current_onboarding_count c
57+
WHERE r.state = $2
58+
AND r."lockedAt" IS NULL
59+
AND r."deletedAt" IS NULL
60+
AND c.count < $3 -- Only proceed if under the limit
61+
ORDER BY r.priority ASC, r."createdAt" ASC
5262
LIMIT 1
5363
FOR UPDATE SKIP LOCKED
5464
)
5565
RETURNING id, url, state, priority, "lastProcessedAt", "lastProcessedCommit", "lockedAt", "createdAt", "updatedAt", "segmentId", "integrationId", "maintainerFile", "lastMaintainerRunAt", "branch"
5666
"""
5767
return await acquire_repository(
58-
onboarding_repo_sql_query, (RepositoryState.PROCESSING, RepositoryState.PENDING)
68+
onboarding_repo_sql_query,
69+
(RepositoryState.PROCESSING, RepositoryState.PENDING, MAX_CONCURRENT_ONBOARDINGS),
5970
)
6071

6172

services/apps/git_integration/src/crowdgit/settings.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,4 @@ def load_env_var(key: str, required=True, default=None):
3737
load_env_var("MAINTAINER_UPDATE_INTERVAL_HOURS", default="24")
3838
)
3939
WORKER_SHUTDOWN_TIMEOUT_SEC = int(load_env_var("WORKER_SHUTDOWN_TIMEOUT_SEC", default="3600"))
40+
MAX_CONCURRENT_ONBOARDINGS = int(load_env_var("MAX_CONCURRENT_ONBOARDINGS", default="3"))

0 commit comments

Comments
 (0)