Skip to content

Commit e24a93b

Browse files
committed
Add Modal B200 submission rate limit
1 parent 84e84f9 commit e24a93b

8 files changed

Lines changed: 204 additions & 6 deletions

File tree

src/kernelbot/api/main.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,17 @@
1515
from libkernelbot.background_submission_manager import BackgroundSubmissionManager
1616
from libkernelbot.consts import SubmissionMode
1717
from libkernelbot.db_types import IdentityType
18-
from libkernelbot.kernelguard import KernelGuardRejected, enforce_submission_precheck, should_precheck_submission
18+
from libkernelbot.kernelguard import (
19+
KernelGuardRejected,
20+
enforce_submission_precheck,
21+
should_precheck_submission,
22+
)
1923
from libkernelbot.leaderboard_db import LeaderboardDB, LeaderboardRankedEntry
2024
from libkernelbot.problem_sync import sync_problems
2125
from libkernelbot.submission import (
2226
ProcessedSubmissionRequest,
2327
SubmissionRequest,
28+
enforce_gpu_rate_limits,
2429
prepare_submission,
2530
)
2631
from libkernelbot.task import make_task_definition
@@ -500,6 +505,7 @@ async def enqueue_background_job(
500505
):
501506
# pre-create the submission for api returns
502507
with backend.db as db:
508+
enforce_gpu_rate_limits(req, db)
503509
sub_id = db.create_submission(
504510
leaderboard=req.leaderboard,
505511
file_name=req.file_name,
@@ -508,6 +514,7 @@ async def enqueue_background_job(
508514
time=datetime.datetime.now(datetime.timezone.utc),
509515
user_name=req.user_name,
510516
mode_category=req.mode_category,
517+
requested_gpus=req.gpus,
511518
)
512519
job_id = db.upsert_submission_job_status(sub_id, "initial", None)
513520
# put submission request in queue

src/libkernelbot/backend.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,13 @@
44
from types import SimpleNamespace
55
from typing import Optional
66

7-
from libkernelbot.consts import GPU, GPU_TO_SM, SubmissionMode, get_gpu_by_name, get_mode_category
7+
from libkernelbot.consts import (
8+
GPU,
9+
GPU_TO_SM,
10+
SubmissionMode,
11+
get_gpu_by_name,
12+
get_mode_category,
13+
)
814
from libkernelbot.kernelguard import (
915
KernelGuardRejected,
1016
enforce_submission_precheck,
@@ -19,7 +25,11 @@
1925
make_short_report,
2026
)
2127
from libkernelbot.run_eval import FullResult
22-
from libkernelbot.submission import ProcessedSubmissionRequest, compute_score
28+
from libkernelbot.submission import (
29+
ProcessedSubmissionRequest,
30+
compute_score,
31+
enforce_gpu_rate_limits,
32+
)
2333
from libkernelbot.task import LeaderboardTask, build_task_config
2434
from libkernelbot.utils import setup_logging
2535

@@ -68,6 +78,7 @@ async def submit_full(
6878
sub_id = pre_sub_id
6979
else:
7080
with self.db as db:
81+
enforce_gpu_rate_limits(req, db)
7182
sub_id = db.create_submission(
7283
leaderboard=req.leaderboard,
7384
file_name=req.file_name,
@@ -76,6 +87,7 @@ async def submit_full(
7687
time=datetime.datetime.now(datetime.timezone.utc),
7788
user_name=req.user_name,
7889
mode_category=req.mode_category or get_mode_category(mode),
90+
requested_gpus=req.gpus,
7991
)
8092
selected_gpus = [get_gpu_by_name(gpu) for gpu in req.gpus]
8193
submission_started = False

src/libkernelbot/consts.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,9 @@ class RankCriterion(Enum):
119119
GEOM = "geom" # geometric mean of all benchmarks
120120

121121

122+
MODAL_B200_MAX_SUBMISSIONS_PER_HOUR = 1
123+
124+
122125
GPU_TO_SM = {
123126
"T4": "75",
124127
"L4": "89",

src/libkernelbot/leaderboard_db.py

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,11 +278,16 @@ def create_submission(
278278
time: datetime.datetime,
279279
user_name: str = None,
280280
mode_category: str = None,
281+
requested_gpus: Optional[list[str] | str] = None,
281282
) -> Optional[int]:
282283
try:
283284
if time.tzinfo is None:
284285
time = time.astimezone()
285286
time = time.astimezone(datetime.timezone.utc)
287+
if requested_gpus is None:
288+
requested_gpus = []
289+
elif isinstance(requested_gpus, str):
290+
requested_gpus = [requested_gpus]
286291

287292
# check if we already have the code
288293
self.cursor.execute(
@@ -329,10 +334,10 @@ def create_submission(
329334
self.cursor.execute(
330335
"""
331336
INSERT INTO leaderboard.submission (leaderboard_id, file_name,
332-
user_id, code_id, submission_time, mode_category)
337+
user_id, code_id, submission_time, mode_category, requested_gpus)
333338
VALUES (
334339
(SELECT id FROM leaderboard.leaderboard WHERE name = %s),
335-
%s, %s, %s, %s, %s)
340+
%s, %s, %s, %s, %s, %s)
336341
RETURNING id
337342
""",
338343
(
@@ -342,6 +347,7 @@ def create_submission(
342347
code_id,
343348
time,
344349
mode_category,
350+
requested_gpus,
345351
),
346352
)
347353
submission_id = self.cursor.fetchone()[0]
@@ -1778,6 +1784,40 @@ def check_rate_limit(
17781784
logger.exception("Error checking rate limit", exc_info=e)
17791785
raise KernelBotError("Error checking rate limit") from e
17801786

1787+
def check_gpu_submission_rate_limit(
    self, user_id: str, gpu_type: str, max_per_hour: int
) -> dict:
    """Check whether ``user_id`` is under the hourly cap for ``gpu_type``.

    Counts submissions in the trailing hour whose ``requested_gpus`` array
    contains ``gpu_type`` and reports whether another submission is allowed.

    Returns a dict with keys ``allowed``, ``current_count``, ``max_per_hour``,
    and ``retry_after_seconds`` (0 when allowed or when no prior submission
    exists in the window).

    Raises KernelBotError on any database failure (after rolling back).
    """
    try:
        # Array containment (@>) matches rows that requested this GPU,
        # possibly among others; served by the GIN index on requested_gpus.
        self.cursor.execute(
            """
            SELECT COUNT(*), MIN(submission_time)
            FROM leaderboard.submission
            WHERE user_id = %s
            AND requested_gpus @> ARRAY[%s]::TEXT[]
            AND submission_time > NOW() - INTERVAL '1 hour'
            """,
            (str(user_id), gpu_type),
        )
        count, oldest = self.cursor.fetchone()

        within_limit = count < max_per_hour
        wait_seconds = 0
        if not within_limit and oldest is not None:
            # The window re-opens when the oldest counted submission ages out.
            # NOTE(review): assumes `oldest` comes back timezone-aware
            # (timestamptz column) so subtracting an aware `now` is valid —
            # confirm against the schema.
            window_reopens = oldest + datetime.timedelta(hours=1)
            now = datetime.datetime.now(datetime.timezone.utc)
            wait_seconds = max(0, int((window_reopens - now).total_seconds()))

        return {
            "allowed": within_limit,
            "current_count": count,
            "max_per_hour": max_per_hour,
            "retry_after_seconds": wait_seconds,
        }
    except psycopg2.Error as e:
        self.connection.rollback()
        logger.exception("Error checking GPU submission rate limit", exc_info=e)
        raise KernelBotError("Error checking GPU submission rate limit") from e
1820+
17811821

17821822
class LeaderboardDoesNotExist(KernelBotError):
17831823
def __init__(self, name: str):

src/libkernelbot/submission.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,13 @@
77

88
from better_profanity import profanity
99

10-
from libkernelbot.consts import RankCriterion, SubmissionMode, get_mode_category
10+
from libkernelbot.consts import (
11+
MODAL_B200_MAX_SUBMISSIONS_PER_HOUR,
12+
ModalGPU,
13+
RankCriterion,
14+
SubmissionMode,
15+
get_mode_category,
16+
)
1117
from libkernelbot.db_types import RunItem, SubmissionItem
1218
from libkernelbot.leaderboard_db import LeaderboardDB, LeaderboardItem
1319
from libkernelbot.run_eval import FullResult
@@ -41,6 +47,36 @@ class ProcessedSubmissionRequest(SubmissionRequest):
4147
mode_category: str = None
4248

4349

50+
def normalize_requested_gpus(gpus: Union[None, str, list]) -> list[str]:
    """Coerce a GPU request (None, a single name, or a list) to a list of names."""
    if isinstance(gpus, str):
        return [gpus]
    return [] if gpus is None else list(gpus)
56+
57+
58+
def enforce_gpu_rate_limits(req: SubmissionRequest, db: LeaderboardDB) -> None:
    """Reject the submission if it would exceed a per-GPU rate limit.

    Currently only Modal B200 is capped (MODAL_B200_MAX_SUBMISSIONS_PER_HOUR
    per user per hour). Raises KernelBotError with code 429 when over the
    limit; returns None otherwise.
    """
    # Only B200 requests are subject to a per-GPU cap.
    if ModalGPU.B200.value not in normalize_requested_gpus(req.gpus):
        return

    check = db.check_gpu_submission_rate_limit(
        str(req.user_id),
        ModalGPU.B200.value,
        MODAL_B200_MAX_SUBMISSIONS_PER_HOUR,
    )
    if not check["allowed"]:
        raise KernelBotError(
            "Rate limit exceeded: "
            f"{check['current_count']}/{check['max_per_hour']} Modal B200 submissions "
            "per hour. "
            f"Try again in {check['retry_after_seconds']}s.",
            code=429,
        )
78+
79+
4480
def prepare_submission( # noqa: C901
4581
req: SubmissionRequest, backend: "KernelBackend", mode: SubmissionMode = None
4682
) -> ProcessedSubmissionRequest:
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
"""
2+
Track requested GPUs on submission rows so GPU-specific rate limits can apply before queueing.
3+
"""
4+
5+
from yoyo import step
6+
7+
__depends__ = {"20260318_01_ban-user"}
8+
9+
steps = [
10+
step(
11+
"""
12+
ALTER TABLE leaderboard.submission
13+
ADD COLUMN requested_gpus TEXT[] NOT NULL DEFAULT ARRAY[]::TEXT[];
14+
""",
15+
"""
16+
ALTER TABLE leaderboard.submission
17+
DROP COLUMN requested_gpus;
18+
""",
19+
),
20+
step(
21+
"""
22+
CREATE INDEX leaderboard_submission_requested_gpus_idx
23+
ON leaderboard.submission USING GIN (requested_gpus);
24+
""",
25+
"""
26+
DROP INDEX leaderboard.leaderboard_submission_requested_gpus_idx;
27+
""",
28+
),
29+
]

tests/test_leaderboard_db.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1198,3 +1198,47 @@ def test_check_rate_limit_categories_independent(database, submit_leaderboard):
11981198
# Test should be blocked
11991199
result = db.check_rate_limit("submit-leaderboard", "123", "test")
12001200
assert result["allowed"] is False
1201+
1202+
1203+
def test_check_gpu_submission_rate_limit_under_limit(database, submit_leaderboard):
    """GPU-specific rate limit counts only submissions that requested that GPU."""
    # One submission includes B200, the other does not — only the first counts.
    specs = [
        ("test.py", "code1", ["A100", "B200"]),
        ("other.py", "code2", ["A100"]),
    ]
    with database as db:
        for file_name, code, gpus in specs:
            db.create_submission(
                "submit-leaderboard",
                file_name,
                123,
                code,
                datetime.datetime.now(),
                requested_gpus=gpus,
            )
        result = db.check_gpu_submission_rate_limit("123", "B200", 2)
    assert result["allowed"] is True
    assert result["current_count"] == 1
    assert result["max_per_hour"] == 2
1226+
1227+
1228+
def test_check_gpu_submission_rate_limit_at_limit(database, submit_leaderboard):
    """GPU-specific rate limit blocks once the hourly cap is reached."""
    with database as db:
        # Fill the window exactly to the cap of 2 B200 submissions.
        for idx in range(2):
            db.create_submission(
                "submit-leaderboard",
                f"test{idx}.py",
                123,
                f"code{idx}",
                datetime.datetime.now(),
                requested_gpus=["B200"],
            )
        result = db.check_gpu_submission_rate_limit("123", "B200", 2)
    assert result["allowed"] is False
    assert result["current_count"] == 2
    assert result["max_per_hour"] == 2
    # Retry hint must never be negative.
    assert result["retry_after_seconds"] >= 0

tests/test_submission.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,13 @@ def mock_backend():
3232
}
3333
db_context.get_leaderboard_gpu_types.return_value = ["A100", "V100"]
3434
db_context.is_user_banned.return_value = False
35+
db_context.check_rate_limit.return_value = None
36+
db_context.check_gpu_submission_rate_limit.return_value = {
37+
"allowed": True,
38+
"current_count": 0,
39+
"max_per_hour": 1,
40+
"retry_after_seconds": 0,
41+
}
3542

3643
return backend
3744

@@ -297,6 +304,26 @@ def test_prepare_submission_checks(mock_backend):
297304
submission.prepare_submission(req, mock_backend)
298305

299306

307+
def test_enforce_gpu_rate_limits_blocks_modal_b200(mock_backend):
    """A denied B200 rate check must surface as a KernelBotError."""
    # Force the DB mock to report the cap as already reached.
    mock_backend.db.check_gpu_submission_rate_limit.return_value = dict(
        allowed=False,
        current_count=1,
        max_per_hour=1,
        retry_after_seconds=123,
    )

    request = submission.SubmissionRequest(
        code="print('hello world')",
        file_name="test.py",
        user_id=2,
        user_name="test_user2",
        gpus=["B200"],
        leaderboard="test_board",
    )

    with pytest.raises(KernelBotError, match="Modal B200 submissions per hour"):
        submission.enforce_gpu_rate_limits(request, mock_backend.db)
325+
326+
300327
def test_compute_score():
301328
mock_task = mock.Mock()
302329
mock_result = mock.Mock()

0 commit comments

Comments
 (0)