Skip to content

Commit a84c587

Browse files
committed
Add a script to check solutions.
1 parent e0e4f8b commit a84c587

2 files changed

Lines changed: 246 additions & 0 deletions

File tree

cmscontrib/SolutionChecker.py

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
#!/usr/bin/env python3
2+
3+
# Contest Management System - http://cms-dev.github.io/
4+
# Copyright © 2026 Luca Versari <veluca93@gmail.com>
5+
#
6+
# This program is free software: you can redistribute it and/or modify
7+
# it under the terms of the GNU Affero General Public License as
8+
# published by the Free Software Foundation, either version 3 of the
9+
# License, or (at your option) any later version.
10+
#
11+
# This program is distributed in the hope that it will be useful,
12+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
# GNU Affero General Public License for more details.
15+
#
16+
# You should have received a copy of the GNU Affero General Public License
17+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
18+
19+
"""Script to automate testing of solutions via CMS API.
20+
21+
The script expects a JSON file containing a list of solution checks.
22+
Each check should be an object with the following fields:
23+
- path: path to the solution file.
24+
- min_score: minimum expected score.
25+
- max_score: maximum expected score.
26+
27+
Such a file can be generated with `task-maker-rust export-solution-checks`.
28+
"""
29+
30+
import argparse
31+
import json
32+
import logging
33+
import os
34+
import re
35+
import sys
36+
import time
37+
from typing import Optional, Dict, Any
38+
39+
import requests
40+
41+
logger = logging.getLogger(__name__)
42+
43+
44+
class RedAlertFormatter(logging.Formatter):
    """Log formatter that colors the level name of warnings and errors.

    DEBUG and INFO records use the plain layout, WARNING records get a
    bold yellow level name, ERROR and CRITICAL records a bold red one.
    Levels that are not listed in FORMATS fall back to BASE_FORMAT.
    """

    # ANSI escape sequences used to color the level name.
    RED = "\x1b[31;1m"
    YELLOW = "\x1b[33;1m"
    RSET = "\x1b[0m"

    BASE_FORMAT = "%(levelname)8s %(message)s"

    RED_FORMAT = RED + "%(levelname)8s" + RSET + " %(message)s"

    YELLOW_FORMAT = YELLOW + "%(levelname)8s" + RSET + " %(message)s"

    # Layout to use for each standard logging level.
    FORMATS = {
        logging.DEBUG: BASE_FORMAT,
        logging.INFO: BASE_FORMAT,
        logging.WARNING: YELLOW_FORMAT,
        logging.ERROR: RED_FORMAT,
        logging.CRITICAL: RED_FORMAT,
    }

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Formatters are cached by format string, so that a new
        # logging.Formatter is not built for every single record.
        self._formatters = {}

    def format(self, record):
        """Return the record rendered with the layout for its level.

        record: the logging.LogRecord to render.

        return: the formatted log line.

        """
        log_fmt = self.FORMATS.get(record.levelno, self.BASE_FORMAT)
        formatter = self._formatters.get(log_fmt)
        if formatter is None:
            formatter = logging.Formatter(log_fmt)
            self._formatters[log_fmt] = formatter
        return formatter.format(record)
68+
69+
class SolutionChecker:
    """Small HTTP client that submits solutions to a CMS contest.

    All requests go through a single requests.Session and are sent
    to URLs built from base_url. After a successful login() the
    authorization token returned by the server is attached to every
    request through the X-CMS-Authorization header.
    """

    # Submission statuses on which polling stops:
    # status 5 is SCORED, 2 is COMPILATION_FAILED.
    STATUS_COMPILATION_FAILED = 2
    STATUS_SCORED = 5

    # A decimal number as rendered in the HTML pages. Both "." and ","
    # are accepted as decimal separator, because the numbers are
    # normalized with _to_float() before being converted.
    _NUMBER = r"\d+(?:[.,]\d+)?"
    _TIME_LIMIT_RE = re.compile(
        r"Time limit</th>\s*<td[^>]*>(" + _NUMBER + r")\s*s"
    )
    _SUBTASK_SCORE_RE = re.compile(
        r'<span class="score">\s*\(\s*(' + _NUMBER + r")\s*/"
    )
    _EXECUTION_TIME_RE = re.compile(
        r'<td class="execution-time">\s*(?:&gt;\s*)?(' + _NUMBER + r")\s*s"
    )

    def __init__(
        self, base_url: str, username: Optional[str], password: Optional[str] = None
    ):
        """Create a checker for the contest served at base_url.

        base_url: contest URL; trailing slashes are dropped.
        username: user to log in as, or None.
        password: password of the user, or None.

        """
        self.base_url = base_url.rstrip("/")
        self.username = username
        self.password = password
        self.session = requests.Session()
        # Extra headers sent with every request; filled in by login().
        self.auth_header = {}

    @staticmethod
    def _to_float(text: str) -> float:
        """Convert a number that may use "," as decimal separator."""
        return float(text.replace(",", "."))

    def login(self):
        """Log in with username and password, if both were provided.

        When either of them is missing nothing is sent to the server
        and the requests are left unauthenticated (IP autologin is
        assumed).

        raise: an HTTP error from requests if the login is rejected.

        """
        if self.password is None or self.username is None:
            logger.info("No password provided, assuming IP autologin.")
            return

        login_url = f"{self.base_url}/api/login"
        response = self.session.post(
            login_url, data={"username": self.username, "password": self.password}
        )
        response.raise_for_status()
        data = response.json()
        self.auth_header = {"X-CMS-Authorization": data["login_data"]}
        logger.info("Successfully logged in.")

    def submit(self, task_name: str, file_path: str) -> str:
        """Submit the file at file_path as a solution for a task.

        The submission format of the task is read from the task list;
        the same file is sent for every entry of that format.

        task_name: name of the task to submit to.
        file_path: path of the solution file.

        return: the id of the new submission, as reported by the
            server.

        raise (ValueError): if the task is not in the task list, or
            has an empty submission format.

        """
        task_list_url = f"{self.base_url}/api/task_list"
        response = self.session.get(task_list_url, headers=self.auth_header)
        response.raise_for_status()
        submission_format = []
        for task in response.json().get("tasks", []):
            if task["name"] == task_name:
                submission_format = task.get("submission_format", [])
                break
        # An explicit raise (rather than an assert) keeps the check
        # active also when Python runs with -O.
        if not submission_format:
            raise ValueError(f"Task {task_name} not found in task list")

        # Read the file once and send its content: this way no file
        # handle is left open after the request.
        with open(file_path, "rb") as source:
            content = source.read()

        submit_url = f"{self.base_url}/api/{task_name}/submit"
        filename = os.path.basename(file_path)
        files = {fmt: (filename, content) for fmt in submission_format}

        response = self.session.post(
            submit_url,
            files=files,
            headers=self.auth_header,
        )
        response.raise_for_status()
        return response.json().get("id")

    def poll_status(
        self,
        task_name: str,
        filename: str,
        submission_id: str,
        poll_interval: float = 2.0,
        timeout: Optional[float] = None,
    ) -> Dict[str, Any]:
        """Wait until a submission is scored or fails to compile.

        task_name: name of the task the submission belongs to.
        filename: name of the submitted file (currently unused).
        submission_id: id returned by submit().
        poll_interval: seconds to sleep between two requests.
        timeout: maximum number of seconds to wait, or None to wait
            forever.

        return: the last status object received from the server.

        raise (TimeoutError): if timeout is given and the submission
            did not reach a final status in time.

        """
        status_url = f"{self.base_url}/tasks/{task_name}/submissions/{submission_id}"
        final_statuses = (self.STATUS_COMPILATION_FAILED, self.STATUS_SCORED)
        deadline = None if timeout is None else time.monotonic() + timeout
        while True:
            response = self.session.get(status_url, headers=self.auth_header)
            response.raise_for_status()
            data = response.json()
            if data.get("status") in final_statuses:
                return data
            if deadline is not None and time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Submission {submission_id} was not evaluated "
                    f"within {timeout} seconds"
                )
            time.sleep(poll_interval)

    def get_time_limit(self, task_name: str) -> float:
        """Return the time limit of a task, in seconds.

        The value is scraped from the HTML of the task description
        page.

        task_name: name of the task.

        return: the time limit shown in the description page.

        raise (ValueError): if no time limit is found in the page.

        """
        url = f"{self.base_url}/tasks/{task_name}/description"
        response = self.session.get(url, headers=self.auth_header)
        response.raise_for_status()
        match = self._TIME_LIMIT_RE.search(response.text)
        if match is None:
            raise ValueError("Could not find time limit in task description")
        return self._to_float(match.group(1))

    def has_slow_testcases(
        self, task_name: str, submission_id: str, time_limit: float
    ) -> bool:
        """Tell whether a submission was slow on a subtask it solved.

        Only subtasks with a positive score are considered. A testcase
        is slow when its execution time is more than half of the time
        limit. The data is scraped from the HTML of the submission
        details page.

        task_name: name of the task the submission belongs to.
        submission_id: id returned by submit().
        time_limit: time limit of the task, in seconds.

        return: True if at least one slow testcase is found.

        """
        url = f"{self.base_url}/tasks/{task_name}/submissions/{submission_id}/details"
        response = self.session.get(url, headers=self.auth_header)
        response.raise_for_status()

        html = response.text
        # Split by subtask. This is quite hacky and relies on subtask
        # delimiters having at least two classes (to avoid mixing it
        # up with subtask-head/subtask-body).
        for subtask in html.split('<div class="subtask ')[1:]:
            score_match = self._SUBTASK_SCORE_RE.search(subtask)
            if not (score_match and self._to_float(score_match.group(1)) > 0):
                # Subtasks without points are allowed to be slow.
                continue
            times = self._EXECUTION_TIME_RE.findall(subtask)
            if not times:
                logger.warning(
                    "No testcase times found. Ensure feedback levels are configured correctly"
                )
            if any(self._to_float(t) > time_limit * 0.5 for t in times):
                return True
        return False
166+
167+
168+
def main():
    """Submit every solution listed in the checks file and verify it.

    All the solutions are submitted first; then, for each of them,
    the script waits for the evaluation and compares the public score
    with the [min_score, max_score] range of the check. Solutions that
    pass the check are also inspected for testcases that took more
    than half of the time limit.

    return: 1 if the checks file is missing or at least one check
        failed, 0 otherwise.

    """
    parser = argparse.ArgumentParser(description="CMS Solution Checker")
    parser.add_argument(
        "--checks-json", "-c", required=True, help="Path to solution_checks.json"
    )
    parser.add_argument(
        "--url",
        "-u",
        required=True,
        help="CMS contest URL (e.g. http://localhost:8888/contest)",
    )
    parser.add_argument("--task", "-t", required=True, help="Task name")
    parser.add_argument("--username", "-U", help="CMS username")
    parser.add_argument("--password", "-p", help="CMS password")
    parser.add_argument(
        "--quiet", "-q", action="store_true", help="Disable non-warnings"
    )

    args = parser.parse_args()

    logger.setLevel(logging.WARNING if args.quiet else logging.INFO)
    ch = logging.StreamHandler()
    ch.setFormatter(RedAlertFormatter())
    logger.addHandler(ch)

    # Open the file directly instead of testing for its existence
    # first: the check and the open cannot get out of sync this way.
    try:
        with open(args.checks_json, "r", encoding="utf-8") as f:
            checks = json.load(f)
    except FileNotFoundError:
        logger.error("%s not found.", args.checks_json)
        return 1

    checker = SolutionChecker(args.url, args.username, args.password)
    checker.login()

    time_limit = checker.get_time_limit(args.task)

    # A list, not a dict keyed by path: the same solution may appear
    # in more than one check and every submission must be verified.
    submissions = []
    logger.info("Submitting %d solutions...", len(checks))
    for criteria in checks:
        sol_path = criteria["path"]
        sub_id = checker.submit(args.task, sol_path)
        submissions.append((sol_path, sub_id, criteria))
        logger.info("Submitted %s: %s", os.path.basename(sol_path), sub_id)

    has_failures = False
    logger.info("Waiting for evaluations...")
    for sol_path, sub_id, criteria in submissions:
        sol_name = os.path.basename(sol_path)
        status = checker.poll_status(args.task, sol_name, sub_id)

        # Polling also stops on submissions that did not compile, so
        # the score is not guaranteed to be part of the status.
        score = status.get("public_score") if status else None
        error = None
        if score is None:
            # status 2 is COMPILATION_FAILED
            if status and status.get("status") == 2:
                error = "Compilation failed."
            else:
                error = "Evaluation failed."
        else:
            min_score = criteria["min_score"]
            max_score = criteria["max_score"]
            # Small tolerance to absorb floating point noise.
            if score < min_score - 1e-7 or score > max_score + 1e-7:
                error = f"score {score} is not in range [{min_score}, {max_score}]"

        if error is None:
            logger.info("%20s: check successful", sol_name)
            if checker.has_slow_testcases(args.task, sub_id, time_limit):
                logger.warning(
                    "%20s: some testcases took > 50%% of time limit",
                    sol_name,
                )
        else:
            has_failures = True
            logger.error("%20s: %s", sol_name, error)

    return 1 if has_failures else 0
242+
243+
244+
# Run the checker when executed as a script; the return value of
# main() becomes the exit status of the process.
if __name__ == "__main__":
    sys.exit(main())

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ class build_with_l10n(build):
145145
"cmsRemoveSubmissions=cmscontrib.RemoveSubmissions:main",
146146
"cmsRemoveTask=cmscontrib.RemoveTask:main",
147147
"cmsRemoveUser=cmscontrib.RemoveUser:main",
148+
"cmsSolutionChecker=cmscontrib.SolutionChecker:main",
148149
"cmsSpoolExporter=cmscontrib.SpoolExporter:main",
149150
"cmsMake=cmstaskenv.cmsMake:main",
150151
"cmsPrometheusExporter=cmscontrib.PrometheusExporter:main",

0 commit comments

Comments
 (0)