|
37 | 37 | from connections import github |
38 | 38 | from tools import config, cvmfs_repository, job_metadata, pr_comments, run_cmd |
39 | 39 | import tools.filter as tools_filter |
40 | | -from tools.pr_comments import ChatLevels, create_comment |
| 40 | +from tools.pr_comments import ChatLevels, create_comment, update_comment |
41 | 41 | from tools.build_params import BUILD_PARAM_ARCH, BUILD_PARAM_ACCEL |
42 | 42 |
|
43 | 43 | # defaults (used if not specified via, eg, 'app.cfg') |
|
# other constants
# name of the shell script (created in a job's working dir) that holds exported variables
EXPORT_VARS_FILE = 'export_vars.sh'
56 | 56 |
|
# Immutable record bundling everything needed to submit and track one build job.
Job = namedtuple(
    'Job',
    [
        'working_dir',
        'arch_target',
        'repo_id',
        'slurm_opts',
        'year_month',
        'pr_id',
        'accelerator',
        'owner',
    ],
)
58 | 60 |
|
# global repo_cfg: module-level cache of per-repository configuration
# (populated elsewhere; kept global so repeated lookups avoid re-reading config)
repo_cfg = {}
@@ -111,6 +113,10 @@ def get_build_env_cfg(cfg): |
111 | 113 | log(f"{fn}(): submit_command '{submit_command}'") |
112 | 114 | config_data[config.BUILDENV_SETTING_SUBMIT_COMMAND] = submit_command |
113 | 115 |
|
| 116 | + cancel_command = buildenv.get(config.BUILDENV_SETTING_CANCEL_COMMAND) |
| 117 | + log(f"{fn}(): cancel_command '{cancel_command}'") |
| 118 | + config_data[config.BUILDENV_SETTING_CANCEL_COMMAND] = cancel_command |
| 119 | + |
114 | 120 | job_handover_protocol = buildenv.get(config.BUILDENV_SETTING_JOB_HANDOVER_PROTOCOL) |
115 | 121 | slurm_params = buildenv.get(config.BUILDENV_SETTING_SLURM_PARAMS) |
116 | 122 | if job_handover_protocol == config.JOB_HANDOVER_PROTOCOL_HOLD_RELEASE: |
@@ -585,6 +591,8 @@ def prepare_jobs(pr, cfg, event_info, action_filter, build_params): |
585 | 591 | base_branch_name = pr.base.ref |
586 | 592 | log(f"{fn}(): pr.base.repo.ref '{base_branch_name}'") |
587 | 593 |
|
| 594 | + job_owner = event_info['raw_request_body']['sender']['login'] |
| 595 | + |
588 | 596 | # create run dir (base directory for potentially several jobs) |
589 | 597 | # TODO may still be too early (before we get to any actual job being |
590 | 598 | # prepared below when calling 'download_pr') |
@@ -692,7 +700,7 @@ def prepare_jobs(pr, cfg, event_info, action_filter, build_params): |
692 | 700 |
|
693 | 701 | # enlist jobs to proceed |
694 | 702 | job = Job(job_dir, partition_info['cpu_subdir'], repo_id, partition_info['slurm_params'], year_month, |
695 | | - pr_id, accelerator) |
| 703 | + pr_id, accelerator, job_owner) |
696 | 704 | jobs.append(job) |
697 | 705 |
|
698 | 706 | log(f"{fn}(): {len(jobs)} jobs to proceed after applying white list") |
@@ -1391,3 +1399,123 @@ def request_bot_build_issue_comments(repo_name, pr_number): |
1391 | 1399 | status_table['result'].append(result) |
1392 | 1400 |
|
1393 | 1401 | return status_table |
| 1402 | + |
| 1403 | + |
def get_job_ids(action_filter):
    """
    Gets and validates 'jobid:' arguments.

    Args:
        action_filter (EESSIBotActionFilter): Instance containing 'jobid:' arguments

    Returns:
        job_ids (list): valid 'jobid:' arguments (strings that parse as positive integers)
    """
    fn = sys._getframe().f_code.co_name

    # Get 'jobid:' arguments
    job_filter = action_filter.get_filter_by_component(tools_filter.FILTER_COMPONENT_JOBID)
    if not job_filter:
        log(f"{fn}(): 'bot: cancel' command needs at least one 'jobid:' argument.")
        return []

    # Validate job IDs: only keep values that parse as a positive integer
    job_ids = []
    for job_id in job_filter:
        try:
            if int(job_id) > 0:
                job_ids.append(job_id)
            else:
                log(f"{fn}(): Invalid job ID: '{job_id}'")
        except (TypeError, ValueError):
            # non-numeric argument (eg 'jobid:abc'); narrow catch so unrelated
            # bugs are not silently swallowed, and log the offending value
            log(f"{fn}(): Invalid job ID: '{job_id}'")

    return job_ids
| 1434 | + |
| 1435 | + |
def get_work_dirs(job_ids, cfg):
    """
    Gets working directories of build jobs.

    Args:
        job_ids (list): list of job ids to check.
        cfg (ConfigParser): Instance containing full configuration from app.cfg

    Returns:
        work_dirs (dict): dict mapping each found job id to its work dir
    """
    # nothing to look up; also avoids running squeue with a malformed empty '--job=' option
    if not job_ids:
        return {}

    poll_command = cfg[config.SECTION_JOB_MANAGER][config.JOB_MANAGER_SETTING_POLL_COMMAND]

    # squeue only the given job IDs; '@' separates the two fields because it
    # cannot occur in a job id and is very unlikely to occur in a work dir path
    cs_jobs = ",".join(job_ids)
    command_line = f"{poll_command} --noheader --Format=JobId:0@,WorkDir:0 --job={cs_jobs}"
    out, err, exit_code = run_cmd(command_line, "Get WorkDirs of jobs")

    # All output lines are formatted as '{job_id}@{work_dir}'; skip anything else
    work_dirs = {}
    for line in out.split("\n"):
        job = [field.strip() for field in line.split("@")]
        if len(job) != 2:
            continue
        work_dirs[job[0]] = job[1]

    return work_dirs
| 1463 | + |
| 1464 | + |
def cancel_jobs(jobs, user, pr, cfg):
    """
    Cancels the given build jobs.

    Args:
        jobs (dict): dictionary mapping each job_id to cancel to its work_dir
        user (str): The user who sent the 'bot: cancel' command
        pr (github.PullRequest.PullRequest): instance representing the pull request
        cfg (ConfigParser): Instance containing full configuration from app.cfg

    Returns:
        cancelled_jobs (list): job_ids of successfully cancelled jobs
    """
    fn = sys._getframe().f_code.co_name

    buildenv = get_build_env_cfg(cfg)
    cancel_command = buildenv[config.BUILDENV_SETTING_CANCEL_COMMAND]

    cancelled_jobs = []
    for job_id, work_dir in jobs.items():
        # Get job owner and PR comment ID from the job's metadata file
        metadata_path = os.path.join(work_dir, f"_bot_job{job_id}.metadata")
        metadata = job_metadata.get_section_from_file(
            filepath=metadata_path,
            section=job_metadata.JOB_PR_SECTION,
        )
        # guard: metadata file may be missing or unreadable; without it we can
        # neither verify ownership nor update the PR comment, so skip this job
        if metadata is None:
            log(f"{fn}(): No metadata found for job {job_id} - skipping cancellation")
            continue
        job_owner = metadata.get(job_metadata.JOB_PR_JOB_OWNER)
        pr_comment_id = metadata.get(job_metadata.JOB_PR_PR_COMMENT_ID)

        # Only the job owner should be able to cancel a job
        if job_owner != user:
            log(f"{fn}(): User {user} did not start job {job_id} - skipping cancellation")
            continue
        log(f"{fn}(): Job {job_id} was started by user {user} - cancelling job")

        # Cancel job; don't raise so one failed cancellation doesn't abort the rest
        command_line = f"{cancel_command} --verbose {job_id}"
        out, err, exit_code = run_cmd(command_line, f"cancel job {job_id}", raise_on_error=False)

        # Check if command was successful
        if exit_code != 0:
            log(f"{fn}(): scancel resulted in a non-zero exit code for job {job_id}.")
            continue
        # scancel can exit 0 yet still report errors on stderr (eg unknown job id)
        if any([line.startswith("scancel: error: ") for line in err.split("\n")]):
            log(f"{fn}(): Unable to cancel job {job_id}.")
            continue

        log(f"{fn}(): Cancelled job {job_id}")

        # Update job status table in the PR comment, if the metadata told us which one
        if pr_comment_id is not None:
            dt = datetime.now(timezone.utc)
            update = f"\n|{dt.strftime('%b %d %X %Z %Y')}|finished|job id `{job_id}` was cancelled|"
            update_comment(int(pr_comment_id), pr, update)
        else:
            log(f"{fn}(): No PR comment ID in metadata for job {job_id} - not updating status table")

        cancelled_jobs.append(job_id)

    return cancelled_jobs
0 commit comments