Skip to content

Commit 7f7ad6f

Browse files
authored
EH: CS-612 allow to overwrite the PE allocation rule on the submission command line (#77)
* EH: CS-612 allow to overwrite the PE allocation rule on the submission command line // added command line switches and qstat output // scheduler code still missing * scheduler changes to overwrite the allocation rule by the global -par switch * implemented handling of master and slave allocation rules in scheduler * reject requesting allocation rule $pe_slots for master or slave scope * - fixes for disjoint queues, fixed allocation rule and -scope master -par 1 - workaround for CS-1898 / CS-1899 * fixed category string for -scope slave -par ... * - fixed issue with binding - optimization (do not touch master host in second round of round_robin if it has been filled with a fixed alloc rule) - added -par switch to man page
1 parent 80dea29 commit 7f7ad6f

29 files changed

Lines changed: 988 additions & 316 deletions

doc/markdown/man/man1/submit.include.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,6 +1259,24 @@ If this option is specified and the priority is not 0 then this value will be pa
12591259
instances as parameter with the name *p*. (see `-jsv` option above or find more information concerning JSV in
12601260
xxqs_name_sxx_jsv(1))
12611261

1262+
## -par *allocation_rule*
1263+
1264+
Available for `qsub`, `qsh`, `qrsh`, `qlogin` and `qalter` only.
1265+
1266+
Allows overwriting the allocation rule defined in the parallel environment, see xxqs_name_sxx_pe(1), attribute `allocation_rule`.
1267+
1268+
The allocation rule can be overwritten globally or, when combined with the `-scope` option, separately for the master and/or slave scope.
1269+
1270+
Examples:
1271+
1272+
`qsub -pe <pe_name> 4 -par '$round_robin'` will overwrite the allocation rule defined in the PE globally (quote the rule so that the shell does not expand `$round_robin` as a variable).
1273+
1274+
`qsub -pe <pe_name> 4 -scope master -par 1` will overwrite the allocation rule defined in the PE for the master scope.
1275+
1276+
`qsub -pe <pe_name> 5 -scope master -par 1 -q master.q -scope slave -par 4 -q slave.q` will overwrite the allocation rule defined in the PE
1277+
with `1` for the master scope, and `4` for the slave scope. This is a typical use case for jobs which require
1278+
the master task to run on a separate host / in a different queue instance than the slave tasks.
1279+
12621280
## -pe *parallel_environment* n\[-\[m\]\]\|\[-\]m,...
12631281

12641282
Available for `qsub`, `qsh`, `qrsh`, `qlogin` and `qalter` only.

source/clients/common/ocs_client_job.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,16 @@ void cull_show_job(const lListElem *job, int flags, bool show_binding) {
337337
printf("%-33s", str_attrib);
338338
uni_print_list(stdout, nullptr, 0, lp, fields, delis, FLG_NO_DELIS_STRINGS);
339339
}
340+
341+
const char *allocation_rule = lGetString(jrs, JRS_allocation_rule);
342+
if (allocation_rule != nullptr) {
343+
if (str_scope == nullptr) {
344+
str_attrib = sge_dstring_sprintf(&dstr_attrib, "allocation_rule:");
345+
} else {
346+
str_attrib = sge_dstring_sprintf(&dstr_attrib, "%s_allocation_rule:", str_scope);
347+
}
348+
printf("%-33s%s\n", str_attrib, allocation_rule);
349+
}
340350
}
341351
}
342352

source/clients/qalter/ocs_qalter.cc

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
*
2828
* All Rights Reserved.
2929
*
30-
* Portions of this software are Copyright (c) 2023-2025 HPC-Gridware GmbH
30+
* Portions of this software are Copyright (c) 2023-2026 HPC-Gridware GmbH
3131
*
3232
************************************************************************/
3333
/*___INFO__MARK_END__*/
@@ -701,6 +701,17 @@ static lList *qalter_parse_job_parameter(u_long32 me_who, lList *cmdline, lList
701701
nm_set(job_field, JB_project);
702702
}
703703

704+
while ((ep = lGetElemStrRW(cmdline, SPA_switch_val, "-par"))) {
705+
const char *allocation_rule = lGetString(ep, SPA_switch_arg);
706+
u_long32 scope = lGetChar(ep, SPA_argval_lCharT);
707+
lListElem *jrs = job_get_or_create_request_setRW(job, scope);
708+
if (jrs != nullptr) {
709+
lSetString(jrs, JRS_allocation_rule, allocation_rule);
710+
}
711+
lRemoveElem(cmdline, &ep);
712+
nm_set(job_field, JB_request_set_list);
713+
}
714+
704715
while ((ep = lGetElemStrRW(cmdline, SPA_switch_val, "-pe"))) {
705716
lSetString(job, JB_pe, lGetString(ep, SPA_argval_lStringT));
706717
/* put sublist from parsing into job */
@@ -808,7 +819,6 @@ static lList *qalter_parse_job_parameter(u_long32 me_who, lList *cmdline, lList
808819
nm_set(job_field, JB_job_args);
809820
}
810821
lSetList(job, JB_job_args, lp);
811-
812822
}
813823

814824
/* context switches are sensitive to order */
@@ -829,8 +839,9 @@ static lList *qalter_parse_job_parameter(u_long32 me_who, lList *cmdline, lList
829839
lRemoveElem(cmdline, &ep);
830840
ep = temp;
831841
nm_set(job_field, JB_context);
832-
} else
842+
} else {
833843
ep = lNextRW(ep);
844+
}
834845
}
835846

836847
/* complain about unused options */
@@ -862,8 +873,10 @@ static lList *qalter_parse_job_parameter(u_long32 me_who, lList *cmdline, lList
862873
DRETURN(answer);
863874
}
864875

865-
/* printf("=============== lWriteElemTo(job, stdout); ==================\n"); */
866-
/* lWriteElemTo(job, stdout); */
876+
#if 0
877+
printf("=============== lWriteElemTo(job, stdout); ==================\n");
878+
lWriteElemTo(job, stdout);
879+
#endif
867880

868881

869882
/*

source/clients/qsh/ocs_qsh.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
*
3030
* Portions of this code are Copyright 2011 Univa Inc.
3131
*
32-
* Portions of this software are Copyright (c) 2024-2025 HPC-Gridware GmbH
32+
* Portions of this software are Copyright (c) 2023-2026 HPC-Gridware GmbH
3333
*
3434
************************************************************************/
3535
/*___INFO__MARK_END__*/
@@ -2373,8 +2373,8 @@ static void remove_unknown_opts(lList *lp, u_long32 jb_now, int tightly_integrat
23732373
strcmp(cp, "-hold_jid") && strcmp(cp, "-hold_jid_ad") && strcmp(cp, "-h") &&
23742374
strcmp(cp, "-l") && strcmp(cp, "-m") && strcmp(cp, "-masterq") &&
23752375
strcmp(cp, "-N") && strcmp(cp, "-noshell") && strcmp(cp, "-now") &&
2376-
strcmp(cp, "-notify") && strcmp(cp, "-P") &&
2377-
strcmp(cp, "-p") && strcmp(cp, "-pe") && strcmp(cp, "-q") && strcmp(cp, "-v") &&
2376+
strcmp(cp, "-notify") && strcmp(cp, "-P") && strcmp(cp, "-p") && strcmp(cp, "-par") &&
2377+
strcmp(cp, "-pe") && strcmp(cp, "-q") && strcmp(cp, "-v") &&
23782378
strcmp(cp, "-V") && strcmp(cp, "-display") && strcmp(cp, "-verify") &&
23792379
strcmp(cp, "-soft") && strcmp(cp, "-M") && strcmp(cp, "-verbose") &&
23802380
strcmp(cp, "-ac") && strcmp(cp, "-dc") && strcmp(cp, "-sc") && strcmp(cp, "-scope") &&

source/clients/qsh/ocs_qsh_parse.cc

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
*
2828
* All Rights Reserved.
2929
*
30-
* Portions of this software are Copyright (c) 2023-2025 HPC-Gridware GmbH
30+
* Portions of this software are Copyright (c) 2023-2026 HPC-Gridware GmbH
3131
*
3232
************************************************************************/
3333
/*___INFO__MARK_END__*/
@@ -493,6 +493,16 @@ lList *cull_parse_qsh_parameter(u_long32 prog_number, u_long32 uid, const char *
493493
lRemoveElem(cmdline, &ep);
494494
}
495495

496+
while ((ep = lGetElemStrRW(cmdline, SPA_switch_val, "-par"))) {
497+
const char *allocation_rule = lGetString(ep, SPA_switch_arg);
498+
u_long32 scope = lGetChar(ep, SPA_argval_lCharT);
499+
lListElem *jrs = job_get_or_create_request_setRW(*pjob, scope);
500+
if (jrs != nullptr) {
501+
lSetString(jrs, JRS_allocation_rule, allocation_rule);
502+
}
503+
lRemoveElem(cmdline, &ep);
504+
}
505+
496506
while ((ep = lGetElemStrRW(cmdline, SPA_switch_val, "-js"))) {
497507
lSetUlong(*pjob, JB_jobshare, lGetUlong(ep, SPA_argval_lUlongT));
498508
lRemoveElem(cmdline, &ep);

source/common/msg_common.h

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
*
3131
* Portions of this code are Copyright 2011 Univa Inc.
3232
*
33-
* Portions of this software are Copyright (c) 2023-2025 HPC-Gridware GmbH
33+
* Portions of this software are Copyright (c) 2023-2026 HPC-Gridware GmbH
3434
*
3535
************************************************************************/
3636
/*___INFO__MARK_END__*/
@@ -50,9 +50,10 @@
5050
#define MSG_TABLE_EV_POOL "POOL"
5151
#define MSG_TABLE_SIZE "SIZE"
5252

53-
#define MSG_GDI_ARGUMENTSYNTAX_QA_CATEGORY_ID "cat_id category ID"
53+
#define MSG_GDI_ARGUMENTSYNTAX_OA_ALLOCATION_RULE "allocation_rule <int> | $pe_slots | $fill_up | $round_robin"
5454
#define MSG_GDI_ARGUMENTSYNTAX_OA_ACCOUNT_STRING "account_string account_name"
55-
#define MSG_GDI_ARGUMENTSYNTAX_QA_BINDING_FILTER_STR "topology_string topology string where lower case letters show masked units"
55+
#define MSG_GDI_ARGUMENTSYNTAX_OA_BINDING_FILTER_STR "topology_string topology string where lower case letters show masked units"
56+
#define MSG_GDI_ARGUMENTSYNTAX_OA_CATEGORY_ID "cat_id category ID"
5657
#define MSG_GDI_ARGUMENTSYNTAX_OA_COMPLEX_LIST "complex_list complex[,complex,...]"
5758
#define MSG_GDI_ARGUMENTSYNTAX_OA_CONTEXT_LIST "context_list variable[=value][,variable[=value],...]"
5859
#define MSG_GDI_ARGUMENTSYNTAX_OA_CKPT_SEL "ckpt_selector `n' `s' `m' `x' <interval> "
@@ -965,7 +966,7 @@
965966
#define MSG_SEC_DELCREDNOBIN_US _MESSAGE(60413, _("could not delete credentials for job " sge_u32 " - " SFN " binary does not exist"))
966967
#define MSG_SEC_PUTCREDSTDERR_S _MESSAGE(60414, _("put_cred stderr: " SFN))
967968
#define MSG_SEC_NOSTARTCMD4GETCRED_SU _MESSAGE(60415, _("can't start command " SFQ " for job " sge_u32 " to get credentials"))
968-
#define MSG_PE_ALLOCRULE_S _MESSAGE(60416, _("incorrect allocation_rule " SFQ ))
969+
#define MSG_PE_ALLOCRULE_S _MESSAGE(60416, _("invalid allocation_rule " SFQ ))
969970
#define MSG_GDI_OUTOFMEMORY _MESSAGE(60418, _("out of memory"))
970971
#define MSG_COM_UNPACKINT_I _MESSAGE(60419, _("unpacking integer %d failed"))
971972
#define MSG_COM_UNPACKOBJ_S _MESSAGE(60420, _("unpacking a " SFN " object failed"))
@@ -1110,7 +1111,10 @@
11101111

11111112
#define MSG_PARSE_BSORT_CONTRA_S _MESSAGE(60744, _("contradicting ascending and descending sort order in binding sort " SFQ))
11121113

1113-
#define MSG_GDI_USAGE_when_OPT "[-when now|on_reschedule]"
1114-
#define MSG_GDI_UTEXT_when_OPT _MESSAGE(60745, _("apply modification immediately or on job rescheduling"))
1114+
#define MSG_GDI_USAGE_when_OPT "[-when now|on_reschedule]"
1115+
#define MSG_GDI_UTEXT_when_OPT _MESSAGE(60745, _("apply modification immediately or on job rescheduling"))
1116+
1117+
#define MSG_GDI_USAGE_PAR_OPT "[-par allocation_rule]"
1118+
#define MSG_GDI_UTEXT_PAR_OPT _MESSAGE(60746, _("set the parallel job allocation rule"))
11151119

11161120
// clang-format on

source/common/parse_job_cull.cc

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
*
2828
* All Rights Reserved.
2929
*
30-
* Portions of this software are Copyright (c) 2023-2025 HPC-Gridware GmbH
30+
* Portions of this software are Copyright (c) 2023-2026 HPC-Gridware GmbH
3131
*
3232
************************************************************************/
3333
/*___INFO__MARK_END__*/
@@ -621,6 +621,16 @@ lList *cull_parse_job_parameter(u_long32 uid, const char *username, const char *
621621
lRemoveElem(cmdline, &ep);
622622
}
623623

624+
while ((ep = lGetElemStrRW(cmdline, SPA_switch_val, "-par"))) {
625+
const char *allocation_rule = lGetString(ep, SPA_switch_arg);
626+
u_long32 scope = lGetChar(ep, SPA_argval_lCharT);
627+
lListElem *jrs = job_get_or_create_request_setRW(*pjob, scope);
628+
if (jrs != nullptr) {
629+
lSetString(jrs, JRS_allocation_rule, allocation_rule);
630+
}
631+
lRemoveElem(cmdline, &ep);
632+
}
633+
624634
while ((ep = lGetElemStrRW(cmdline, SPA_switch_val, "-pe"))) {
625635
lSetString(*pjob, JB_pe, lGetString(ep, SPA_argval_lStringT));
626636
lSwapList(*pjob, JB_pe_range, ep, SPA_argval_lListT);

source/common/parse_qsub.cc

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include <cstdlib>
3838
#include <unistd.h>
3939

40+
#include "uti/config_file.h"
4041
#include "uti/sge_dstring.h"
4142
#include "uti/sge_log.h"
4243
#include "uti/sge_parse_num_par.h"
@@ -1540,7 +1541,35 @@ lList *cull_parse_cmdline(
15401541
continue;
15411542
}
15421543

1543-
/*----------------------------------------------------------------------------*/
1544+
/*-----------------------------------------------------------------------------*/
1545+
/* "-par" */
1546+
1547+
if (strcmp("-par", *sp) == 0) {
1548+
1549+
DPRINTF("\"%s\"\n", *sp);
1550+
1551+
sp++;
1552+
if (*sp == nullptr) {
1553+
answer_list_add_sprintf(&answer, STATUS_ESEMANTIC, ANSWER_QUALITY_ERROR,
1554+
MSG_PARSE_XOPTIONMUSTHAVEARGUMENT_S, "-par");
1555+
DRETURN(answer);
1556+
}
1557+
1558+
// check if the given allocation rule (e.g., $pe_slots) is correct
1559+
if (replace_params(*sp, nullptr, 0, pe_alloc_rule_variables) != 0) {
1560+
answer_list_add_sprintf(&answer, STATUS_ESEMANTIC, ANSWER_QUALITY_ERROR,
1561+
MSG_PARSE_INVALIDOPTIONARGUMENT_SS, "-par", *sp);
1562+
DRETURN(answer);
1563+
}
1564+
1565+
ep_opt = sge_add_arg(pcmdline, par_OPT, lStringT, *(sp - 1), *sp);
1566+
lSetChar(ep_opt, SPA_argval_lCharT, scope_flag);
1567+
1568+
sp++;
1569+
continue;
1570+
}
1571+
1572+
/*----------------------------------------------------------------------------*/
15441573
if (!strcmp("-pe", *sp)) {
15451574
lList *pe_range = nullptr;
15461575
dstring d_arg = DSTRING_INIT;

source/common/sge_options.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
*
3030
* Portions of this code are Copyright 2011 Univa Inc.
3131
*
32-
* Portions of this software are Copyright (c) 2023-2025 HPC-Gridware GmbH
32+
* Portions of this software are Copyright (c) 2023-2026 HPC-Gridware GmbH
3333
*
3434
************************************************************************/
3535
/*___INFO__MARK_END__*/
@@ -533,6 +533,8 @@ unsigned short sge_options[][ALL_OPT + 1] =
533533
{0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1},
534534
/* when_OPT */
535535
{0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1},
536+
/* par_OPT */
537+
{0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1},
536538
/*
537539
n q q q q q q q q q q q q q q e q q q q n A
538540
o a c d h m m r r s s r l s s x e r r r o L

source/common/sge_options.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
*
3131
* Portions of this code are Copyright 2011 Univa Inc.
3232
*
33-
* Portions of this software are Copyright (c) 2023-2025 HPC-Gridware GmbH
33+
* Portions of this software are Copyright (c) 2023-2026 HPC-Gridware GmbH
3434
*
3535
************************************************************************/
3636
/*___INFO__MARK_END__*/
@@ -309,6 +309,7 @@ enum {
309309
binstance_OPT, //< binding instance
310310

311311
when_OPT, //< for qalter set if changes shall be applied on a running job or only after rescheduling
312+
par_OPT, //< submit / qalter parameter to set the PE allocation rule per scope
312313
};
313314

314315
/* macros used in parsing */

0 commit comments

Comments
 (0)