Skip to content

Commit 3babb40

Browse files
committed
SelectTypeParameters=CR_Core_Memory; define DefMemPerCPU
Also create the ohpc_nodegroup_computed variable to share values computed per nodegroup between multiple places in the role. If a nodegroup is in ohpc_nodegroup_computed, it has at least one host. It stores: - first_host: name of the first host in the nodegroup; use `hostvars[computed.first_host]` to access its hostvars - inventory_group_name: name of the inventory group for this nodegroup; use `groups[computed.inventory_group_name]` to access the host list
1 parent 7fb7f44 commit 3babb40

3 files changed

Lines changed: 31 additions & 11 deletions

File tree

defaults/main.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ openhpc_default_config:
2424
SlurmctldTimeout: 300
2525
SchedulerType: sched/backfill
2626
SelectType: select/cons_tres
27-
SelectTypeParameters: CR_Core
27+
SelectTypeParameters: CR_Core_Memory
2828
PriorityWeightPartition: 1000
2929
PreemptType: preempt/partition_prio
3030
PreemptMode: SUSPEND,GANG
@@ -43,6 +43,7 @@ openhpc_default_config:
4343
Epilog: /etc/slurm/slurm.epilog.clean
4444
ReturnToService: 2
4545
GresTypes: "{{ ohpc_gres_types if ohpc_gres_types != '' else 'omit' }}"
46+
# Smallest def_mem_per_cpu across the entries of ohpc_nodegroup_computed, or
# 100 when there are no entries. `map` yields a generator, which is always
# truthy, so it is converted to a list first; otherwise `default(..., true)`
# would never apply and `min` would receive an empty sequence.
DefMemPerCPU: "{{ ohpc_nodegroup_computed.values() | map(attribute='def_mem_per_cpu') | list | default([100], true) | min }}"
4647
openhpc_cgroup_default_config:
4748
ConstrainCores: "yes"
4849
ConstrainDevices: "yes"

templates/slurm.conf.j2

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,18 +22,16 @@ NodeName={{ node }}
2222
# COMPUTE NODES
2323
{% for nodegroup in openhpc_nodegroups %}
2424
# nodegroup: {{ nodegroup.name }}
25-
{% set inventory_group_name = openhpc_cluster_name ~ '_' ~ nodegroup.name %}
26-
{% set inventory_group_hosts = groups.get(inventory_group_name, []) %}
27-
{% if inventory_group_hosts | length > 0 %}
28-
{% set play_group_hosts = inventory_group_hosts | intersect (play_hosts) %}
29-
{% set first_host = play_group_hosts | first | mandatory('Inventory group "' ~ inventory_group_name ~ '" contains no hosts in this play - was --limit used?') %}
30-
{% set first_host_hv = hostvars[first_host] %}
31-
{% set ram_mb = (first_host_hv['ansible_memory_mb']['real']['total'] * (nodegroup.ram_multiplier | default(openhpc_ram_multiplier))) | int %}
32-
{% set hostlists = (inventory_group_hosts | hostlist_expression) %}{# hosts in inventory group aren't necessarily a single hostlist expression #}
25+
{% set nodegroup_computed = ohpc_nodegroup_computed.get(nodegroup.name) %}
26+
{# see vars/main.yml: if nodegroup_computed, nodegroup has at least 1 host in the inventory #}
27+
{% if nodegroup_computed %}
28+
{% set inventory_group_hosts = groups[nodegroup_computed.inventory_group_name] %}
29+
{% set first_host_hv = hostvars[nodegroup_computed.first_host] %}
30+
{% set hostlists = (inventory_group_hosts | hostlist_expression) %}{# hosts in inventory group aren't necessarily a single hostlist expression #}
3331
NodeName={{ hostlists | join(',') }} {{ '' -}}
3432
Features={{ (['nodegroup_' ~ nodegroup.name] + nodegroup.features | default([]) ) | join(',') }} {{ '' -}}
3533
State=UNKNOWN {{ '' -}}
36-
RealMemory={{ nodegroup.ram_mb | default(ram_mb) }} {{ '' -}}
34+
RealMemory={{ nodegroup.ram_mb | default(nodegroup_computed.ram_mb) }} {{ '' -}}
3735
Sockets={{ first_host_hv['ansible_processor_count'] }} {{ '' -}}
3836
CoresPerSocket={{ first_host_hv['ansible_processor_cores'] }} {{ '' -}}
3937
ThreadsPerCore={{ first_host_hv['ansible_processor_threads_per_core'] }} {{ '' -}}
@@ -44,7 +42,7 @@ NodeName={{ hostlists | join(',') }} {{ '' -}}
4442
Gres={{ first_host_hv['ohpc_node_gpu_gres'] -}}
4543
{% endif %}
4644

47-
{% endif %}{# 1 or more hosts in inventory #}
45+
{% endif %}{# 1 or more hosts in inventory #}
4846
NodeSet=nodegroup_{{ nodegroup.name }} Feature=nodegroup_{{ nodegroup.name }}
4947

5048
{% endfor %}

vars/main.yml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,25 @@ ohpc_slurm_packages:
1717
- "{{ 'lmod-ohpc' if openhpc_module_system_install else '' }}"
1818
database:
1919
- "slurm-slurmdbd-ohpc"
20+
21+
# Per-nodegroup computed values, keyed by nodegroup name. Consumers call
# `.get()` / `.values()` on this variable, so the rendered text is expected to
# evaluate to a mapping.
# An entry is only emitted for a nodegroup whose inventory group
# (`<openhpc_cluster_name>_<nodegroup.name>`) contains at least one host.
# Each entry holds:
#   inventory_group_name: name of that inventory group
#   first_host: first host of the group that is also in play_hosts; templating
#     fails with the `mandatory` message if there is no such host
#   ram_mb: first_host's ansible_memory_mb.real.total multiplied by
#     nodegroup.ram_multiplier (default: openhpc_ram_multiplier), as an int
#   def_mem_per_cpu: ram_mb divided by first_host's ansible_processor_vcpus,
#     as an int
# NOTE(review): ram_mb here is always derived from host facts; a
# `nodegroup.ram_mb` override is not taken into account for def_mem_per_cpu -
# confirm whether that is intended.
ohpc_nodegroup_computed: >
22+
{
23+
{% for nodegroup in openhpc_nodegroups %}
24+
{% set inventory_group_name = openhpc_cluster_name ~ '_' ~ nodegroup.name %}
25+
{% set inventory_group_hosts = groups.get(inventory_group_name, []) %}
26+
{% if inventory_group_hosts | length > 0 %}
27+
{% set play_group_hosts = inventory_group_hosts | intersect (play_hosts) %}
28+
{% set first_host = play_group_hosts | first | mandatory('Inventory group "' ~ inventory_group_name ~ '" contains no hosts in this play - was --limit used?') %}
29+
{% set first_host_hv = hostvars[first_host] %}
30+
{% set ram_mb = (first_host_hv['ansible_memory_mb']['real']['total'] * (nodegroup.ram_multiplier | default(openhpc_ram_multiplier))) | int %}
31+
{{ nodegroup.name | to_json }}: {
32+
"inventory_group_name": {{ inventory_group_name | to_json }},
33+
"first_host": {{ first_host | to_json }},
34+
"ram_mb": {{ ram_mb }},
35+
"def_mem_per_cpu": {{ (ram_mb / first_host_hv['ansible_processor_vcpus']) | int }},
36+
},
37+
{% endif %}
38+
{% endfor %}
39+
}
40+
2041
...

0 commit comments

Comments
 (0)