Skip to content

Commit 35b85eb

Browse files
authored
Merge pull request #209 from stackhpc/feat/cr_core_memory
SelectTypeParameters=CR_Core_Memory; DefMemPerCPU
2 parents 6ca0fe7 + da6d88c commit 35b85eb

3 files changed

Lines changed: 31 additions & 11 deletions

File tree

defaults/main.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ openhpc_default_config:
2424
SlurmctldTimeout: 300
2525
SchedulerType: sched/backfill
2626
SelectType: select/cons_tres
27-
SelectTypeParameters: CR_Core
27+
SelectTypeParameters: CR_Core_Memory
2828
PriorityWeightPartition: 1000
2929
PreemptType: preempt/partition_prio
3030
PreemptMode: SUSPEND,GANG
@@ -43,6 +43,7 @@ openhpc_default_config:
4343
Epilog: /etc/slurm/slurm.epilog.clean
4444
ReturnToService: 2
4545
GresTypes: "{{ ohpc_gres_types if ohpc_gres_types != '' else 'omit' }}"
46+
# Cluster-wide default memory per CPU: the smallest per-nodegroup
# `def_mem_per_cpu` value found in `ohpc_nodegroups_computed`.
# `map` yields a lazy generator, which is always truthy, so it is converted
# to a list first; otherwise `default([100], true)` could never supply the
# [100] fallback and `min` would be handed an empty sequence when no
# nodegroup has any hosts.
DefMemPerCPU: "{{ ohpc_nodegroups_computed.values() | map(attribute='def_mem_per_cpu') | list | default([100], true) | min }}"
4647
openhpc_cgroup_default_config:
4748
ConstrainCores: "yes"
4849
ConstrainDevices: "yes"

templates/slurm.conf.j2

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,18 +22,16 @@ NodeName={{ node }}
2222
# COMPUTE NODES
2323
{% for nodegroup in openhpc_nodegroups %}
2424
# nodegroup: {{ nodegroup.name }}
25-
{% set inventory_group_name = openhpc_cluster_name ~ '_' ~ nodegroup.name %}
26-
{% set inventory_group_hosts = groups.get(inventory_group_name, []) %}
27-
{% if inventory_group_hosts | length > 0 %}
28-
{% set play_group_hosts = inventory_group_hosts | intersect (play_hosts) %}
29-
{% set first_host = play_group_hosts | first | mandatory('Inventory group "' ~ inventory_group_name ~ '" contains no hosts in this play - was --limit used?') %}
30-
{% set first_host_hv = hostvars[first_host] %}
31-
{% set ram_mb = (first_host_hv['ansible_memory_mb']['real']['total'] * (nodegroup.ram_multiplier | default(openhpc_ram_multiplier))) | int %}
32-
{% set hostlists = (inventory_group_hosts | hostlist_expression) %}{# hosts in inventory group aren't necessarily a single hostlist expression #}
25+
{% set nodegroup_computed = ohpc_nodegroups_computed.get(nodegroup.name) %}
26+
{# see vars/main.yml: if nodegroup_computed, nodegroup has at least 1 host in the inventory #}
27+
{% if nodegroup_computed %}
28+
{% set inventory_group_hosts = groups[nodegroup_computed.inventory_group_name] %}
29+
{% set first_host_hv = hostvars[nodegroup_computed.first_host] %}
30+
{% set hostlists = (inventory_group_hosts | hostlist_expression) %}{# hosts in inventory group aren't necessarily a single hostlist expression #}
3331
NodeName={{ hostlists | join(',') }} {{ '' -}}
3432
Features={{ (['nodegroup_' ~ nodegroup.name] + nodegroup.features | default([]) ) | join(',') }} {{ '' -}}
3533
State=UNKNOWN {{ '' -}}
36-
RealMemory={{ nodegroup.ram_mb | default(ram_mb) }} {{ '' -}}
34+
RealMemory={{ nodegroup.ram_mb | default(nodegroup_computed.ram_mb) }} {{ '' -}}
3735
Sockets={{ first_host_hv['ansible_processor_count'] }} {{ '' -}}
3836
CoresPerSocket={{ first_host_hv['ansible_processor_cores'] }} {{ '' -}}
3937
ThreadsPerCore={{ first_host_hv['ansible_processor_threads_per_core'] }} {{ '' -}}
@@ -44,7 +42,7 @@ NodeName={{ hostlists | join(',') }} {{ '' -}}
4442
Gres={{ first_host_hv['ohpc_node_gpu_gres'] -}}
4543
{% endif %}
4644

47-
{% endif %}{# 1 or more hosts in inventory #}
45+
{% endif %}{# 1 or more hosts in inventory #}
4846
NodeSet=nodegroup_{{ nodegroup.name }} Feature=nodegroup_{{ nodegroup.name }}
4947

5048
{% endfor %}

vars/main.yml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,25 @@ ohpc_slurm_packages:
1919
- "slurm-slurmdbd-ohpc"
2020

2121
openhpc_merged_config: "{{ openhpc_default_config | combine(openhpc_config) }}"
22+
23+
# Per-nodegroup values derived from the inventory and gathered facts, keyed by
# nodegroup name. For every entry in `openhpc_nodegroups` the inventory group
# `<openhpc_cluster_name>_<nodegroup.name>` is looked up; nodegroups whose
# inventory group is missing or empty get no entry at all.
# Each entry holds:
#   inventory_group_name: the inventory group name described above
#   first_host: first host of that group which is also in `play_hosts`;
#     templating fails via `mandatory` if there is none
#   ram_mb: first_host's `ansible_memory_mb.real.total` multiplied by
#     `nodegroup.ram_multiplier` (default `openhpc_ram_multiplier`), as an int
#   def_mem_per_cpu: `ram_mb` divided by first_host's
#     `ansible_processor_vcpus`, as an int
# NOTE(review): the template renders dict-literal text; presumably Ansible
# converts the rendered text back into a mapping - confirm.
# NOTE(review): `ram_mb` here ignores any explicit `nodegroup.ram_mb`
# override - confirm that is intended for `def_mem_per_cpu`.
ohpc_nodegroups_computed: >
24+
{
25+
{% for nodegroup in openhpc_nodegroups %}
26+
{% set inventory_group_name = openhpc_cluster_name ~ '_' ~ nodegroup.name %}
27+
{% set inventory_group_hosts = groups.get(inventory_group_name, []) %}
28+
{% if inventory_group_hosts | length > 0 %}
29+
{% set play_group_hosts = inventory_group_hosts | intersect (play_hosts) %}
30+
{% set first_host = play_group_hosts | first | mandatory('Inventory group "' ~ inventory_group_name ~ '" contains no hosts in this play - was --limit used?') %}
31+
{% set first_host_hv = hostvars[first_host] %}
32+
{% set ram_mb = (first_host_hv['ansible_memory_mb']['real']['total'] * (nodegroup.ram_multiplier | default(openhpc_ram_multiplier))) | int %}
33+
{{ nodegroup.name | to_json }}: {
34+
"inventory_group_name": {{ inventory_group_name | to_json }},
35+
"first_host": {{ first_host | to_json }},
36+
"ram_mb": {{ ram_mb }},
37+
"def_mem_per_cpu": {{ (ram_mb / first_host_hv['ansible_processor_vcpus']) | int }},
38+
},
39+
{% endif %}
40+
{% endfor %}
41+
}
42+
2243
...

0 commit comments

Comments
 (0)