# Patch summary (text before the first "diff --git" header is skipped by patch tools).
#
# conf/group_vars/all.yml:
#   - moves the slurm_compute_nodes definition up, next to slurm_profiles;
#   - makes slurm_with_gres_gpu read slurm_compute_nodes instead of
#     fhpc_nodes['compute'], and computes it as "sum of the per-node-type gpus
#     lengths > 0" instead of "map('bool') | max".
#
# conf/roles/slurm/templates/slurm.conf.j2:
#   - each NodeName line now gets at most one " Gres=" parameter whose gpu
#     entries are joined with commas (previously one " Gres=gpu:..." was
#     emitted per GPU model);
#   - " Gres=" is emitted only when slurm_emulator is false and the node type
#     has at least one entry in gpus.
#
# NOTE(review): the indentation of the slurm_profiles children and the blank
# context line after "{% endif %}" in the template hunk were reconstructed
# from the hunk line counts -- confirm against the target tree before applying.
diff --git a/conf/group_vars/all.yml b/conf/group_vars/all.yml
index 9c6eb5f..da8580f 100644
--- a/conf/group_vars/all.yml
+++ b/conf/group_vars/all.yml
@@ -37,8 +37,9 @@ slurm_profiles:
   compute: compute
   server: admin
   login: login
+slurm_compute_nodes: "{{ fhpc_nodes['compute'] }}"
 # GRES GPU enabled if at least one type of compute nodes has gpu
-slurm_with_gres_gpu: "{{ fhpc_nodes['compute'] | map(attribute='gpus') | map('length') | map('bool') | max }}"
+slurm_with_gres_gpu: "{{ slurm_compute_nodes | map(attribute='gpus') | map('length') | sum > 0 }}"
 slurm_local_munge_key_file: "{{ fhpc_cluster_state_dir }}/munge/munge.key"
 slurm_local_slurm_key_file: "{{ fhpc_cluster_state_dir }}/slurm/slurm.key"
 slurm_local_mariadb_password_file: "{{ fhpc_cluster_state_dir }}/mariadb/mariadb.password"
@@ -49,7 +50,6 @@ slurm_restd_with_unix_socket: "{{ fhpc_slurmrestd_with_unix_socket }}"
 slurm_restd_socket: "{{ fhpc_slurmrestd_socket }}"
 slurm_restd_port: "{{ fhpc_slurmrestd_port }}"
 slurm_accounts: "{{ fhpc_groups }}"
-slurm_compute_nodes: "{{ fhpc_nodes['compute'] }}"
 racksdb_database: "{{ fhpc_db }}"
 redis_local_password_file: "{{ fhpc_cluster_state_dir }}/redis/redis.password"
 slurmweb_local_slurmrestd_jwt_key_file: "{{ fhpc_local_slurm_jwt_key }}"
diff --git a/conf/roles/slurm/templates/slurm.conf.j2 b/conf/roles/slurm/templates/slurm.conf.j2
index 5c163d6..735e3ae 100644
--- a/conf/roles/slurm/templates/slurm.conf.j2
+++ b/conf/roles/slurm/templates/slurm.conf.j2
@@ -35,7 +35,7 @@ GresTypes=gpu
 {% endif %}
 
 {% for node_type in slurm_compute_nodes %}
-NodeName={{ node_type.nodes | nodeset_fold }} Sockets={{ node_type.sockets }} CoresPerSocket={{ node_type.cores }} RealMemory={{ node_type.memory }}{% if not slurm_emulator %}{% for model, nb in node_type.gpus.items() %} Gres=gpu:{{ slurm_gpus_models_map.get(model, 'nvidia') }}:{{ nb }}{% endfor %}{% endif %}{% if slurm_emulator %} NodeHostName={{ slurm_server }}{% endif %} State=UNKNOWN
+NodeName={{ node_type.nodes | nodeset_fold }} Sockets={{ node_type.sockets }} CoresPerSocket={{ node_type.cores }} RealMemory={{ node_type.memory }}{% if not slurm_emulator and node_type.gpus | length > 0 %} Gres={% for model, nb in node_type.gpus.items() %}gpu:{{ slurm_gpus_models_map.get(model, 'nvidia') }}:{{ nb }}{% if not loop.last %},{% endif %}{% endfor %}{% endif %}{% if slurm_emulator %} NodeHostName={{ slurm_server }}{% endif %} State=UNKNOWN
 {% endfor %}
 {% for slurm_partition in slurm_partitions %}
 PartitionName={{ slurm_partition.name }} Nodes={{ slurm_partition.nodes }} Default={{ "YES" if "default" in slurm_partition and slurm_partition.default else "NO" }}{% for param, value in slurm_partition.params.items() %} {{param}}={{value}}{% endfor %}