-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy path80_start-mjs.sh
More file actions
executable file
·85 lines (68 loc) · 2.99 KB
/
80_start-mjs.sh
File metadata and controls
executable file
·85 lines (68 loc) · 2.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env bash
# Copyright 2022-2026 The MathWorks, Inc.
# Prefix every xtrace line with the current date (\d) and time (\t).
PS4='+ [\d \t] '
# Trace each command as it runs and make a pipeline report failure when any of
# its stages fails. The "-o" is required: a bare `set -x pipefail` does not turn
# pipefail on, it only enables xtrace and overwrites $1 with the word "pipefail".
set -x -o pipefail
echo "===Setup directory permissions==="
# Add an Access Control List entry granting CLOUD_USER read/write/execute on
# /var/run. The ACL spec is quoted so the user name is always passed to setfacl
# as a single argument, without word splitting or globbing.
setfacl -m "u:${CLOUD_USER}:rwx" /var/run
echo "===Setting up Networking==="
# Ensure that all communication with the headnode occurs on the local network:
# the headnode's external hostname is mapped to its local IP in /etc/hosts.
# Then choose the hostname that MJS is started with on this node.
# EXTERNAL/INTERNAL_HOSTNAME variables are defined in the startup/setup-mjs-hostname.sh file
#
# printf with quoted arguments writes the values verbatim; the previous unquoted
# echo was subject to word splitting and pathname expansion.
if [[ "${NODE_TYPE}" == 'HEADNODE' ]]; then
  printf '%s %s\n' "${LOCAL_IPV4}" "${EXTERNAL_HOSTNAME}" >> /etc/hosts
  # Headnode uses its specific external hostname for MJS and external comms
  MJS_HOSTNAME="${EXTERNAL_HOSTNAME}"
else
  printf '%s %s\n' "${HEADNODE_LOCAL_IP}" "${HEADNODE_EXTERNAL_HOSTNAME}" >> /etc/hosts
  # For workers, default MJS_HOSTNAME to the internal hostname
  MJS_HOSTNAME="${INTERNAL_HOSTNAME}"
fi
# The MATLAB client has to be able to connect directly to the workers;
# parpools cannot be created otherwise. Export both hostname overrides
# so that processes started later in this script inherit them.
MDCE_OVERRIDE_EXTERNAL_HOSTNAME="${EXTERNAL_HOSTNAME}"
MDCE_OVERRIDE_INTERNAL_HOSTNAME="${INTERNAL_HOSTNAME}"
export MDCE_OVERRIDE_EXTERNAL_HOSTNAME MDCE_OVERRIDE_INTERNAL_HOSTNAME

# Releases that sort before R2025a (R2024b and earlier) additionally get the
# MPI interface hostname, set to the internal hostname.
if [[ 'R2025a' > "${MATLAB_RELEASE}" ]]; then
  MPICH_INTERFACE_HOSTNAME="${INTERNAL_HOSTNAME}"
  export MPICH_INTERFACE_HOSTNAME
fi
echo "===Starting MATLAB Job Scheduler==="
# Create the checkpoint directory (including missing parents), hand the whole
# tree to CLOUD_USER and set the directory mode to 755 (rwxr-xr-x).
# Every expansion is quoted and preceded by "--" so a path containing spaces or
# starting with "-" is still treated as one path argument.
mkdir -p -- "${CHECKPOINT_ROOT}"
chown -R -- "${CLOUD_USER}:${CLOUD_USER}" "${CHECKPOINT_ROOT}"
chmod 755 -- "${CHECKPOINT_ROOT}"
# Build the argument list for `mjs start` as an array so that every value is
# passed as exactly one argument.

# Keep only the leading digits of CLUSTER_LOG_LEVEL: everything from the first
# non-digit character onwards is stripped.
mjs_log_level="${CLUSTER_LOG_LEVEL%%[^0-9]*}"

MJS_OPTS=(-hostname "${MJS_HOSTNAME}")
# Only pass -loglevel when a numeric level is available. Previously an empty
# level expanded to nothing, leaving "-loglevel" to consume the following flag
# as its value.
# NOTE(review): assumes mjs falls back to its own default level when -loglevel
# is omitted - confirm against the mjs documentation.
if [[ -n "${mjs_log_level}" ]]; then
  MJS_OPTS+=(-loglevel "${mjs_log_level}")
fi
MJS_OPTS+=(
  -enablepeerlookup
  -sharedsecretfile "${SECRET_FILE}"
  -cleanPreserveJobs
)

# For these two termination policies, also enable activity notifications and
# point mjs at the busy/idle scripts directory.
if [[ "${TERMINATION_POLICY}" == "Never" || "${TERMINATION_POLICY}" == "When cluster is idle" ]]; then
  MJS_OPTS+=(
    -sendactivitynotifications
    -scriptroot "${MJS_BUSY_IDLE_SCRIPTS}"
  )
fi
# The MJS control scripts are invoked by relative path (./mjs here, and
# ./startjobmanager / ./startworker later in this script), so the working
# directory must be correct. Abort if the directory cannot be entered instead of
# running whatever "./mjs" happens to resolve to elsewhere.
cd "${MATLAB_ROOT}/toolbox/parallel/bin" || {
  echo "Cannot change directory to ${MATLAB_ROOT}/toolbox/parallel/bin" >&2
  exit 1
}
# Start MJS as CLOUD_USER; -E asks sudo to preserve the current environment,
# which carries the variables exported earlier in this script.
sudo -E -u "${CLOUD_USER}" ./mjs start "${MJS_OPTS[@]}"
# On the headnode start the job manager; on every other node start the workers.
if [[ "${NODE_TYPE}" == 'HEADNODE' ]]; then
  echo "===Starting Job Manager==="
  if [[ -f "${MJS_ADMIN_PASSWORD_FILE}" ]]; then
    # Provide the password for the administrator account if one has been generated (Security Level 2 and 3)
    # Pause xtrace while the secret is read and assigned so the password is not
    # written to the trace output; the previous tracing state is restored after.
    case "$-" in
      *x*)
        xtrace_was_on=1
        { set +x; } 2>/dev/null
        ;;
      *)
        xtrace_was_on=0
        ;;
    esac
    PARALLEL_SERVER_JOBMANAGER_ADMIN_PASSWORD=$(<"${MJS_ADMIN_PASSWORD_FILE}")
    # Releases that sort after R2023b get PARALLEL_SERVER_JOBMANAGER_ADMIN_PASSWORD;
    # R2023b and earlier get the same value as MDCEQE_JOBMANAGER_ADMIN_PASSWORD.
    if [[ "${MATLAB_RELEASE}" > 'R2023b' ]]; then
      export PARALLEL_SERVER_JOBMANAGER_ADMIN_PASSWORD
    else
      MDCEQE_JOBMANAGER_ADMIN_PASSWORD="${PARALLEL_SERVER_JOBMANAGER_ADMIN_PASSWORD}"
      export MDCEQE_JOBMANAGER_ADMIN_PASSWORD
    fi
    if ((xtrace_was_on)); then
      set -x
    fi
    unset xtrace_was_on
  fi
  # Start Job Manager as CLOUD_USER (-E preserves the exported environment)
  sudo -E -u "${CLOUD_USER}" ./startjobmanager -name "${JOB_MANAGER_NAME}" -certificate "${CERT_FILE}"
else
  echo "===Starting workers==="
  # Start worker processes as CLOUD_USER, registering them with the job manager
  # on the headnode
  sudo -E -u "${CLOUD_USER}" ./startworker -jobmanagerhost "${HEADNODE_EXTERNAL_HOSTNAME}" -jobmanager "${JOB_MANAGER_NAME}" -num "${WORKERS_PER_NODE}"
fi
echo "===Done==="