Supertype for cluster managers, which control worker processes as a cluster.
Cluster managers implement how workers can be added, removed and communicated with.
`SSHManager` and `LocalManager` are subtypes of this.

!!! note
    Subtyping `ClusterManager` is no longer required. DistributedNext now
    uses the [`is_cluster_manager`](@ref) trait to recognise cluster managers,
    so any type can opt in by defining `DistributedNext.is_cluster_manager(::MyMgr) = true`.
    `ClusterManager` is kept for backward compatibility; a subtype is automatically
    recognised as a cluster manager via a trait fallback.
916"""
abstract type ClusterManager end

"""
    is_cluster_manager(x) -> Bool

Trait identifying `x` as a cluster manager. Defaults to `false`. Cluster
managers opt in by defining a method returning `true`:

```julia
DistributedNext.is_cluster_manager(::MyManager) = true
```

Any subtype of [`ClusterManager`](@ref) is automatically recognised via a
fallback method. Defining this trait does *not* require subtyping
`ClusterManager`, which lets external types (for example, types already
subtyping `Distributed.ClusterManager`) act as DistributedNext cluster
managers without multiple inheritance.
"""
is_cluster_manager(::Any) = false
# Backward-compatibility fallback: every `ClusterManager` subtype opts in implicitly.
is_cluster_manager(::ClusterManager) = true
37+
# Throw an `ArgumentError` unless `manager` has opted into the cluster-manager
# trait, i.e. unless `is_cluster_manager(manager)` returns `true`. Used by entry
# points accepting user-supplied managers so we fail early with a clear message.
# Returns `nothing` when the check passes.
function check_cluster_manager(manager)
    is_cluster_manager(manager) && return nothing

    # Name the concrete type in the message so the user can copy the suggested
    # opt-in method definition verbatim.
    mgr_type = typeof(manager)
    msg = "$(mgr_type) is not recognised as a cluster manager. " *
          "Define `DistributedNext.is_cluster_manager(::$(mgr_type)) = true` " *
          "to opt in."
    throw(ArgumentError(msg))
end
48+
1249function throw_if_cluster_manager_unassigned ()
1350 isassigned (CTX[]. cluster_manager) || error (" cluster_manager is unassigned" )
1451 return nothing
@@ -121,12 +158,12 @@ mutable struct Worker
121158 w_stream:: IO
122159 w_serializer:: ClusterSerializer # writes can happen from any task hence store the
123160 # serializer as part of the Worker object
124- manager:: ClusterManager
161+ manager:: Any
125162 config:: WorkerConfig
126163 version:: Union{VersionNumber, Nothing} # Julia version of the remote process
127164 initialized:: Event
128165
129- function Worker (id:: Int , r_stream:: IO , w_stream:: IO , manager:: ClusterManager ;
166+ function Worker (id:: Int , r_stream:: IO , w_stream:: IO , manager;
130167 version:: Union{VersionNumber, Nothing} = nothing ,
131168 config:: WorkerConfig = WorkerConfig ())
132169 w = Worker (id)
@@ -404,14 +441,14 @@ function parse_connection_info(str)
404441end
405442
406443"""
407- init_worker(cookie::AbstractString, manager::ClusterManager =DefaultClusterManager())
444+ init_worker(cookie::AbstractString, manager=DefaultClusterManager())
408445
409446Called by cluster managers implementing custom transports. It initializes a newly launched
410447process as a worker. Command line argument `--worker[=<cookie>]` has the effect of initializing a
411448process as a worker using TCP/IP sockets for transport.
412449`cookie` is a [`cluster_cookie`](@ref).
413450"""
414- function init_worker (cookie:: AbstractString , manager:: ClusterManager = DefaultClusterManager ())
451+ function init_worker (cookie:: AbstractString , manager= DefaultClusterManager ())
415452 myrole! (:worker )
416453
417454 # On workers, the default cluster manager connects via TCP sockets. Custom
440477# Only one addprocs can be in progress at any time
441478#
442479"""
443- addprocs(manager::ClusterManager ; kwargs...) -> List of process identifiers
480+ addprocs(manager; kwargs...) -> List of process identifiers
444481
445482Launches worker processes via the specified cluster manager.
446483
@@ -479,7 +516,8 @@ if istaskdone(t) # Check if `addprocs` has completed to ensure `fetch` doesn't
479516end
480517```
481518"""
482- function addprocs (manager:: ClusterManager ; kwargs... )
519+ function addprocs (manager; kwargs... )
520+ check_cluster_manager (manager)
483521 params = merge (default_addprocs_params (manager), Dict {Symbol, Any} (kwargs))
484522
485523 init_multi ()
@@ -492,7 +530,7 @@ function addprocs(manager::ClusterManager; kwargs...)
492530 warning_interval, [(manager, params)])
493531
494532 # Add new workers
495- new_workers = @lock CTX[]. worker_lock addprocs_locked (manager:: ClusterManager , params)
533+ new_workers = @lock CTX[]. worker_lock addprocs_locked (manager, params)
496534
497535 # Call worker-started callbacks
498536 _run_callbacks_concurrently (" worker-started" , CTX[]. worker_started_callbacks,
@@ -501,7 +539,7 @@ function addprocs(manager::ClusterManager; kwargs...)
501539 return new_workers
502540end
503541
504- function addprocs_locked (manager:: ClusterManager , params)
542+ function addprocs_locked (manager, params)
505543 topology (Symbol (params[:topology ]))
506544
507545 if CTX[]. pgrp. topology != = :all_to_all
@@ -574,13 +612,13 @@ function set_valid_processes(plist::Array{Int})
574612end
575613
576614"""
577- default_addprocs_params(mgr::ClusterManager ) -> Dict{Symbol, Any}
615+ default_addprocs_params(mgr) -> Dict{Symbol, Any}
578616
579617Implemented by cluster managers. The default keyword parameters passed when calling
580618`addprocs(mgr)`. The minimal set of options is available by calling
581619`default_addprocs_params()`
582620"""
583- default_addprocs_params (:: ClusterManager ) = default_addprocs_params ()
621+ default_addprocs_params (_ ) = default_addprocs_params ()
584622default_addprocs_params () = Dict {Symbol,Any} (
585623 :topology => :all_to_all ,
586624 :dir => pwd (),
@@ -639,7 +677,7 @@ function launch_n_additional_processes(manager, frompid, fromconfig, cnt, launch
639677 end
640678end
641679
642- function create_worker (manager:: ClusterManager , wconfig:: WorkerConfig )
680+ function create_worker (manager, wconfig:: WorkerConfig )
643681 # only node 1 can add new nodes, since nobody else has the full list of address:port
644682 @assert CTX[]. lproc. id == 1
645683 timeout = worker_timeout ()
0 commit comments