-
-
Notifications
You must be signed in to change notification settings - Fork 65
/
Copy pathmanager.py
306 lines (250 loc) · 9.43 KB
/
manager.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
"""A manager for dask clusters."""
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
import asyncio
import importlib
from inspect import isawaitable
from typing import Any, Dict, List, Tuple, Union
from uuid import uuid4

import dask
from dask.utils import format_bytes
from dask.distributed import Adaptive
# Type alias for a dask cluster model: a serializable dictionary
# describing a cluster (built by `make_cluster_model`).
ClusterModel = Dict[str, Any]
# Type stub for a Dask cluster object. The concrete cluster class is loaded
# dynamically from configuration, so no specific type is named here.
Cluster = Any
async def make_cluster(configuration: dict) -> Tuple[Cluster, Union[Adaptive, None]]:
    """
    Create a cluster from the configured factory and apply its initial scaling.

    The factory module, class, positional arguments and keyword arguments
    are read from the ``labextension.factory`` section of the dask
    configuration. ``configuration`` is merged over ``labextension.default``
    to decide how the new cluster is scaled.

    Parameters
    ----------
    configuration : dict
        Per-cluster settings. The keys read here are ``adapt`` (keyword
        arguments forwarded to ``cluster.adapt``) and ``workers`` (passed to
        ``cluster.scale``). ``adapt`` takes precedence when both are set.

    Returns
    -------
    cluster, adaptive : the started cluster, and the object returned by
        ``cluster.adapt`` or None if adaptive scaling was not requested.
    """
    factory_module = importlib.import_module(
        dask.config.get("labextension.factory.module")
    )
    # Use a distinct local name so the module-level ``Cluster`` type alias
    # is not shadowed inside this function.
    cluster_class = getattr(
        factory_module, dask.config.get("labextension.factory.class")
    )

    # Treat a null ``args`` / ``kwargs`` config entry as empty rather than
    # failing on ``.items()`` or argument unpacking.
    factory_args = dask.config.get("labextension.factory.args") or []
    factory_kwargs = dask.config.get("labextension.factory.kwargs") or {}
    # Config keys may be hyphenated; keyword arguments need underscores.
    factory_kwargs = {
        key.replace("-", "_"): entry for key, entry in factory_kwargs.items()
    }

    cluster = await cluster_class(*factory_args, **factory_kwargs, asynchronous=True)

    configuration = dask.config.merge(
        dask.config.get("labextension.default"), configuration
    )

    adaptive = None
    adapt_kwargs = configuration.get("adapt")
    n_workers = configuration.get("workers")
    if adapt_kwargs:
        adaptive = cluster.adapt(**adapt_kwargs)
    elif n_workers is not None:
        # ``scale`` may or may not return an awaitable depending on the
        # cluster implementation.
        t = cluster.scale(n_workers)
        if isawaitable(t):
            await t

    return cluster, adaptive
class DaskClusterManager:
    """
    A class for starting, stopping, and otherwise managing the lifecycle
    of Dask clusters.

    Clusters are tracked by a string id. The manager can be awaited, or used
    as an async context manager, to wait for the clusters listed under the
    ``labextension.initial`` configuration key to be started.
    """

    def __init__(self) -> None:
        """Initialize the cluster manager"""
        # Live cluster objects, keyed by cluster id.
        self._clusters: Dict[str, Cluster] = dict()
        # Adaptive controllers for clusters that are scaled adaptively.
        self._adaptives: Dict[str, Adaptive] = dict()
        # Display names, keyed by cluster id.
        self._cluster_names: Dict[str, str] = dict()
        # Number of clusters started so far; used to number default names.
        self._n_clusters = 0
        # Task running `_async_init`; created lazily by `initialized`.
        self._initialized = None

    async def _async_init(self):
        """The async part of init

        Invoked by `await manager`. Starts one cluster per entry in the
        ``labextension.initial`` configuration and returns the manager.
        """
        for model in dask.config.get("labextension.initial"):
            await self.start_cluster(configuration=model)
        return self

    @property
    def initialized(self):
        """Don't create initialization task until it's been requested

        typically via `await manager`

        Makes it easier to ensure we don't do anything before we are in the event loop.
        """
        if self._initialized is None:
            self._initialized = asyncio.create_task(self._async_init())
        return self._initialized

    async def start_cluster(
        self, cluster_id: str = "", configuration: Union[dict, None] = None
    ) -> ClusterModel:
        """
        Start a new Dask cluster.

        Parameters
        ----------
        cluster_id : string
            An optional string id for the cluster. If not given, a random id
            will be chosen.
        configuration : dict
            Optional settings for the cluster, passed to `make_cluster`. A
            ``name`` entry, if present and non-empty, is used as the display
            name. Defaults to an empty configuration.

        Returns
        -------
        cluster_model : a dask cluster model.
        """
        # Avoid a shared mutable default argument.
        if configuration is None:
            configuration = {}
        if not cluster_id:
            cluster_id = str(uuid4())

        cluster, adaptive = await make_cluster(configuration)
        self._n_clusters += 1

        # Check for a name in the config
        if not configuration.get("name"):
            cluster_type = type(cluster).__name__
            cluster_name = f"{cluster_type} {self._n_clusters}"
        else:
            cluster_name = configuration["name"]

        # Check if the cluster was started adaptively
        if adaptive:
            self._adaptives[cluster_id] = adaptive

        self._clusters[cluster_id] = cluster
        self._cluster_names[cluster_id] = cluster_name
        return make_cluster_model(cluster_id, cluster_name, cluster, adaptive=adaptive)

    async def close_cluster(self, cluster_id: str) -> Union[ClusterModel, None]:
        """
        Close a Dask cluster.

        Parameters
        ----------
        cluster_id : string
            A string id for the cluster.

        Returns
        -------
        cluster_model : the dask cluster model for the shut down cluster,
            or None if it was not found.
        """
        cluster = self._clusters.get(cluster_id)
        if not cluster:
            return None

        # ``close`` may or may not return an awaitable depending on the
        # cluster implementation.
        r = cluster.close()
        if isawaitable(r):
            await r
        self._clusters.pop(cluster_id)
        name = self._cluster_names.pop(cluster_id)
        adaptive = self._adaptives.pop(cluster_id, None)
        return make_cluster_model(cluster_id, name, cluster, adaptive)

    async def get_cluster(self, cluster_id) -> Union[ClusterModel, None]:
        """
        Get a Dask cluster model.

        Parameters
        ----------
        cluster_id : string
            A string id for the cluster.

        Returns
        -------
        cluster_model : the dask cluster model for the cluster,
            or None if it was not found.
        """
        cluster = self._clusters.get(cluster_id)
        if not cluster:
            return None

        name = self._cluster_names.get(cluster_id, "")
        adaptive = self._adaptives.get(cluster_id)
        return make_cluster_model(cluster_id, name, cluster, adaptive)

    async def list_clusters(self) -> List[ClusterModel]:
        """
        List the Dask cluster models known to the manager.

        Returns
        -------
        cluster_models : A list of the dask cluster models known to the manager.
        """
        return [
            make_cluster_model(
                cluster_id,
                self._cluster_names[cluster_id],
                self._clusters[cluster_id],
                self._adaptives.get(cluster_id, None),
            )
            for cluster_id in self._clusters
        ]

    async def scale_cluster(self, cluster_id: str, n: int) -> Union[ClusterModel, None]:
        """
        Scale a cluster to a fixed number of workers.

        Parameters
        ----------
        cluster_id : string
            A string id for the cluster.
        n : int
            The requested number of workers.

        Returns
        -------
        cluster_model : the dask cluster model for the cluster,
            or None if it was not found.
        """
        # Check if the cluster exists before touching any other bookkeeping,
        # so that an unknown id yields None rather than a KeyError.
        cluster = self._clusters.get(cluster_id)
        if not cluster:
            return None

        name = self._cluster_names[cluster_id]
        # A manual scale request means the cluster is no longer tracked as
        # adaptive by the manager.
        adaptive = self._adaptives.pop(cluster_id, None)

        # Check if it is actually different.
        model = make_cluster_model(cluster_id, name, cluster, adaptive)
        if model.get("adapt") is None and model["workers"] == n:
            return model

        # Otherwise, rescale the model.
        t = cluster.scale(n)
        if isawaitable(t):
            await t
        return make_cluster_model(cluster_id, name, cluster, adaptive=None)

    async def adapt_cluster(
        self, cluster_id: str, minimum: int, maximum: int
    ) -> Union[ClusterModel, None]:
        """
        Scale a cluster adaptively between a minimum and maximum worker count.

        Parameters
        ----------
        cluster_id : string
            A string id for the cluster.
        minimum : int
            The ``minimum`` passed to ``cluster.adapt``.
        maximum : int
            The ``maximum`` passed to ``cluster.adapt``.

        Returns
        -------
        cluster_model : the dask cluster model for the cluster,
            or None if it was not found.
        """
        # Check if the cluster exists before touching any other bookkeeping,
        # so that an unknown id yields None rather than a KeyError.
        cluster = self._clusters.get(cluster_id)
        if not cluster:
            return None

        name = self._cluster_names[cluster_id]
        # Look up (rather than pop) the current controller: if the bounds are
        # unchanged it must stay registered with the manager.
        adaptive = self._adaptives.get(cluster_id)

        # Check if it is actually different.
        model = make_cluster_model(cluster_id, name, cluster, adaptive)
        if (
            model.get("adapt") is not None
            and model["adapt"]["minimum"] == minimum
            and model["adapt"]["maximum"] == maximum
        ):
            return model

        # Otherwise, rescale the model.
        adaptive = cluster.adapt(minimum=minimum, maximum=maximum)
        self._adaptives[cluster_id] = adaptive
        return make_cluster_model(cluster_id, name, cluster, adaptive)

    async def close(self):
        """Close all clusters and cleanup"""
        # Iterate over a snapshot: close_cluster removes entries as it goes.
        for cluster_id in list(self._clusters):
            await self.close_cluster(cluster_id)

    async def __aenter__(self):
        """
        Enter an asynchronous context.
        This waits for any initial clusters specified via configuration to start.
        """
        await self.initialized
        return self

    async def __aexit__(self, exc_type, exc, tb):
        """
        Exit an asynchronous context.
        This closes any extant clusters.
        """
        await self.close()

    def __await__(self):
        """
        Awaiter for the manager to be initialized.
        This waits for any initial clusters specified via configuration to start.
        """
        return self.initialized.__await__()
def make_cluster_model(
    cluster_id: str,
    cluster_name: str,
    cluster: Cluster,
    adaptive: Union[Adaptive, None],
) -> ClusterModel:
    """
    Build the model for a cluster: a JSON-serializable summary of the
    cluster that can be sent over the wire.

    Parameters
    ----------
    cluster_id: string
        A unique string for the cluster.
    cluster_name: string
        A display name for the cluster.
    cluster: Cluster
        The cluster to summarize.
    adaptive: Adaptive
        The adaptive controller for the number of workers for the cluster, or
        none if the cluster is not scaled adaptively.

    Returns
    -------
    cluster_model : a dictionary with the keys ``id``, ``name``,
        ``scheduler_address``, ``dashboard_link``, ``workers``, ``memory``
        and ``cores``, plus ``adapt`` (``minimum`` / ``maximum``) when an
        adaptive controller is given.
    """
    # This would be a great target for a dataclass
    # once python 3.7 is in wider use.

    # Use the cluster's ``scheduler_info`` attribute when it has one;
    # otherwise ask the scheduler object for its identity.
    try:
        info = cluster.scheduler_info
    except AttributeError:
        info = cluster.scheduler.identity()

    # Worker records carry the thread count under "nthreads", or under
    # "ncores" for dask.__version__ < 2.0.
    try:
        cores = sum(worker["nthreads"] for worker in info["workers"].values())
    except KeyError:
        cores = sum(worker["ncores"] for worker in info["workers"].values())
    assert isinstance(info, dict)

    # Fill the model one key at a time, in the order clients receive them.
    model = {"id": cluster_id, "name": cluster_name}
    model["scheduler_address"] = cluster.scheduler_address
    model["dashboard_link"] = cluster.dashboard_link or ""
    model["workers"] = len(info["workers"])
    model["memory"] = format_bytes(
        sum(worker["memory_limit"] for worker in info["workers"].values())
    )
    model["cores"] = cores

    if adaptive:
        model["adapt"] = dict(minimum=adaptive.minimum, maximum=adaptive.maximum)

    return model