Use Expr instead of HLG #14610
3 errors, 1 009 fail, 259 skipped, 2 252 pass in 15h 17m 10s
Annotations
Check warning on line 0 in distributed.cli.tests.test_dask_worker
github-actions / Unit Test Results
1 out of 5 runs failed: test_contact_listen_address[tcp://0.0.0.0:---nanny] (distributed.cli.tests.test_dask_worker)
artifacts/ubuntu-latest-mindeps-default-notci1/pytest.xml [took 1s]
Raw output
AttributeError: 'dict' object has no attribute '__dask_graph__'
c = <Client: No scheduler connected>
s = <Scheduler 'tcp://127.0.0.1:43743', workers: 0, cores: 0, tasks: 0>
nanny = '--nanny', listen_address = 'tcp://0.0.0.0:35207'
@pytest.mark.slow
@pytest.mark.skipif(not LINUX, reason="Need 127.0.0.2 to mean localhost")
@pytest.mark.parametrize("nanny", ["--nanny", "--no-nanny"])
@pytest.mark.parametrize("listen_address", ["tcp://0.0.0.0:", "tcp://127.0.0.2:"])
@gen_cluster(client=True, nthreads=[])
async def test_contact_listen_address(c, s, nanny, listen_address):
port = open_port()
listen_address += str(port)
with popen(
[
sys.executable,
"-m",
"dask",
"worker",
s.address,
nanny,
"--no-dashboard",
"--contact-address",
f"tcp://127.0.0.2:{port}",
"--listen-address",
listen_address,
]
):
await c.wait_for_workers(1)
info = c.scheduler_info()
assert info["workers"].keys() == {f"tcp://127.0.0.2:{port}"}
# roundtrip works
> assert await c.submit(lambda x: x + 1, 10) == 11
distributed/cli/tests/test_dask_worker.py:500:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
distributed/client.py:408: in _result
raise exc.with_traceback(tb)
distributed/utils.py:1507: in run_in_executor_with_context
return await loop.run_in_executor(
../../../miniconda3/envs/dask-distributed/lib/python3.10/concurrent/futures/thread.py:58: in run
result = self.fn(*self.args, **self.kwargs)
distributed/utils.py:1508: in <lambda>
executor, lambda: context.run(func, *args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
from __future__ import annotations
import asyncio
import contextlib
import dataclasses
import heapq
import inspect
import itertools
import json
import logging
import math
import operator
import os
import pickle
import random
import textwrap
import uuid
import warnings
import weakref
from abc import abstractmethod
from collections import defaultdict, deque
from collections.abc import (
Callable,
Collection,
Container,
Hashable,
Iterable,
Iterator,
Mapping,
Sequence,
Set,
)
from contextlib import suppress
from functools import partial
from typing import TYPE_CHECKING, Any, ClassVar, Literal, NamedTuple, cast, overload
import psutil
import tornado.web
from sortedcontainers import SortedDict, SortedSet
from tlz import (
concat,
first,
groupby,
merge,
merge_sorted,
merge_with,
partition,
pluck,
second,
take,
valmap,
)
from tornado.ioloop import IOLoop
import dask
import dask.utils
from dask._task_spec import DependenciesMapping, GraphNode, convert_legacy_graph
from dask.core import istask, validate_key
from dask.typing import Key, no_default
from dask.utils import (
_deprecated,
_deprecated_kwarg,
format_bytes,
format_time,
key_split,
parse_bytes,
parse_timedelta,
tmpfile,
)
from dask.widgets import get_template
from distributed import cluster_dump, preloading, profile
from distributed import versions as version_module
from distributed._asyncio import RLock
from distributed._stories import scheduler_story
from distributed.active_memory_manager import ActiveMemoryManagerExtension, RetireWorker
from distributed.batched import BatchedSend
from distributed.broker import Broker
from distributed.client import SourceCode
from distributed.collections import HeapSet
from distributed.comm import (
Comm,
CommClosedError,
get_address_host,
normalize_address,
resolve_address,
unparse_host_port,
)
from distributed.comm.addressing import addresses_from_user_args
from distributed.compatibility import PeriodicCallback
from distributed.core import (
ErrorMessage,
OKMessage,
Status,
clean_exception,
error_message,
rpc,
send_recv,
)
from distributed.diagnostics.memory_sampler import MemorySamplerExtension
from distributed.diagnostics.plugin import SchedulerPlugin, _get_plugin_name
from distributed.event import EventExtension
from distributed.gc import disable_gc_diagnosis, enable_gc_diagnosis
from distributed.http import get_handlers
from distributed.metrics import monotonic, time
from distributed.multi_lock import MultiLockExtension
from distributed.node import ServerNode
from distributed.proctitle import setproctitle
from distributed.protocol import deserialize
from distributed.protocol.pickle import dumps, loads
from distributed.protocol.serialize import Serialized, ToPickle, serialize
from distributed.publish import PublishExtension
from distributed.pubsub import PubSubSchedulerExtension
from distributed.queues import QueueExtension
from distributed.recreate_tasks import ReplayTaskScheduler
from distributed.security import Security
from distributed.semaphore import SemaphoreExtension
from distributed.shuffle import ShuffleSchedulerPlugin
from distributed.spans import SpanMetadata, SpansSchedulerExtension
from distributed.stealing import WorkStealing
from distributed.utils import (
All,
Deadline,
TimeoutError,
format_dashboard_link,
get_fileno_limit,
key_split_group,
log_errors,
offload,
recursive_to_dict,
wait_for,
)
from distributed.utils_comm import (
gather_from_workers,
retry_operation,
scatter_to_workers,
)
from distributed.variable import VariableExtension
if TYPE_CHECKING:
# TODO import from typing (requires Python >=3.10)
# TODO import from typing (requires Python >=3.11)
from typing_extensions import Self, TypeAlias
from dask._expr import Expr
# Not to be confused with distributed.worker_state_machine.TaskStateState
TaskStateState: TypeAlias = Literal[
"released",
"waiting",
"no-worker",
"queued",
"processing",
"memory",
"erred",
"forgotten",
]
ALL_TASK_STATES: Set[TaskStateState] = set(TaskStateState.__args__) # type: ignore
# {task key -> finish state}
# Not to be confused with distributed.worker_state_machine.Recs
Recs: TypeAlias = dict[Key, TaskStateState]
# {client or worker address: [{op: <key>, ...}, ...]}
Msgs: TypeAlias = dict[str, list[dict[str, Any]]]
# (recommendations, client messages, worker messages)
RecsMsgs: TypeAlias = tuple[Recs, Msgs, Msgs]
T_runspec: TypeAlias = GraphNode
logger = logging.getLogger(__name__)
LOG_PDB = dask.config.get("distributed.admin.pdb-on-err")
DEFAULT_DATA_SIZE = parse_bytes(
dask.config.get("distributed.scheduler.default-data-size")
)
STIMULUS_ID_UNSET = "<stimulus_id unset>"
DEFAULT_EXTENSIONS = {
"multi_locks": MultiLockExtension,
"publish": PublishExtension,
"replay-tasks": ReplayTaskScheduler,
"queues": QueueExtension,
"variables": VariableExtension,
"pubsub": PubSubSchedulerExtension,
"semaphores": SemaphoreExtension,
"events": EventExtension,
"amm": ActiveMemoryManagerExtension,
"memory_sampler": MemorySamplerExtension,
"shuffle": ShuffleSchedulerPlugin,
"spans": SpansSchedulerExtension,
"stealing": WorkStealing,
}
class ClientState:
"""A simple object holding information about a client."""
#: A unique identifier for this client. This is generally an opaque
#: string generated by the client itself.
client_key: str
#: Cached hash of :attr:`~ClientState.client_key`
_hash: int
#: A set of tasks this client wants to be kept in memory, so that it can download
#: its result when desired. This is the reverse mapping of
#: :class:`TaskState.who_wants`. Tasks are typically removed from this set when the
#: corresponding object in the client's space (for example a ``Future`` or a Dask
#: collection) gets garbage-collected.
wants_what: set[TaskState]
#: The last time we received a heartbeat from this client, in local scheduler time.
last_seen: float
#: Output of :func:`distributed.versions.get_versions` on the client
versions: dict[str, Any]
__slots__ = tuple(__annotations__)
def __init__(self, client: str, *, versions: dict[str, Any] | None = None):
self.client_key = client
self._hash = hash(client)
self.wants_what = set()
self.last_seen = time()
self.versions = versions or {}
def __hash__(self) -> int:
return self._hash
def __eq__(self, other: object) -> bool:
if not isinstance(other, ClientState):
return False
return self.client_key == other.client_key
def __repr__(self) -> str:
return f"<Client {self.client_key!r}>"
def __str__(self) -> str:
return self.client_key
def _to_dict_no_nest(self, *, exclude: Container[str] = ()) -> dict:
"""Dictionary representation for debugging purposes.
Not type stable and not intended for roundtrips.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
TaskState._to_dict
"""
return recursive_to_dict(
self,
exclude=set(exclude) | {"versions"}, # type: ignore
members=True,
)
class MemoryState:
"""Memory readings on a worker or on the whole cluster.
See :doc:`worker-memory`.
Attributes / properties:
managed_total
Sum of the output of sizeof() for all dask keys held by the worker in memory,
plus number of bytes spilled to disk
managed
Sum of the output of sizeof() for the dask keys held in RAM. Note that this may
be inaccurate, which may cause inaccurate unmanaged memory (see below).
spilled
Number of bytes for the dask keys spilled to the hard drive.
Note that this is the size on disk; size in memory may be different due to
compression and inaccuracies in sizeof(). In other words, given the same keys,
'managed' will change depending on the keys being in memory or spilled.
process
Total RSS memory measured by the OS on the worker process.
This is always exactly equal to managed + unmanaged.
unmanaged
process - managed. This is the sum of
- Python interpreter and modules
- global variables
- memory temporarily allocated by the dask tasks that are currently running
- memory fragmentation
- memory leaks
- memory not yet garbage collected
- memory not yet free()'d by the Python memory manager to the OS
unmanaged_old
Minimum of the 'unmanaged' measures over the last
``distributed.memory.recent-to-old-time`` seconds
unmanaged_recent
unmanaged - unmanaged_old; in other words process memory that has been recently
allocated but is not accounted for by dask; hopefully it's mostly a temporary
spike.
optimistic
managed + unmanaged_old; in other words the memory held long-term by
the process under the hopeful assumption that all unmanaged_recent memory is a
temporary spike
"""
process: int
unmanaged_old: int
managed: int
spilled: int
__slots__ = tuple(__annotations__)
def __init__(
self,
*,
process: int,
unmanaged_old: int,
managed: int,
spilled: int,
):
# Some data arrives with the heartbeat, some other arrives in realtime as the
# tasks progress. Also, sizeof() is not guaranteed to return correct results.
# This can cause glitches where a partial measure is larger than the whole, so
# we need to force all numbers to add up exactly by definition.
self.process = process
self.managed = min(self.process, managed)
self.spilled = spilled
# Subtractions between unsigned ints guaranteed by construction to be >= 0
self.unmanaged_old = min(unmanaged_old, process - self.managed)
@staticmethod
def sum(*infos: MemoryState) -> MemoryState:
process = 0
unmanaged_old = 0
managed = 0
spilled = 0
for ms in infos:
process += ms.process
unmanaged_old += ms.unmanaged_old
spilled += ms.spilled
managed += ms.managed
return MemoryState(
process=process,
unmanaged_old=unmanaged_old,
managed=managed,
spilled=spilled,
)
@property
def managed_total(self) -> int:
return self.managed + self.spilled
@property
def unmanaged(self) -> int:
# This is never negative thanks to __init__
return self.process - self.managed
@property
def unmanaged_recent(self) -> int:
# This is never negative thanks to __init__
return self.process - self.managed - self.unmanaged_old
@property
def optimistic(self) -> int:
return self.managed + self.unmanaged_old
@property
def managed_in_memory(self) -> int:
warnings.warn("managed_in_memory has been renamed to managed", FutureWarning)
return self.managed
@property
def managed_spilled(self) -> int:
warnings.warn("managed_spilled has been renamed to spilled", FutureWarning)
return self.spilled
def __repr__(self) -> str:
return (
f"Process memory (RSS) : {format_bytes(self.process)}\n"
f" - managed by Dask : {format_bytes(self.managed)}\n"
f" - unmanaged (old) : {format_bytes(self.unmanaged_old)}\n"
f" - unmanaged (recent): {format_bytes(self.unmanaged_recent)}\n"
f"Spilled to disk : {format_bytes(self.spilled)}\n"
)
def _to_dict(self, *, exclude: Container[str] = ()) -> dict:
"""Dictionary representation for debugging purposes.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
"""
return {
k: getattr(self, k)
for k in dir(self)
if not k.startswith("_")
and k not in {"sum", "managed_in_memory", "managed_spilled"}
}
class WorkerState:
"""A simple object holding information about a worker.
Not to be confused with :class:`distributed.worker_state_machine.WorkerState`.
"""
#: This worker's unique key. This can be its connected address
#: (such as ``"tcp://127.0.0.1:8891"``) or an alias (such as ``"alice"``).
address: str
pid: int
name: Hashable
#: The number of CPU threads made available on this worker
nthreads: int
#: Memory available to the worker, in bytes
memory_limit: int
local_directory: str
services: dict[str, int]
#: Output of :meth:`distributed.versions.get_versions` on the worker
versions: dict[str, Any]
#: Address of the associated :class:`~distributed.nanny.Nanny`, if present
nanny: str | None
#: Read-only worker status, synced one way from the remote Worker object
status: Status
#: Cached hash of :attr:`~WorkerState.server_id`
_hash: int
#: The total memory size, in bytes, used by the tasks this worker holds in memory
#: (i.e. the tasks in this worker's :attr:`~WorkerState.has_what`).
nbytes: int
#: Worker memory unknown to the worker, in bytes, which has been there for more than
#: 30 seconds. See :class:`MemoryState`.
_memory_unmanaged_old: int
#: History of the last 30 seconds' worth of unmanaged memory. Used to differentiate
#: between "old" and "new" unmanaged memory.
#: Format: ``[(timestamp, bytes), (timestamp, bytes), ...]``
_memory_unmanaged_history: deque[tuple[float, int]]
metrics: dict[str, Any]
#: The last time we received a heartbeat from this worker, in local scheduler time.
last_seen: float
time_delay: float
bandwidth: float
#: A set of all TaskStates on this worker that are actors. This only includes those
#: actors whose state actually lives on this worker, not actors to which this worker
#: has a reference.
actors: set[TaskState]
#: Underlying data of :meth:`WorkerState.has_what`
_has_what: dict[TaskState, None]
#: A set of tasks that have been submitted to this worker. Multiple tasks may be
# submitted to a worker in advance and the worker will run them eventually,
# depending on its execution resources (but see :doc:`work-stealing`).
#:
#: All the tasks here are in the "processing" state.
#: This attribute is kept in sync with :attr:`TaskState.processing_on`.
processing: set[TaskState]
#: Running tasks that invoked :func:`distributed.secede`
long_running: set[TaskState]
#: A dictionary of tasks that are currently being run on this worker.
#: Each task state is associated with the duration in seconds which the task has
#: been running.
executing: dict[TaskState, float]
#: The available resources on this worker, e.g. ``{"GPU": 2}``.
#: These are abstract quantities that constrain certain tasks from running at the
#: same time on this worker.
resources: dict[str, float]
#: The sum of each resource used by all tasks allocated to this worker.
#: The numbers in this dictionary can only be less or equal than those in this
#: worker's :attr:`~WorkerState.resources`.
used_resources: dict[str, float]
#: Arbitrary additional metadata to be added to :meth:`~WorkerState.identity`
extra: dict[str, Any]
# The unique server ID this WorkerState is referencing
server_id: str
# Reference to scheduler task_groups
scheduler_ref: weakref.ref[SchedulerState] | None
task_prefix_count: defaultdict[str, int]
_network_occ: int
_occupancy_cache: float | None
#: Keys that may need to be fetched to this worker, and the number of tasks that need them.
#: All tasks are currently in `memory` on a worker other than this one.
#: Much like `processing`, this does not exactly reflect worker state:
#: keys here may be queued to fetch, in flight, or already in memory
#: on the worker.
needs_what: dict[TaskState, int]
__slots__ = tuple(__annotations__)
def __init__(
self,
*,
address: str,
status: Status,
pid: int,
name: object,
nthreads: int = 0,
memory_limit: int,
local_directory: str,
nanny: str | None,
server_id: str,
services: dict[str, int] | None = None,
versions: dict[str, Any] | None = None,
extra: dict[str, Any] | None = None,
scheduler: SchedulerState | None = None,
):
self.server_id = server_id
self.address = address
self.pid = pid
self.name = name
self.nthreads = nthreads
self.memory_limit = memory_limit
self.local_directory = local_directory
self.services = services or {}
self.versions = versions or {}
self.nanny = nanny
self.status = status
self._hash = hash(self.server_id)
self.nbytes = 0
self._memory_unmanaged_old = 0
self._memory_unmanaged_history = deque()
self.metrics = {}
self.last_seen = time()
self.time_delay = 0
self.bandwidth = parse_bytes(dask.config.get("distributed.scheduler.bandwidth"))
self.actors = set()
self._has_what = {}
self.processing = set()
self.long_running = set()
self.executing = {}
self.resources = {}
self.used_resources = {}
self.extra = extra or {}
self.scheduler_ref = weakref.ref(scheduler) if scheduler else None
self.task_prefix_count = defaultdict(int)
self.needs_what = {}
self._network_occ = 0
self._occupancy_cache = None
def __hash__(self) -> int:
return self._hash
def __eq__(self, other: object) -> bool:
return self is other or (
isinstance(other, WorkerState) and other.server_id == self.server_id
)
@property
def has_what(self) -> Set[TaskState]:
"""An insertion-sorted set-like of tasks which currently reside on this worker.
All the tasks here are in the "memory" state.
This is the reverse mapping of :attr:`TaskState.who_has`.
This is a read-only public accessor. The data is implemented as a dict without
values, because rebalance() relies on dicts being insertion-sorted.
"""
return self._has_what.keys()
@property
def host(self) -> str:
return get_address_host(self.address)
@property
def memory(self) -> MemoryState:
"""Polished memory metrics for the worker.
**Design note on managed memory**
There are two measures available for managed memory:
- ``self.nbytes``
- ``self.metrics["managed_bytes"]``
At rest, the two numbers must be identical. However, ``self.nbytes`` is
immediately updated through the batched comms as soon as each task lands in
memory on the worker; ``self.metrics["managed_bytes"]`` instead is updated by
the heartbeat, which can lag several seconds behind.
Below we are mixing likely newer managed memory info from ``self.nbytes`` with
process and spilled memory from the heartbeat. This is deliberate, so that
managed memory total is updated more frequently.
Managed memory directly and immediately contributes to optimistic memory, which
is in turn used in Active Memory Manager heuristics (at the moment of writing;
more uses will likely be added in the future). So it's important to have it
up to date; much more than it is for process memory.
Having up-to-date managed memory info as soon as the scheduler learns about
task completion also substantially simplifies unit tests.
The flip side of this design is that it may cause some noise in the
unmanaged_recent measure. e.g.:
1. Delete 100MB of managed data
2. The updated managed memory reaches the scheduler faster than the
updated process memory
3. There's a blip where the scheduler thinks that there's a sudden 100MB
increase in unmanaged_recent, since process memory hasn't changed but managed
memory has decreased by 100MB
4. When the heartbeat arrives, process memory goes down and so does the
unmanaged_recent.
This is OK - one of the main reasons for the unmanaged_recent / unmanaged_old
split is exactly to concentrate all the noise in unmanaged_recent and exclude it
from optimistic memory, which is used for heuristics.
Something that is less OK, but also less frequent, is that the sudden deletion
of spilled keys will cause a negative blip in managed memory:
1. Delete 100MB of spilled data
2. The updated managed memory *total* reaches the scheduler faster than the
updated spilled portion
3. This causes the managed memory to temporarily plummet and be replaced by
unmanaged_recent, while spilled memory remains unaltered
4. When the heartbeat arrives, managed goes back up, unmanaged_recent
goes back down, and spilled goes down by 100MB as it should have to
begin with.
:issue:`6002` will let us solve this.
"""
return MemoryState(
process=self.metrics["memory"],
managed=max(0, self.nbytes - self.metrics["spilled_bytes"]["memory"]),
spilled=self.metrics["spilled_bytes"]["disk"],
unmanaged_old=self._memory_unmanaged_old,
)
def clean(self) -> WorkerState:
"""Return a version of this object that is appropriate for serialization"""
ws = WorkerState(
address=self.address,
status=self.status,
pid=self.pid,
name=self.name,
nthreads=self.nthreads,
memory_limit=self.memory_limit,
local_directory=self.local_directory,
services=self.services,
nanny=self.nanny,
extra=self.extra,
server_id=self.server_id,
)
ws._occupancy_cache = self.occupancy
ws.executing = {ts.key: duration for ts, duration in self.executing.items()} # type: ignore
return ws
def __repr__(self) -> str:
name = f", name: {self.name}" if self.name != self.address else ""
return (
f"<WorkerState {self.address!r}{name}, "
f"status: {self.status.name}, "
f"memory: {len(self.has_what)}, "
f"processing: {len(self.processing)}>"
)
def _repr_html_(self) -> str:
return get_template("worker_state.html.j2").render(
address=self.address,
name=self.name,
status=self.status.name,
has_what=self.has_what,
processing=self.processing,
)
def identity(self) -> dict[str, Any]:
return {
"type": "Worker",
"id": self.name,
"host": self.host,
"resources": self.resources,
"local_directory": self.local_directory,
"name": self.name,
"nthreads": self.nthreads,
"memory_limit": self.memory_limit,
"last_seen": self.last_seen,
"services": self.services,
"metrics": self.metrics,
"status": self.status.name,
"nanny": self.nanny,
**self.extra,
}
def _to_dict_no_nest(self, *, exclude: Container[str] = ()) -> dict[str, Any]:
"""Dictionary representation for debugging purposes.
Not type stable and not intended for roundtrips.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
TaskState._to_dict
"""
return recursive_to_dict(
self,
exclude=set(exclude) | {"versions"}, # type: ignore
members=True,
)
@property
def scheduler(self) -> SchedulerState:
assert self.scheduler_ref
s = self.scheduler_ref()
assert s
return s
def add_to_processing(self, ts: TaskState) -> None:
"""Assign a task to this worker for compute."""
if self.scheduler.validate:
assert ts not in self.processing
tp = ts.prefix
self.task_prefix_count[tp.name] += 1
self.scheduler._task_prefix_count_global[tp.name] += 1
self.processing.add(ts)
for dts in ts.dependencies:
assert dts.who_has
if self not in dts.who_has:
self._inc_needs_replica(dts)
def add_to_long_running(self, ts: TaskState) -> None:
if self.scheduler.validate:
assert ts in self.processing
assert ts not in self.long_running
self._remove_from_task_prefix_count(ts)
# Cannot remove from processing since we're using this for things like
# idleness detection. Idle workers are typically targeted for
# downscaling but we should not downscale workers with long running
# tasks
self.long_running.add(ts)
def remove_from_processing(self, ts: TaskState) -> None:
"""Remove a task from a workers processing"""
if self.scheduler.validate:
assert ts in self.processing
if ts in self.long_running:
self.long_running.discard(ts)
else:
self._remove_from_task_prefix_count(ts)
self.processing.remove(ts)
for dts in ts.dependencies:
if dts in self.needs_what:
self._dec_needs_replica(dts)
def _remove_from_task_prefix_count(self, ts: TaskState) -> None:
prefix_name = ts.prefix.name
count = self.task_prefix_count[prefix_name] - 1
tp_count = self.task_prefix_count
tp_count_global = self.scheduler._task_prefix_count_global
if count:
tp_count[prefix_name] = count
else:
del tp_count[prefix_name]
count = tp_count_global[prefix_name] - 1
if count:
tp_count_global[prefix_name] = count
else:
del tp_count_global[prefix_name]
def remove_replica(self, ts: TaskState) -> None:
"""The worker no longer has a task in memory"""
if self.scheduler.validate:
assert ts.who_has
assert self in ts.who_has
assert ts in self.has_what
assert ts not in self.needs_wh…cheduler, title="Scheduler Profile (administrative)"
)
task_stream = TabPanel(child=task_stream, title="Task Stream")
bandwidth_workers = TabPanel(
child=bandwidth_workers.root, title="Bandwidth (Workers)"
)
bandwidth_types = TabPanel(
child=bandwidth_types.root, title="Bandwidth (Types)"
)
system = TabPanel(child=sysmon.root, title="System")
logs = TabPanel(child=logs.root, title="Scheduler Logs")
tabs = Tabs(
tabs=[
html,
task_stream,
system,
logs,
compute,
workers,
scheduler,
bandwidth_workers,
bandwidth_types,
],
sizing_mode="stretch_both",
)
from bokeh.core.templates import get_env
from bokeh.plotting import output_file, save
with tmpfile(extension=".html") as fn:
output_file(filename=fn, title="Dask Performance Report", mode=mode)
template_directory = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "dashboard", "templates"
)
template_environment = get_env()
template_environment.loader.searchpath.append(template_directory)
template = template_environment.get_template("performance_report.html")
save(tabs, filename=fn, template=template)
with open(fn) as f:
data = f.read()
return data
async def get_worker_logs(self, n=None, workers=None, nanny=False):
results = await self.broadcast(
msg={"op": "get_logs", "n": n}, workers=workers, nanny=nanny
)
return results
def log_event(self, topic: str | Collection[str], msg: Any) -> None:
"""Log an event under a given topic
Parameters
----------
topic : str, list[str]
Name of the topic under which to log an event. To log the same
event under multiple topics, pass a list of topic names.
msg
Event message to log. Note this must be msgpack serializable.
See also
--------
Client.log_event
"""
self._broker.publish(topic, msg)
def subscribe_topic(self, topic: str, client: str) -> None:
self._broker.subscribe(topic, client)
def unsubscribe_topic(self, topic: str, client: str) -> None:
self._broker.unsubscribe(topic, client)
@overload
def get_events(self, topic: str) -> tuple[tuple[float, Any], ...]: ...
@overload
def get_events(self) -> dict[str, tuple[tuple[float, Any], ...]]: ...
def get_events(
self, topic: str | None = None
) -> tuple[tuple[float, Any], ...] | dict[str, tuple[tuple[float, Any], ...]]:
return self._broker.get_events(topic)
async def get_worker_monitor_info(self, recent=False, starts=None):
if starts is None:
starts = {}
results = await asyncio.gather(
*(
self.rpc(w).get_monitor_info(recent=recent, start=starts.get(w, 0))
for w in self.workers
)
)
return dict(zip(self.workers, results))
###########
# Cleanup #
###########
@log_errors
async def check_worker_ttl(self) -> None:
now = time()
stimulus_id = f"check-worker-ttl-{now}"
assert self.worker_ttl
ttl = max(self.worker_ttl, 10 * heartbeat_interval(len(self.workers)))
to_restart = []
for ws in self.workers.values():
last_seen = now - ws.last_seen
if last_seen > ttl:
to_restart.append(ws.address)
logger.warning(
f"Worker failed to heartbeat for {last_seen:.0f}s; "
f"{'attempting restart' if ws.nanny else 'removing'}: {ws}"
)
if to_restart:
self.log_event(
"scheduler",
{
"action": "worker-ttl-timed-out",
"workers": to_restart.copy(),
"ttl": ttl,
},
)
await self.restart_workers(
to_restart,
wait_for_workers=False,
stimulus_id=stimulus_id,
)
def check_idle(self) -> float | None:
if self.status in (Status.closing, Status.closed):
return None # pragma: nocover
if self.transition_counter != self._idle_transition_counter:
self._idle_transition_counter = self.transition_counter
self.idle_since = None
return None
if self._active_graph_updates > 0:
self.idle_since = None
return None
if (
self.queued
or self.unrunnable
or any(ws.processing for ws in self.workers.values())
):
self.idle_since = None
return None
if not self.idle_since:
self.idle_since = time()
return self.idle_since
if self.jupyter:
last_activity = (
self._jupyter_server_application.web_app.last_activity().timestamp()
)
if last_activity > self.idle_since:
self.idle_since = last_activity
return self.idle_since
if self.idle_timeout:
if time() > self.idle_since + self.idle_timeout:
assert self.idle_since
logger.info(
"Scheduler closing after being idle for %s",
format_time(self.idle_timeout),
)
self._ongoing_background_tasks.call_soon(
self.close, reason="idle-timeout-exceeded"
)
return self.idle_since
def _check_no_workers(self) -> None:
if (
self.status in (Status.closing, Status.closed)
or self.no_workers_timeout is None
):
return
now = monotonic()
stimulus_id = f"check-no-workers-timeout-{time()}"
recommendations: Recs = {}
self._refresh_no_workers_since(now)
affected = self._check_unrunnable_task_timeouts(
now, recommendations=recommendations, stimulus_id=stimulus_id
)
affected.update(
self._check_queued_task_timeouts(
now, recommendations=recommendations, stimulus_id=stimulus_id
)
)
self.transitions(recommendations, stimulus_id=stimulus_id)
if affected:
self.log_event(
"scheduler",
{"action": "no-workers-timeout-exceeded", "keys": affected},
)
def _check_unrunnable_task_timeouts(
self, timestamp: float, recommendations: Recs, stimulus_id: str
) -> set[Key]:
assert self.no_workers_timeout
unsatisfied = []
no_workers = []
for ts, unrunnable_since in self.unrunnable.items():
if timestamp <= unrunnable_since + self.no_workers_timeout:
# unrunnable is insertion-ordered, which means that unrunnable_since will
# be monotonically increasing in this loop.
break
if (
self._no_workers_since is None
or self._no_workers_since >= unrunnable_since
):
unsatisfied.append(ts)
else:
no_workers.append(ts)
if not unsatisfied and not no_workers:
return set()
for ts in unsatisfied:
e = pickle.dumps(
NoValidWorkerError(
task=ts.key,
host_restrictions=(ts.host_restrictions or set()).copy(),
worker_restrictions=(ts.worker_restrictions or set()).copy(),
resource_restrictions=(ts.resource_restrictions or {}).copy(),
timeout=self.no_workers_timeout,
),
)
r = self.transition(
ts.key,
"erred",
exception=e,
cause=ts.key,
stimulus_id=stimulus_id,
)
recommendations.update(r)
logger.error(
"Task %s marked as failed because it timed out waiting "
"for its restrictions to become satisfied.",
ts.key,
)
self._fail_tasks_after_no_workers_timeout(
no_workers, recommendations, stimulus_id
)
return {ts.key for ts in concat([unsatisfied, no_workers])}
def _check_queued_task_timeouts(
self, timestamp: float, recommendations: Recs, stimulus_id: str
) -> set[Key]:
assert self.no_workers_timeout
if self._no_workers_since is None:
return set()
if timestamp <= self._no_workers_since + self.no_workers_timeout:
return set()
affected = list(self.queued)
self._fail_tasks_after_no_workers_timeout(
affected, recommendations, stimulus_id
)
return {ts.key for ts in affected}
def _fail_tasks_after_no_workers_timeout(
self, timed_out: Iterable[TaskState], recommendations: Recs, stimulus_id: str
) -> None:
assert self.no_workers_timeout
for ts in timed_out:
e = pickle.dumps(
NoWorkerError(
task=ts.key,
timeout=self.no_workers_timeout,
),
)
r = self.transition(
ts.key,
"erred",
exception=e,
cause=ts.key,
stimulus_id=stimulus_id,
)
recommendations.update(r)
logger.error(
"Task %s marked as failed because it timed out waiting "
"without any running workers.",
ts.key,
)
def _refresh_no_workers_since(self, timestamp: float | None = None) -> None:
if self.running or not (self.queued or self.unrunnable):
self._no_workers_since = None
return
if not self._no_workers_since:
self._no_workers_since = timestamp or monotonic()
return
def adaptive_target(self, target_duration=None):
"""Desired number of workers based on the current workload
This looks at the current running tasks and memory use, and returns a
number of desired workers. This is often used by adaptive scheduling.
Parameters
----------
target_duration : str
A desired duration of time for computations to take. This affects
how rapidly the scheduler will ask to scale.
See Also
--------
distributed.deploy.Adaptive
"""
if target_duration is None:
target_duration = dask.config.get("distributed.adaptive.target-duration")
target_duration = parse_timedelta(target_duration)
# CPU
queued = take(100, concat([self.queued, self.unrunnable.keys()]))
queued_occupancy = 0
for ts in queued:
queued_occupancy += self._get_prefix_duration(ts.prefix)
tasks_ready = len(self.queued) + len(self.unrunnable)
if tasks_ready > 100:
queued_occupancy *= tasks_ready / 100
cpu = math.ceil((self.total_occupancy + queued_occupancy) / target_duration)
# Avoid a few long tasks from asking for many cores
for ws in self.workers.values():
if tasks_ready > cpu:
break
tasks_ready += len(ws.processing)
else:
cpu = min(tasks_ready, cpu)
# Divide by average nthreads per worker
if self.workers:
nthreads = sum(ws.nthreads for ws in self.workers.values())
cpu = math.ceil(cpu / nthreads * len(self.workers))
if (self.unrunnable or self.queued) and not self.workers:
cpu = max(1, cpu)
# add more workers if more than 60% of memory is used
limit = sum(ws.memory_limit for ws in self.workers.values())
used = sum(ws.nbytes for ws in self.workers.values())
memory = 0
if used > 0.6 * limit and limit > 0:
memory = 2 * len(self.workers)
target = max(memory, cpu)
if target >= len(self.workers):
return target
else: # Scale down?
to_close = self.workers_to_close()
return len(self.workers) - len(to_close)
def request_acquire_replicas(
self, addr: str, keys: Iterable[Key], *, stimulus_id: str
) -> None:
"""Asynchronously ask a worker to acquire a replica of the listed keys from
other workers. This is a fire-and-forget operation which offers no feedback for
success or failure, and is intended for housekeeping and not for computation.
"""
who_has = {}
nbytes = {}
for key in keys:
ts = self.tasks[key]
assert ts.who_has
who_has[key] = [ws.address for ws in ts.who_has or ()]
nbytes[key] = ts.nbytes
self.stream_comms[addr].send(
{
"op": "acquire-replicas",
"who_has": who_has,
"nbytes": nbytes,
"stimulus_id": stimulus_id,
},
)
def request_remove_replicas(
self, addr: str, keys: list[Key], *, stimulus_id: str
) -> None:
"""Asynchronously ask a worker to discard its replica of the listed keys.
This must never be used to destroy the last replica of a key. This is a
fire-and-forget operation, intended for housekeeping and not for computation.
The replica disappears immediately from TaskState.who_has on the Scheduler side;
if the worker refuses to delete, e.g. because the task is a dependency of
another task running on it, it will (also asynchronously) inform the scheduler
to re-add itself to who_has. If the worker agrees to discard the task, there is
no feedback.
"""
ws = self.workers[addr]
# The scheduler immediately forgets about the replica and suggests the worker to
# drop it. The worker may refuse, at which point it will send back an add-keys
# message to reinstate it.
for key in keys:
ts = self.tasks[key]
if self.validate:
# Do not destroy the last copy
assert ts.who_has
assert len(ts.who_has) > 1
self.remove_replica(ts, ws)
self.stream_comms[addr].send(
{
"op": "remove-replicas",
"keys": keys,
"stimulus_id": stimulus_id,
}
)
def _task_to_report_msg(ts: TaskState) -> dict[str, Any] | None:
if ts.state == "forgotten":
return {"op": "cancelled-keys", "keys": [ts.key]}
elif ts.state == "memory":
return {"op": "key-in-memory", "key": ts.key}
elif ts.state == "erred":
failing_ts = ts.exception_blame
assert failing_ts
return {
"op": "task-erred",
"key": ts.key,
"exception": failing_ts.exception,
"traceback": failing_ts.traceback,
}
else:
return None
def _task_to_client_msgs(ts: TaskState) -> Msgs:
if ts.who_wants:
report_msg = _task_to_report_msg(ts)
if report_msg is not None:
return {cs.client_key: [report_msg] for cs in ts.who_wants}
return {}
def decide_worker(
ts: TaskState,
all_workers: set[WorkerState],
valid_workers: set[WorkerState] | None,
objective: Callable[[WorkerState], Any],
) -> WorkerState | None:
"""
Decide which worker should take task *ts*.
We choose the worker that has the data on which *ts* depends.
If several workers have dependencies then we choose the less-busy worker.
Optionally provide *valid_workers* of where jobs are allowed to occur
(if all workers are allowed to take the task, pass None instead).
If the task requires data communication because no eligible worker has
all the dependencies already, then we choose to minimize the number
of bytes sent between workers. This is determined by calling the
*objective* function.
"""
assert all(dts.who_has for dts in ts.dependencies)
if ts.actor:
candidates = all_workers.copy()
else:
candidates = {wws for dts in ts.dependencies for wws in dts.who_has or ()}
candidates &= all_workers
if valid_workers is None:
if not candidates:
candidates = all_workers.copy()
else:
candidates &= valid_workers
if not candidates:
candidates = valid_workers
if not candidates:
if ts.loose_restrictions:
return decide_worker(ts, all_workers, None, objective)
if not candidates:
return None
elif len(candidates) == 1:
return next(iter(candidates))
else:
return min(candidates, key=objective)
def validate_task_state(ts: TaskState) -> None:
"""Validate the given TaskState"""
assert ts.state in ALL_TASK_STATES, ts
if ts.waiting_on:
assert ts.waiting_on.issubset(ts.dependencies), (
"waiting not subset of dependencies",
str(ts.waiting_on),
str(ts.dependencies),
)
if ts.waiters:
assert ts.waiters.issubset(ts.dependents), (
"waiters not subset of dependents",
str(ts.waiters),
str(ts.dependents),
)
for dts in ts.waiting_on or ():
assert not dts.who_has, ("waiting on in-memory dep", str(ts), str(dts))
assert dts.state != "released", ("waiting on released dep", str(ts), str(dts))
for dts in ts.dependencies:
assert ts in dts.dependents, (
"not in dependency's dependents",
str(ts),
str(dts),
str(dts.dependents),
)
if ts.state in ("waiting", "queued", "processing", "no-worker"):
assert ts.waiting_on and dts in ts.waiting_on or dts.who_has, (
"dep missing",
str(ts),
str(dts),
)
assert dts.state != "forgotten"
for dts in ts.waiters or ():
assert dts.state in ("waiting", "queued", "processing", "no-worker"), (
"waiter not in play",
str(ts),
str(dts),
)
for dts in ts.dependents:
assert ts in dts.dependencies, (
"not in dependent's dependencies",
str(ts),
str(dts),
str(dts.dependencies),
)
assert dts.state != "forgotten"
assert (ts.processing_on is not None) == (ts.state == "processing")
assert bool(ts.who_has) == (ts.state == "memory"), (ts, ts.who_has, ts.state)
if ts.state == "queued":
assert not ts.processing_on
assert not ts.who_has
assert all(dts.who_has for dts in ts.dependencies), (
"task queued without all deps",
str(ts),
str(ts.dependencies),
)
if ts.state == "processing":
assert all(dts.who_has for dts in ts.dependencies), (
"task processing without all deps",
str(ts),
str(ts.dependencies),
)
assert not ts.waiting_on
if ts.who_has:
assert ts.waiters or ts.who_wants, (
"unneeded task in memory",
str(ts),
str(ts.who_has),
)
if ts.run_spec: # was computed
assert ts.type
assert isinstance(ts.type, str)
assert not any(
[
ts in dts.waiting_on
for dts in ts.dependents
if dts.waiting_on is not None
]
)
for ws in ts.who_has:
assert ts in ws.has_what, (
"not in who_has' has_what",
str(ts),
str(ws),
str(ws.has_what),
)
for cs in ts.who_wants or ():
assert ts in cs.wants_what, (
"not in who_wants' wants_what",
str(ts),
str(cs),
str(cs.wants_what),
)
if ts.actor:
if ts.state == "memory":
assert ts.who_has
assert sum(ts in ws.actors for ws in ts.who_has) == 1
if ts.state == "processing":
assert ts.processing_on
assert ts in ts.processing_on.actors
assert ts.state != "queued"
def validate_unrunnable(unrunnable: dict[TaskState, float]) -> None:
prev_unrunnable_since: float | None = None
prev_ts: TaskState | None = None
for ts, unrunnable_since in unrunnable.items():
assert ts.state == "no-worker"
if prev_ts is not None:
assert prev_unrunnable_since is not None
# Ensure that unrunnable_since is monotonically increasing when iterating over unrunnable.
# _check_no_workers relies on this.
assert prev_unrunnable_since <= unrunnable_since, (
prev_ts,
ts,
prev_unrunnable_since,
unrunnable_since,
)
prev_ts = ts
prev_unrunnable_since = unrunnable_since
def validate_worker_state(ws: WorkerState) -> None:
for ts in ws.has_what or ():
assert ts.who_has
assert ws in ts.who_has, (
"not in has_what' who_has",
str(ws),
str(ts),
str(ts.who_has),
)
for ts in ws.actors:
assert ts.state in ("memory", "processing")
def validate_state(
tasks: dict[Key, TaskState],
workers: dict[str, WorkerState],
clients: dict[str, ClientState],
) -> None:
"""Validate a current runtime state.
This performs a sequence of checks on the entire graph, running in about linear
time. This raises assert errors if anything doesn't check out.
"""
for ts in tasks.values():
validate_task_state(ts)
for ws in workers.values():
validate_worker_state(ws)
for cs in clients.values():
for ts in cs.wants_what or ():
assert ts.who_wants
assert cs in ts.who_wants, (
"not in wants_what' who_wants",
str(cs),
str(ts),
str(ts.who_wants),
)
def heartbeat_interval(n: int) -> float:
"""Interval in seconds that we desire heartbeats based on number of workers"""
if n <= 10:
return 0.5
elif n < 50:
return 1
elif n < 200:
return 2
else:
# No more than 200 heartbeats a second scaled by workers
return n / 200 + 1
def _task_slots_available(ws: WorkerState, saturation_factor: float) -> int:
"""Number of tasks that can be sent to this worker without oversaturating it"""
assert not math.isinf(saturation_factor)
return max(math.ceil(saturation_factor * ws.nthreads), 1) - (
len(ws.processing) - len(ws.long_running)
)
def _worker_full(ws: WorkerState, saturation_factor: float) -> bool:
if math.isinf(saturation_factor):
return False
return _task_slots_available(ws, saturation_factor) <= 0
class KilledWorker(Exception):
def __init__(self, task: Key, last_worker: WorkerState, allowed_failures: int):
super().__init__(task, last_worker, allowed_failures)
@property
def task(self) -> Key:
return self.args[0]
@property
def last_worker(self) -> WorkerState:
return self.args[1]
@property
def allowed_failures(self) -> int:
return self.args[2]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} on {self.allowed_failures + 1} "
"different workers, but all those workers died while running it. "
f"The last worker that attempt to run the task was {self.last_worker.address}. "
"Inspecting worker logs is often a good next step to diagnose what went wrong. "
"For more information see https://distributed.dask.org/en/stable/killed.html."
)
class NoValidWorkerError(Exception):
def __init__(
self,
task: Key,
host_restrictions: set[str],
worker_restrictions: set[str],
resource_restrictions: dict[str, float],
timeout: float,
):
super().__init__(
task, host_restrictions, worker_restrictions, resource_restrictions, timeout
)
@property
def task(self) -> Key:
return self.args[0]
@property
def host_restrictions(self) -> Any:
return self.args[1]
@property
def worker_restrictions(self) -> Any:
return self.args[2]
@property
def resource_restrictions(self) -> Any:
return self.args[3]
@property
def timeout(self) -> float:
return self.args[4]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} but timed out after {format_time(self.timeout)} "
"waiting for a valid worker matching all restrictions.\n\nRestrictions:\n"
f"host_restrictions={self.host_restrictions!s}\n"
f"worker_restrictions={self.worker_restrictions!s}\n"
f"resource_restrictions={self.resource_restrictions!s}\n"
)
class NoWorkerError(Exception):
def __init__(self, task: Key, timeout: float):
super().__init__(task, timeout)
@property
def task(self) -> Key:
return self.args[0]
@property
def timeout(self) -> float:
return self.args[1]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} but timed out after {format_time(self.timeout)} "
"waiting without any running workers."
)
class WorkerStatusPlugin(SchedulerPlugin):
"""A plugin to share worker status with a remote observer
This is used in cluster managers to keep updated about the status of the scheduler.
"""
name: ClassVar[str] = "worker-status"
bcomm: BatchedSend
def __init__(self, scheduler: Scheduler, comm: Comm):
self.bcomm = BatchedSend(interval="5ms")
self.bcomm.start(comm)
scheduler.add_plugin(self)
def add_worker(self, scheduler: Scheduler, worker: str) -> None:
ident = scheduler.workers[worker].identity()
del ident["metrics"]
del ident["last_seen"]
try:
self.bcomm.send(["add", {"workers": {worker: ident}}])
except CommClosedError:
scheduler.remove_plugin(name=self.name)
def remove_worker(self, scheduler: Scheduler, worker: str, **kwargs: Any) -> None:
try:
self.bcomm.send(["remove", worker])
except CommClosedError:
scheduler.remove_plugin(name=self.name)
def teardown(self) -> None:
self.bcomm.close()
class CollectTaskMetaDataPlugin(SchedulerPlugin):
scheduler: Scheduler
name: str
keys: set[Key]
metadata: dict[Key, Any]
state: dict[Key, TaskStateState]
def __init__(self, scheduler: Scheduler, name: str):
self.scheduler = scheduler
self.name = name
self.keys = set()
self.metadata = {}
self.state = {}
def update_graph(
self,
scheduler: Scheduler,
*,
keys: set[Key],
**kwargs: Any,
) -> None:
self.keys.update(keys)
def transition(
self,
key: Key,
start: TaskStateState,
finish: TaskStateState,
*args: Any,
**kwargs: Any,
) -> None:
if finish in ("memory", "erred"):
ts = self.scheduler.tasks.get(key)
if ts is not None and ts.key in self.keys:
self.metadata[key] = ts.metadata
self.state[key] = finish
self.keys.discard(key)
def _materialize_graph(
expr: Expr,
global_annotations: dict[str, Any],
validate: bool,
) -> tuple[dict[Key, T_runspec], dict[Key, set[Key]], dict[str, dict[Key, Any]]]:
> dsk: dict = expr.__dask_graph__()
E AttributeError: 'dict' object has no attribute '__dask_graph__'
distributed/scheduler.py:9383: AttributeError
Check warning on line 0 in distributed.cli.tests.test_dask_worker
github-actions / Unit Test Results
1 out of 5 runs failed: test_contact_listen_address[tcp://0.0.0.0:---no-nanny] (distributed.cli.tests.test_dask_worker)
artifacts/ubuntu-latest-mindeps-default-notci1/pytest.xml [took 0s]
Raw output
AttributeError: 'dict' object has no attribute '__dask_graph__'
c = <Client: No scheduler connected>
s = <Scheduler 'tcp://127.0.0.1:40515', workers: 0, cores: 0, tasks: 0>
nanny = '--no-nanny', listen_address = 'tcp://0.0.0.0:54333'
@pytest.mark.slow
@pytest.mark.skipif(not LINUX, reason="Need 127.0.0.2 to mean localhost")
@pytest.mark.parametrize("nanny", ["--nanny", "--no-nanny"])
@pytest.mark.parametrize("listen_address", ["tcp://0.0.0.0:", "tcp://127.0.0.2:"])
@gen_cluster(client=True, nthreads=[])
async def test_contact_listen_address(c, s, nanny, listen_address):
port = open_port()
listen_address += str(port)
with popen(
[
sys.executable,
"-m",
"dask",
"worker",
s.address,
nanny,
"--no-dashboard",
"--contact-address",
f"tcp://127.0.0.2:{port}",
"--listen-address",
listen_address,
]
):
await c.wait_for_workers(1)
info = c.scheduler_info()
assert info["workers"].keys() == {f"tcp://127.0.0.2:{port}"}
# roundtrip works
> assert await c.submit(lambda x: x + 1, 10) == 11
distributed/cli/tests/test_dask_worker.py:500:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
distributed/client.py:408: in _result
raise exc.with_traceback(tb)
distributed/utils.py:1507: in run_in_executor_with_context
return await loop.run_in_executor(
../../../miniconda3/envs/dask-distributed/lib/python3.10/concurrent/futures/thread.py:58: in run
result = self.fn(*self.args, **self.kwargs)
distributed/utils.py:1508: in <lambda>
executor, lambda: context.run(func, *args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
from __future__ import annotations
import asyncio
import contextlib
import dataclasses
import heapq
import inspect
import itertools
import json
import logging
import math
import operator
import os
import pickle
import random
import textwrap
import uuid
import warnings
import weakref
from abc import abstractmethod
from collections import defaultdict, deque
from collections.abc import (
Callable,
Collection,
Container,
Hashable,
Iterable,
Iterator,
Mapping,
Sequence,
Set,
)
from contextlib import suppress
from functools import partial
from typing import TYPE_CHECKING, Any, ClassVar, Literal, NamedTuple, cast, overload
import psutil
import tornado.web
from sortedcontainers import SortedDict, SortedSet
from tlz import (
concat,
first,
groupby,
merge,
merge_sorted,
merge_with,
partition,
pluck,
second,
take,
valmap,
)
from tornado.ioloop import IOLoop
import dask
import dask.utils
from dask._task_spec import DependenciesMapping, GraphNode, convert_legacy_graph
from dask.core import istask, validate_key
from dask.typing import Key, no_default
from dask.utils import (
_deprecated,
_deprecated_kwarg,
format_bytes,
format_time,
key_split,
parse_bytes,
parse_timedelta,
tmpfile,
)
from dask.widgets import get_template
from distributed import cluster_dump, preloading, profile
from distributed import versions as version_module
from distributed._asyncio import RLock
from distributed._stories import scheduler_story
from distributed.active_memory_manager import ActiveMemoryManagerExtension, RetireWorker
from distributed.batched import BatchedSend
from distributed.broker import Broker
from distributed.client import SourceCode
from distributed.collections import HeapSet
from distributed.comm import (
Comm,
CommClosedError,
get_address_host,
normalize_address,
resolve_address,
unparse_host_port,
)
from distributed.comm.addressing import addresses_from_user_args
from distributed.compatibility import PeriodicCallback
from distributed.core import (
ErrorMessage,
OKMessage,
Status,
clean_exception,
error_message,
rpc,
send_recv,
)
from distributed.diagnostics.memory_sampler import MemorySamplerExtension
from distributed.diagnostics.plugin import SchedulerPlugin, _get_plugin_name
from distributed.event import EventExtension
from distributed.gc import disable_gc_diagnosis, enable_gc_diagnosis
from distributed.http import get_handlers
from distributed.metrics import monotonic, time
from distributed.multi_lock import MultiLockExtension
from distributed.node import ServerNode
from distributed.proctitle import setproctitle
from distributed.protocol import deserialize
from distributed.protocol.pickle import dumps, loads
from distributed.protocol.serialize import Serialized, ToPickle, serialize
from distributed.publish import PublishExtension
from distributed.pubsub import PubSubSchedulerExtension
from distributed.queues import QueueExtension
from distributed.recreate_tasks import ReplayTaskScheduler
from distributed.security import Security
from distributed.semaphore import SemaphoreExtension
from distributed.shuffle import ShuffleSchedulerPlugin
from distributed.spans import SpanMetadata, SpansSchedulerExtension
from distributed.stealing import WorkStealing
from distributed.utils import (
All,
Deadline,
TimeoutError,
format_dashboard_link,
get_fileno_limit,
key_split_group,
log_errors,
offload,
recursive_to_dict,
wait_for,
)
from distributed.utils_comm import (
gather_from_workers,
retry_operation,
scatter_to_workers,
)
from distributed.variable import VariableExtension
if TYPE_CHECKING:
# TODO import from typing (requires Python >=3.10)
# TODO import from typing (requires Python >=3.11)
from typing_extensions import Self, TypeAlias
from dask._expr import Expr
# Not to be confused with distributed.worker_state_machine.TaskStateState
TaskStateState: TypeAlias = Literal[
"released",
"waiting",
"no-worker",
"queued",
"processing",
"memory",
"erred",
"forgotten",
]
ALL_TASK_STATES: Set[TaskStateState] = set(TaskStateState.__args__) # type: ignore
# {task key -> finish state}
# Not to be confused with distributed.worker_state_machine.Recs
Recs: TypeAlias = dict[Key, TaskStateState]
# {client or worker address: [{op: <key>, ...}, ...]}
Msgs: TypeAlias = dict[str, list[dict[str, Any]]]
# (recommendations, client messages, worker messages)
RecsMsgs: TypeAlias = tuple[Recs, Msgs, Msgs]
T_runspec: TypeAlias = GraphNode
logger = logging.getLogger(__name__)
LOG_PDB = dask.config.get("distributed.admin.pdb-on-err")
DEFAULT_DATA_SIZE = parse_bytes(
dask.config.get("distributed.scheduler.default-data-size")
)
STIMULUS_ID_UNSET = "<stimulus_id unset>"
DEFAULT_EXTENSIONS = {
"multi_locks": MultiLockExtension,
"publish": PublishExtension,
"replay-tasks": ReplayTaskScheduler,
"queues": QueueExtension,
"variables": VariableExtension,
"pubsub": PubSubSchedulerExtension,
"semaphores": SemaphoreExtension,
"events": EventExtension,
"amm": ActiveMemoryManagerExtension,
"memory_sampler": MemorySamplerExtension,
"shuffle": ShuffleSchedulerPlugin,
"spans": SpansSchedulerExtension,
"stealing": WorkStealing,
}
class ClientState:
"""A simple object holding information about a client."""
#: A unique identifier for this client. This is generally an opaque
#: string generated by the client itself.
client_key: str
#: Cached hash of :attr:`~ClientState.client_key`
_hash: int
#: A set of tasks this client wants to be kept in memory, so that it can download
#: its result when desired. This is the reverse mapping of
#: :class:`TaskState.who_wants`. Tasks are typically removed from this set when the
#: corresponding object in the client's space (for example a ``Future`` or a Dask
#: collection) gets garbage-collected.
wants_what: set[TaskState]
#: The last time we received a heartbeat from this client, in local scheduler time.
last_seen: float
#: Output of :func:`distributed.versions.get_versions` on the client
versions: dict[str, Any]
__slots__ = tuple(__annotations__)
def __init__(self, client: str, *, versions: dict[str, Any] | None = None):
self.client_key = client
self._hash = hash(client)
self.wants_what = set()
self.last_seen = time()
self.versions = versions or {}
def __hash__(self) -> int:
return self._hash
def __eq__(self, other: object) -> bool:
if not isinstance(other, ClientState):
return False
return self.client_key == other.client_key
def __repr__(self) -> str:
return f"<Client {self.client_key!r}>"
def __str__(self) -> str:
return self.client_key
def _to_dict_no_nest(self, *, exclude: Container[str] = ()) -> dict:
"""Dictionary representation for debugging purposes.
Not type stable and not intended for roundtrips.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
TaskState._to_dict
"""
return recursive_to_dict(
self,
exclude=set(exclude) | {"versions"}, # type: ignore
members=True,
)
class MemoryState:
"""Memory readings on a worker or on the whole cluster.
See :doc:`worker-memory`.
Attributes / properties:
managed_total
Sum of the output of sizeof() for all dask keys held by the worker in memory,
plus number of bytes spilled to disk
managed
Sum of the output of sizeof() for the dask keys held in RAM. Note that this may
be inaccurate, which may cause inaccurate unmanaged memory (see below).
spilled
Number of bytes for the dask keys spilled to the hard drive.
Note that this is the size on disk; size in memory may be different due to
compression and inaccuracies in sizeof(). In other words, given the same keys,
'managed' will change depending on the keys being in memory or spilled.
process
Total RSS memory measured by the OS on the worker process.
This is always exactly equal to managed + unmanaged.
unmanaged
process - managed. This is the sum of
- Python interpreter and modules
- global variables
- memory temporarily allocated by the dask tasks that are currently running
- memory fragmentation
- memory leaks
- memory not yet garbage collected
- memory not yet free()'d by the Python memory manager to the OS
unmanaged_old
Minimum of the 'unmanaged' measures over the last
``distributed.memory.recent-to-old-time`` seconds
unmanaged_recent
unmanaged - unmanaged_old; in other words process memory that has been recently
allocated but is not accounted for by dask; hopefully it's mostly a temporary
spike.
optimistic
managed + unmanaged_old; in other words the memory held long-term by
the process under the hopeful assumption that all unmanaged_recent memory is a
temporary spike
"""
process: int
unmanaged_old: int
managed: int
spilled: int
__slots__ = tuple(__annotations__)
def __init__(
self,
*,
process: int,
unmanaged_old: int,
managed: int,
spilled: int,
):
# Some data arrives with the heartbeat, some other arrives in realtime as the
# tasks progress. Also, sizeof() is not guaranteed to return correct results.
# This can cause glitches where a partial measure is larger than the whole, so
# we need to force all numbers to add up exactly by definition.
self.process = process
self.managed = min(self.process, managed)
self.spilled = spilled
# Subtractions between unsigned ints guaranteed by construction to be >= 0
self.unmanaged_old = min(unmanaged_old, process - self.managed)
@staticmethod
def sum(*infos: MemoryState) -> MemoryState:
process = 0
unmanaged_old = 0
managed = 0
spilled = 0
for ms in infos:
process += ms.process
unmanaged_old += ms.unmanaged_old
spilled += ms.spilled
managed += ms.managed
return MemoryState(
process=process,
unmanaged_old=unmanaged_old,
managed=managed,
spilled=spilled,
)
@property
def managed_total(self) -> int:
return self.managed + self.spilled
@property
def unmanaged(self) -> int:
# This is never negative thanks to __init__
return self.process - self.managed
@property
def unmanaged_recent(self) -> int:
# This is never negative thanks to __init__
return self.process - self.managed - self.unmanaged_old
@property
def optimistic(self) -> int:
return self.managed + self.unmanaged_old
@property
def managed_in_memory(self) -> int:
warnings.warn("managed_in_memory has been renamed to managed", FutureWarning)
return self.managed
@property
def managed_spilled(self) -> int:
warnings.warn("managed_spilled has been renamed to spilled", FutureWarning)
return self.spilled
def __repr__(self) -> str:
return (
f"Process memory (RSS) : {format_bytes(self.process)}\n"
f" - managed by Dask : {format_bytes(self.managed)}\n"
f" - unmanaged (old) : {format_bytes(self.unmanaged_old)}\n"
f" - unmanaged (recent): {format_bytes(self.unmanaged_recent)}\n"
f"Spilled to disk : {format_bytes(self.spilled)}\n"
)
def _to_dict(self, *, exclude: Container[str] = ()) -> dict:
"""Dictionary representation for debugging purposes.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
"""
return {
k: getattr(self, k)
for k in dir(self)
if not k.startswith("_")
and k not in {"sum", "managed_in_memory", "managed_spilled"}
}
class WorkerState:
"""A simple object holding information about a worker.
Not to be confused with :class:`distributed.worker_state_machine.WorkerState`.
"""
#: This worker's unique key. This can be its connected address
#: (such as ``"tcp://127.0.0.1:8891"``) or an alias (such as ``"alice"``).
address: str
pid: int
name: Hashable
#: The number of CPU threads made available on this worker
nthreads: int
#: Memory available to the worker, in bytes
memory_limit: int
local_directory: str
services: dict[str, int]
#: Output of :meth:`distributed.versions.get_versions` on the worker
versions: dict[str, Any]
#: Address of the associated :class:`~distributed.nanny.Nanny`, if present
nanny: str | None
#: Read-only worker status, synced one way from the remote Worker object
status: Status
#: Cached hash of :attr:`~WorkerState.server_id`
_hash: int
#: The total memory size, in bytes, used by the tasks this worker holds in memory
#: (i.e. the tasks in this worker's :attr:`~WorkerState.has_what`).
nbytes: int
#: Worker memory unknown to the worker, in bytes, which has been there for more than
#: 30 seconds. See :class:`MemoryState`.
_memory_unmanaged_old: int
#: History of the last 30 seconds' worth of unmanaged memory. Used to differentiate
#: between "old" and "new" unmanaged memory.
#: Format: ``[(timestamp, bytes), (timestamp, bytes), ...]``
_memory_unmanaged_history: deque[tuple[float, int]]
metrics: dict[str, Any]
#: The last time we received a heartbeat from this worker, in local scheduler time.
last_seen: float
time_delay: float
bandwidth: float
#: A set of all TaskStates on this worker that are actors. This only includes those
#: actors whose state actually lives on this worker, not actors to which this worker
#: has a reference.
actors: set[TaskState]
#: Underlying data of :meth:`WorkerState.has_what`
_has_what: dict[TaskState, None]
#: A set of tasks that have been submitted to this worker. Multiple tasks may be
# submitted to a worker in advance and the worker will run them eventually,
# depending on its execution resources (but see :doc:`work-stealing`).
#:
#: All the tasks here are in the "processing" state.
#: This attribute is kept in sync with :attr:`TaskState.processing_on`.
processing: set[TaskState]
#: Running tasks that invoked :func:`distributed.secede`
long_running: set[TaskState]
#: A dictionary of tasks that are currently being run on this worker.
#: Each task state is associated with the duration in seconds which the task has
#: been running.
executing: dict[TaskState, float]
#: The available resources on this worker, e.g. ``{"GPU": 2}``.
#: These are abstract quantities that constrain certain tasks from running at the
#: same time on this worker.
resources: dict[str, float]
#: The sum of each resource used by all tasks allocated to this worker.
#: The numbers in this dictionary can only be less or equal than those in this
#: worker's :attr:`~WorkerState.resources`.
used_resources: dict[str, float]
#: Arbitrary additional metadata to be added to :meth:`~WorkerState.identity`
extra: dict[str, Any]
# The unique server ID this WorkerState is referencing
server_id: str
# Reference to scheduler task_groups
scheduler_ref: weakref.ref[SchedulerState] | None
task_prefix_count: defaultdict[str, int]
_network_occ: int
_occupancy_cache: float | None
#: Keys that may need to be fetched to this worker, and the number of tasks that need them.
#: All tasks are currently in `memory` on a worker other than this one.
#: Much like `processing`, this does not exactly reflect worker state:
#: keys here may be queued to fetch, in flight, or already in memory
#: on the worker.
needs_what: dict[TaskState, int]
__slots__ = tuple(__annotations__)
def __init__(
self,
*,
address: str,
status: Status,
pid: int,
name: object,
nthreads: int = 0,
memory_limit: int,
local_directory: str,
nanny: str | None,
server_id: str,
services: dict[str, int] | None = None,
versions: dict[str, Any] | None = None,
extra: dict[str, Any] | None = None,
scheduler: SchedulerState | None = None,
):
self.server_id = server_id
self.address = address
self.pid = pid
self.name = name
self.nthreads = nthreads
self.memory_limit = memory_limit
self.local_directory = local_directory
self.services = services or {}
self.versions = versions or {}
self.nanny = nanny
self.status = status
self._hash = hash(self.server_id)
self.nbytes = 0
self._memory_unmanaged_old = 0
self._memory_unmanaged_history = deque()
self.metrics = {}
self.last_seen = time()
self.time_delay = 0
self.bandwidth = parse_bytes(dask.config.get("distributed.scheduler.bandwidth"))
self.actors = set()
self._has_what = {}
self.processing = set()
self.long_running = set()
self.executing = {}
self.resources = {}
self.used_resources = {}
self.extra = extra or {}
self.scheduler_ref = weakref.ref(scheduler) if scheduler else None
self.task_prefix_count = defaultdict(int)
self.needs_what = {}
self._network_occ = 0
self._occupancy_cache = None
def __hash__(self) -> int:
return self._hash
def __eq__(self, other: object) -> bool:
return self is other or (
isinstance(other, WorkerState) and other.server_id == self.server_id
)
@property
def has_what(self) -> Set[TaskState]:
"""An insertion-sorted set-like of tasks which currently reside on this worker.
All the tasks here are in the "memory" state.
This is the reverse mapping of :attr:`TaskState.who_has`.
This is a read-only public accessor. The data is implemented as a dict without
values, because rebalance() relies on dicts being insertion-sorted.
"""
return self._has_what.keys()
@property
def host(self) -> str:
return get_address_host(self.address)
@property
def memory(self) -> MemoryState:
"""Polished memory metrics for the worker.
**Design note on managed memory**
There are two measures available for managed memory:
- ``self.nbytes``
- ``self.metrics["managed_bytes"]``
At rest, the two numbers must be identical. However, ``self.nbytes`` is
immediately updated through the batched comms as soon as each task lands in
memory on the worker; ``self.metrics["managed_bytes"]`` instead is updated by
the heartbeat, which can lag several seconds behind.
Below we are mixing likely newer managed memory info from ``self.nbytes`` with
process and spilled memory from the heartbeat. This is deliberate, so that
managed memory total is updated more frequently.
Managed memory directly and immediately contributes to optimistic memory, which
is in turn used in Active Memory Manager heuristics (at the moment of writing;
more uses will likely be added in the future). So it's important to have it
up to date; much more than it is for process memory.
Having up-to-date managed memory info as soon as the scheduler learns about
task completion also substantially simplifies unit tests.
The flip side of this design is that it may cause some noise in the
unmanaged_recent measure. e.g.:
1. Delete 100MB of managed data
2. The updated managed memory reaches the scheduler faster than the
updated process memory
3. There's a blip where the scheduler thinks that there's a sudden 100MB
increase in unmanaged_recent, since process memory hasn't changed but managed
memory has decreased by 100MB
4. When the heartbeat arrives, process memory goes down and so does the
unmanaged_recent.
This is OK - one of the main reasons for the unmanaged_recent / unmanaged_old
split is exactly to concentrate all the noise in unmanaged_recent and exclude it
from optimistic memory, which is used for heuristics.
Something that is less OK, but also less frequent, is that the sudden deletion
of spilled keys will cause a negative blip in managed memory:
1. Delete 100MB of spilled data
2. The updated managed memory *total* reaches the scheduler faster than the
updated spilled portion
3. This causes the managed memory to temporarily plummet and be replaced by
unmanaged_recent, while spilled memory remains unaltered
4. When the heartbeat arrives, managed goes back up, unmanaged_recent
goes back down, and spilled goes down by 100MB as it should have to
begin with.
:issue:`6002` will let us solve this.
"""
return MemoryState(
process=self.metrics["memory"],
managed=max(0, self.nbytes - self.metrics["spilled_bytes"]["memory"]),
spilled=self.metrics["spilled_bytes"]["disk"],
unmanaged_old=self._memory_unmanaged_old,
)
def clean(self) -> WorkerState:
"""Return a version of this object that is appropriate for serialization"""
ws = WorkerState(
address=self.address,
status=self.status,
pid=self.pid,
name=self.name,
nthreads=self.nthreads,
memory_limit=self.memory_limit,
local_directory=self.local_directory,
services=self.services,
nanny=self.nanny,
extra=self.extra,
server_id=self.server_id,
)
ws._occupancy_cache = self.occupancy
ws.executing = {ts.key: duration for ts, duration in self.executing.items()} # type: ignore
return ws
def __repr__(self) -> str:
name = f", name: {self.name}" if self.name != self.address else ""
return (
f"<WorkerState {self.address!r}{name}, "
f"status: {self.status.name}, "
f"memory: {len(self.has_what)}, "
f"processing: {len(self.processing)}>"
)
def _repr_html_(self) -> str:
return get_template("worker_state.html.j2").render(
address=self.address,
name=self.name,
status=self.status.name,
has_what=self.has_what,
processing=self.processing,
)
def identity(self) -> dict[str, Any]:
return {
"type": "Worker",
"id": self.name,
"host": self.host,
"resources": self.resources,
"local_directory": self.local_directory,
"name": self.name,
"nthreads": self.nthreads,
"memory_limit": self.memory_limit,
"last_seen": self.last_seen,
"services": self.services,
"metrics": self.metrics,
"status": self.status.name,
"nanny": self.nanny,
**self.extra,
}
def _to_dict_no_nest(self, *, exclude: Container[str] = ()) -> dict[str, Any]:
"""Dictionary representation for debugging purposes.
Not type stable and not intended for roundtrips.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
TaskState._to_dict
"""
return recursive_to_dict(
self,
exclude=set(exclude) | {"versions"}, # type: ignore
members=True,
)
@property
def scheduler(self) -> SchedulerState:
assert self.scheduler_ref
s = self.scheduler_ref()
assert s
return s
def add_to_processing(self, ts: TaskState) -> None:
"""Assign a task to this worker for compute."""
if self.scheduler.validate:
assert ts not in self.processing
tp = ts.prefix
self.task_prefix_count[tp.name] += 1
self.scheduler._task_prefix_count_global[tp.name] += 1
self.processing.add(ts)
for dts in ts.dependencies:
assert dts.who_has
if self not in dts.who_has:
self._inc_needs_replica(dts)
def add_to_long_running(self, ts: TaskState) -> None:
if self.scheduler.validate:
assert ts in self.processing
assert ts not in self.long_running
self._remove_from_task_prefix_count(ts)
# Cannot remove from processing since we're using this for things like
# idleness detection. Idle workers are typically targeted for
# downscaling but we should not downscale workers with long running
# tasks
self.long_running.add(ts)
def remove_from_processing(self, ts: TaskState) -> None:
"""Remove a task from a workers processing"""
if self.scheduler.validate:
assert ts in self.processing
if ts in self.long_running:
self.long_running.discard(ts)
else:
self._remove_from_task_prefix_count(ts)
self.processing.remove(ts)
for dts in ts.dependencies:
if dts in self.needs_what:
self._dec_needs_replica(dts)
def _remove_from_task_prefix_count(self, ts: TaskState) -> None:
prefix_name = ts.prefix.name
count = self.task_prefix_count[prefix_name] - 1
tp_count = self.task_prefix_count
tp_count_global = self.scheduler._task_prefix_count_global
if count:
tp_count[prefix_name] = count
else:
del tp_count[prefix_name]
count = tp_count_global[prefix_name] - 1
if count:
tp_count_global[prefix_name] = count
else:
del tp_count_global[prefix_name]
def remove_replica(self, ts: TaskState) -> None:
"""The worker no longer has a task in memory"""
if self.scheduler.validate:
assert ts.who_has
assert self in ts.who_has
assert ts in self.has_what
assert ts not in self.needs…cheduler, title="Scheduler Profile (administrative)"
)
task_stream = TabPanel(child=task_stream, title="Task Stream")
bandwidth_workers = TabPanel(
child=bandwidth_workers.root, title="Bandwidth (Workers)"
)
bandwidth_types = TabPanel(
child=bandwidth_types.root, title="Bandwidth (Types)"
)
system = TabPanel(child=sysmon.root, title="System")
logs = TabPanel(child=logs.root, title="Scheduler Logs")
tabs = Tabs(
tabs=[
html,
task_stream,
system,
logs,
compute,
workers,
scheduler,
bandwidth_workers,
bandwidth_types,
],
sizing_mode="stretch_both",
)
from bokeh.core.templates import get_env
from bokeh.plotting import output_file, save
with tmpfile(extension=".html") as fn:
output_file(filename=fn, title="Dask Performance Report", mode=mode)
template_directory = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "dashboard", "templates"
)
template_environment = get_env()
template_environment.loader.searchpath.append(template_directory)
template = template_environment.get_template("performance_report.html")
save(tabs, filename=fn, template=template)
with open(fn) as f:
data = f.read()
return data
async def get_worker_logs(self, n=None, workers=None, nanny=False):
results = await self.broadcast(
msg={"op": "get_logs", "n": n}, workers=workers, nanny=nanny
)
return results
def log_event(self, topic: str | Collection[str], msg: Any) -> None:
"""Log an event under a given topic
Parameters
----------
topic : str, list[str]
Name of the topic under which to log an event. To log the same
event under multiple topics, pass a list of topic names.
msg
Event message to log. Note this must be msgpack serializable.
See also
--------
Client.log_event
"""
self._broker.publish(topic, msg)
def subscribe_topic(self, topic: str, client: str) -> None:
self._broker.subscribe(topic, client)
def unsubscribe_topic(self, topic: str, client: str) -> None:
self._broker.unsubscribe(topic, client)
@overload
def get_events(self, topic: str) -> tuple[tuple[float, Any], ...]: ...
@overload
def get_events(self) -> dict[str, tuple[tuple[float, Any], ...]]: ...
def get_events(
self, topic: str | None = None
) -> tuple[tuple[float, Any], ...] | dict[str, tuple[tuple[float, Any], ...]]:
return self._broker.get_events(topic)
async def get_worker_monitor_info(self, recent=False, starts=None):
if starts is None:
starts = {}
results = await asyncio.gather(
*(
self.rpc(w).get_monitor_info(recent=recent, start=starts.get(w, 0))
for w in self.workers
)
)
return dict(zip(self.workers, results))
###########
# Cleanup #
###########
@log_errors
async def check_worker_ttl(self) -> None:
now = time()
stimulus_id = f"check-worker-ttl-{now}"
assert self.worker_ttl
ttl = max(self.worker_ttl, 10 * heartbeat_interval(len(self.workers)))
to_restart = []
for ws in self.workers.values():
last_seen = now - ws.last_seen
if last_seen > ttl:
to_restart.append(ws.address)
logger.warning(
f"Worker failed to heartbeat for {last_seen:.0f}s; "
f"{'attempting restart' if ws.nanny else 'removing'}: {ws}"
)
if to_restart:
self.log_event(
"scheduler",
{
"action": "worker-ttl-timed-out",
"workers": to_restart.copy(),
"ttl": ttl,
},
)
await self.restart_workers(
to_restart,
wait_for_workers=False,
stimulus_id=stimulus_id,
)
def check_idle(self) -> float | None:
if self.status in (Status.closing, Status.closed):
return None # pragma: nocover
if self.transition_counter != self._idle_transition_counter:
self._idle_transition_counter = self.transition_counter
self.idle_since = None
return None
if self._active_graph_updates > 0:
self.idle_since = None
return None
if (
self.queued
or self.unrunnable
or any(ws.processing for ws in self.workers.values())
):
self.idle_since = None
return None
if not self.idle_since:
self.idle_since = time()
return self.idle_since
if self.jupyter:
last_activity = (
self._jupyter_server_application.web_app.last_activity().timestamp()
)
if last_activity > self.idle_since:
self.idle_since = last_activity
return self.idle_since
if self.idle_timeout:
if time() > self.idle_since + self.idle_timeout:
assert self.idle_since
logger.info(
"Scheduler closing after being idle for %s",
format_time(self.idle_timeout),
)
self._ongoing_background_tasks.call_soon(
self.close, reason="idle-timeout-exceeded"
)
return self.idle_since
def _check_no_workers(self) -> None:
if (
self.status in (Status.closing, Status.closed)
or self.no_workers_timeout is None
):
return
now = monotonic()
stimulus_id = f"check-no-workers-timeout-{time()}"
recommendations: Recs = {}
self._refresh_no_workers_since(now)
affected = self._check_unrunnable_task_timeouts(
now, recommendations=recommendations, stimulus_id=stimulus_id
)
affected.update(
self._check_queued_task_timeouts(
now, recommendations=recommendations, stimulus_id=stimulus_id
)
)
self.transitions(recommendations, stimulus_id=stimulus_id)
if affected:
self.log_event(
"scheduler",
{"action": "no-workers-timeout-exceeded", "keys": affected},
)
def _check_unrunnable_task_timeouts(
self, timestamp: float, recommendations: Recs, stimulus_id: str
) -> set[Key]:
assert self.no_workers_timeout
unsatisfied = []
no_workers = []
for ts, unrunnable_since in self.unrunnable.items():
if timestamp <= unrunnable_since + self.no_workers_timeout:
# unrunnable is insertion-ordered, which means that unrunnable_since will
# be monotonically increasing in this loop.
break
if (
self._no_workers_since is None
or self._no_workers_since >= unrunnable_since
):
unsatisfied.append(ts)
else:
no_workers.append(ts)
if not unsatisfied and not no_workers:
return set()
for ts in unsatisfied:
e = pickle.dumps(
NoValidWorkerError(
task=ts.key,
host_restrictions=(ts.host_restrictions or set()).copy(),
worker_restrictions=(ts.worker_restrictions or set()).copy(),
resource_restrictions=(ts.resource_restrictions or {}).copy(),
timeout=self.no_workers_timeout,
),
)
r = self.transition(
ts.key,
"erred",
exception=e,
cause=ts.key,
stimulus_id=stimulus_id,
)
recommendations.update(r)
logger.error(
"Task %s marked as failed because it timed out waiting "
"for its restrictions to become satisfied.",
ts.key,
)
self._fail_tasks_after_no_workers_timeout(
no_workers, recommendations, stimulus_id
)
return {ts.key for ts in concat([unsatisfied, no_workers])}
def _check_queued_task_timeouts(
self, timestamp: float, recommendations: Recs, stimulus_id: str
) -> set[Key]:
assert self.no_workers_timeout
if self._no_workers_since is None:
return set()
if timestamp <= self._no_workers_since + self.no_workers_timeout:
return set()
affected = list(self.queued)
self._fail_tasks_after_no_workers_timeout(
affected, recommendations, stimulus_id
)
return {ts.key for ts in affected}
def _fail_tasks_after_no_workers_timeout(
self, timed_out: Iterable[TaskState], recommendations: Recs, stimulus_id: str
) -> None:
assert self.no_workers_timeout
for ts in timed_out:
e = pickle.dumps(
NoWorkerError(
task=ts.key,
timeout=self.no_workers_timeout,
),
)
r = self.transition(
ts.key,
"erred",
exception=e,
cause=ts.key,
stimulus_id=stimulus_id,
)
recommendations.update(r)
logger.error(
"Task %s marked as failed because it timed out waiting "
"without any running workers.",
ts.key,
)
def _refresh_no_workers_since(self, timestamp: float | None = None) -> None:
if self.running or not (self.queued or self.unrunnable):
self._no_workers_since = None
return
if not self._no_workers_since:
self._no_workers_since = timestamp or monotonic()
return
def adaptive_target(self, target_duration=None):
"""Desired number of workers based on the current workload
This looks at the current running tasks and memory use, and returns a
number of desired workers. This is often used by adaptive scheduling.
Parameters
----------
target_duration : str
A desired duration of time for computations to take. This affects
how rapidly the scheduler will ask to scale.
See Also
--------
distributed.deploy.Adaptive
"""
if target_duration is None:
target_duration = dask.config.get("distributed.adaptive.target-duration")
target_duration = parse_timedelta(target_duration)
# CPU
queued = take(100, concat([self.queued, self.unrunnable.keys()]))
queued_occupancy = 0
for ts in queued:
queued_occupancy += self._get_prefix_duration(ts.prefix)
tasks_ready = len(self.queued) + len(self.unrunnable)
if tasks_ready > 100:
queued_occupancy *= tasks_ready / 100
cpu = math.ceil((self.total_occupancy + queued_occupancy) / target_duration)
# Avoid a few long tasks from asking for many cores
for ws in self.workers.values():
if tasks_ready > cpu:
break
tasks_ready += len(ws.processing)
else:
cpu = min(tasks_ready, cpu)
# Divide by average nthreads per worker
if self.workers:
nthreads = sum(ws.nthreads for ws in self.workers.values())
cpu = math.ceil(cpu / nthreads * len(self.workers))
if (self.unrunnable or self.queued) and not self.workers:
cpu = max(1, cpu)
# add more workers if more than 60% of memory is used
limit = sum(ws.memory_limit for ws in self.workers.values())
used = sum(ws.nbytes for ws in self.workers.values())
memory = 0
if used > 0.6 * limit and limit > 0:
memory = 2 * len(self.workers)
target = max(memory, cpu)
if target >= len(self.workers):
return target
else: # Scale down?
to_close = self.workers_to_close()
return len(self.workers) - len(to_close)
def request_acquire_replicas(
self, addr: str, keys: Iterable[Key], *, stimulus_id: str
) -> None:
"""Asynchronously ask a worker to acquire a replica of the listed keys from
other workers. This is a fire-and-forget operation which offers no feedback for
success or failure, and is intended for housekeeping and not for computation.
"""
who_has = {}
nbytes = {}
for key in keys:
ts = self.tasks[key]
assert ts.who_has
who_has[key] = [ws.address for ws in ts.who_has or ()]
nbytes[key] = ts.nbytes
self.stream_comms[addr].send(
{
"op": "acquire-replicas",
"who_has": who_has,
"nbytes": nbytes,
"stimulus_id": stimulus_id,
},
)
def request_remove_replicas(
self, addr: str, keys: list[Key], *, stimulus_id: str
) -> None:
"""Asynchronously ask a worker to discard its replica of the listed keys.
This must never be used to destroy the last replica of a key. This is a
fire-and-forget operation, intended for housekeeping and not for computation.
The replica disappears immediately from TaskState.who_has on the Scheduler side;
if the worker refuses to delete, e.g. because the task is a dependency of
another task running on it, it will (also asynchronously) inform the scheduler
to re-add itself to who_has. If the worker agrees to discard the task, there is
no feedback.
"""
ws = self.workers[addr]
# The scheduler immediately forgets about the replica and suggests the worker to
# drop it. The worker may refuse, at which point it will send back an add-keys
# message to reinstate it.
for key in keys:
ts = self.tasks[key]
if self.validate:
# Do not destroy the last copy
assert ts.who_has
assert len(ts.who_has) > 1
self.remove_replica(ts, ws)
self.stream_comms[addr].send(
{
"op": "remove-replicas",
"keys": keys,
"stimulus_id": stimulus_id,
}
)
def _task_to_report_msg(ts: TaskState) -> dict[str, Any] | None:
if ts.state == "forgotten":
return {"op": "cancelled-keys", "keys": [ts.key]}
elif ts.state == "memory":
return {"op": "key-in-memory", "key": ts.key}
elif ts.state == "erred":
failing_ts = ts.exception_blame
assert failing_ts
return {
"op": "task-erred",
"key": ts.key,
"exception": failing_ts.exception,
"traceback": failing_ts.traceback,
}
else:
return None
def _task_to_client_msgs(ts: TaskState) -> Msgs:
if ts.who_wants:
report_msg = _task_to_report_msg(ts)
if report_msg is not None:
return {cs.client_key: [report_msg] for cs in ts.who_wants}
return {}
def decide_worker(
ts: TaskState,
all_workers: set[WorkerState],
valid_workers: set[WorkerState] | None,
objective: Callable[[WorkerState], Any],
) -> WorkerState | None:
"""
Decide which worker should take task *ts*.
We choose the worker that has the data on which *ts* depends.
If several workers have dependencies then we choose the less-busy worker.
Optionally provide *valid_workers* of where jobs are allowed to occur
(if all workers are allowed to take the task, pass None instead).
If the task requires data communication because no eligible worker has
all the dependencies already, then we choose to minimize the number
of bytes sent between workers. This is determined by calling the
*objective* function.
"""
assert all(dts.who_has for dts in ts.dependencies)
if ts.actor:
candidates = all_workers.copy()
else:
candidates = {wws for dts in ts.dependencies for wws in dts.who_has or ()}
candidates &= all_workers
if valid_workers is None:
if not candidates:
candidates = all_workers.copy()
else:
candidates &= valid_workers
if not candidates:
candidates = valid_workers
if not candidates:
if ts.loose_restrictions:
return decide_worker(ts, all_workers, None, objective)
if not candidates:
return None
elif len(candidates) == 1:
return next(iter(candidates))
else:
return min(candidates, key=objective)
def validate_task_state(ts: TaskState) -> None:
"""Validate the given TaskState"""
assert ts.state in ALL_TASK_STATES, ts
if ts.waiting_on:
assert ts.waiting_on.issubset(ts.dependencies), (
"waiting not subset of dependencies",
str(ts.waiting_on),
str(ts.dependencies),
)
if ts.waiters:
assert ts.waiters.issubset(ts.dependents), (
"waiters not subset of dependents",
str(ts.waiters),
str(ts.dependents),
)
for dts in ts.waiting_on or ():
assert not dts.who_has, ("waiting on in-memory dep", str(ts), str(dts))
assert dts.state != "released", ("waiting on released dep", str(ts), str(dts))
for dts in ts.dependencies:
assert ts in dts.dependents, (
"not in dependency's dependents",
str(ts),
str(dts),
str(dts.dependents),
)
if ts.state in ("waiting", "queued", "processing", "no-worker"):
assert ts.waiting_on and dts in ts.waiting_on or dts.who_has, (
"dep missing",
str(ts),
str(dts),
)
assert dts.state != "forgotten"
for dts in ts.waiters or ():
assert dts.state in ("waiting", "queued", "processing", "no-worker"), (
"waiter not in play",
str(ts),
str(dts),
)
for dts in ts.dependents:
assert ts in dts.dependencies, (
"not in dependent's dependencies",
str(ts),
str(dts),
str(dts.dependencies),
)
assert dts.state != "forgotten"
assert (ts.processing_on is not None) == (ts.state == "processing")
assert bool(ts.who_has) == (ts.state == "memory"), (ts, ts.who_has, ts.state)
if ts.state == "queued":
assert not ts.processing_on
assert not ts.who_has
assert all(dts.who_has for dts in ts.dependencies), (
"task queued without all deps",
str(ts),
str(ts.dependencies),
)
if ts.state == "processing":
assert all(dts.who_has for dts in ts.dependencies), (
"task processing without all deps",
str(ts),
str(ts.dependencies),
)
assert not ts.waiting_on
if ts.who_has:
assert ts.waiters or ts.who_wants, (
"unneeded task in memory",
str(ts),
str(ts.who_has),
)
if ts.run_spec: # was computed
assert ts.type
assert isinstance(ts.type, str)
assert not any(
[
ts in dts.waiting_on
for dts in ts.dependents
if dts.waiting_on is not None
]
)
for ws in ts.who_has:
assert ts in ws.has_what, (
"not in who_has' has_what",
str(ts),
str(ws),
str(ws.has_what),
)
for cs in ts.who_wants or ():
assert ts in cs.wants_what, (
"not in who_wants' wants_what",
str(ts),
str(cs),
str(cs.wants_what),
)
if ts.actor:
if ts.state == "memory":
assert ts.who_has
assert sum(ts in ws.actors for ws in ts.who_has) == 1
if ts.state == "processing":
assert ts.processing_on
assert ts in ts.processing_on.actors
assert ts.state != "queued"
def validate_unrunnable(unrunnable: dict[TaskState, float]) -> None:
prev_unrunnable_since: float | None = None
prev_ts: TaskState | None = None
for ts, unrunnable_since in unrunnable.items():
assert ts.state == "no-worker"
if prev_ts is not None:
assert prev_unrunnable_since is not None
# Ensure that unrunnable_since is monotonically increasing when iterating over unrunnable.
# _check_no_workers relies on this.
assert prev_unrunnable_since <= unrunnable_since, (
prev_ts,
ts,
prev_unrunnable_since,
unrunnable_since,
)
prev_ts = ts
prev_unrunnable_since = unrunnable_since
def validate_worker_state(ws: WorkerState) -> None:
for ts in ws.has_what or ():
assert ts.who_has
assert ws in ts.who_has, (
"not in has_what' who_has",
str(ws),
str(ts),
str(ts.who_has),
)
for ts in ws.actors:
assert ts.state in ("memory", "processing")
def validate_state(
tasks: dict[Key, TaskState],
workers: dict[str, WorkerState],
clients: dict[str, ClientState],
) -> None:
"""Validate a current runtime state.
This performs a sequence of checks on the entire graph, running in about linear
time. This raises assert errors if anything doesn't check out.
"""
for ts in tasks.values():
validate_task_state(ts)
for ws in workers.values():
validate_worker_state(ws)
for cs in clients.values():
for ts in cs.wants_what or ():
assert ts.who_wants
assert cs in ts.who_wants, (
"not in wants_what' who_wants",
str(cs),
str(ts),
str(ts.who_wants),
)
def heartbeat_interval(n: int) -> float:
"""Interval in seconds that we desire heartbeats based on number of workers"""
if n <= 10:
return 0.5
elif n < 50:
return 1
elif n < 200:
return 2
else:
# No more than 200 heartbeats a second scaled by workers
return n / 200 + 1
def _task_slots_available(ws: WorkerState, saturation_factor: float) -> int:
"""Number of tasks that can be sent to this worker without oversaturating it"""
assert not math.isinf(saturation_factor)
return max(math.ceil(saturation_factor * ws.nthreads), 1) - (
len(ws.processing) - len(ws.long_running)
)
def _worker_full(ws: WorkerState, saturation_factor: float) -> bool:
if math.isinf(saturation_factor):
return False
return _task_slots_available(ws, saturation_factor) <= 0
class KilledWorker(Exception):
def __init__(self, task: Key, last_worker: WorkerState, allowed_failures: int):
super().__init__(task, last_worker, allowed_failures)
@property
def task(self) -> Key:
return self.args[0]
@property
def last_worker(self) -> WorkerState:
return self.args[1]
@property
def allowed_failures(self) -> int:
return self.args[2]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} on {self.allowed_failures + 1} "
"different workers, but all those workers died while running it. "
f"The last worker that attempt to run the task was {self.last_worker.address}. "
"Inspecting worker logs is often a good next step to diagnose what went wrong. "
"For more information see https://distributed.dask.org/en/stable/killed.html."
)
class NoValidWorkerError(Exception):
def __init__(
self,
task: Key,
host_restrictions: set[str],
worker_restrictions: set[str],
resource_restrictions: dict[str, float],
timeout: float,
):
super().__init__(
task, host_restrictions, worker_restrictions, resource_restrictions, timeout
)
@property
def task(self) -> Key:
return self.args[0]
@property
def host_restrictions(self) -> Any:
return self.args[1]
@property
def worker_restrictions(self) -> Any:
return self.args[2]
@property
def resource_restrictions(self) -> Any:
return self.args[3]
@property
def timeout(self) -> float:
return self.args[4]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} but timed out after {format_time(self.timeout)} "
"waiting for a valid worker matching all restrictions.\n\nRestrictions:\n"
f"host_restrictions={self.host_restrictions!s}\n"
f"worker_restrictions={self.worker_restrictions!s}\n"
f"resource_restrictions={self.resource_restrictions!s}\n"
)
class NoWorkerError(Exception):
def __init__(self, task: Key, timeout: float):
super().__init__(task, timeout)
@property
def task(self) -> Key:
return self.args[0]
@property
def timeout(self) -> float:
return self.args[1]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} but timed out after {format_time(self.timeout)} "
"waiting without any running workers."
)
class WorkerStatusPlugin(SchedulerPlugin):
"""A plugin to share worker status with a remote observer
This is used in cluster managers to keep updated about the status of the scheduler.
"""
name: ClassVar[str] = "worker-status"
bcomm: BatchedSend
def __init__(self, scheduler: Scheduler, comm: Comm):
self.bcomm = BatchedSend(interval="5ms")
self.bcomm.start(comm)
scheduler.add_plugin(self)
def add_worker(self, scheduler: Scheduler, worker: str) -> None:
ident = scheduler.workers[worker].identity()
del ident["metrics"]
del ident["last_seen"]
try:
self.bcomm.send(["add", {"workers": {worker: ident}}])
except CommClosedError:
scheduler.remove_plugin(name=self.name)
def remove_worker(self, scheduler: Scheduler, worker: str, **kwargs: Any) -> None:
try:
self.bcomm.send(["remove", worker])
except CommClosedError:
scheduler.remove_plugin(name=self.name)
def teardown(self) -> None:
self.bcomm.close()
class CollectTaskMetaDataPlugin(SchedulerPlugin):
scheduler: Scheduler
name: str
keys: set[Key]
metadata: dict[Key, Any]
state: dict[Key, TaskStateState]
def __init__(self, scheduler: Scheduler, name: str):
self.scheduler = scheduler
self.name = name
self.keys = set()
self.metadata = {}
self.state = {}
def update_graph(
self,
scheduler: Scheduler,
*,
keys: set[Key],
**kwargs: Any,
) -> None:
self.keys.update(keys)
def transition(
self,
key: Key,
start: TaskStateState,
finish: TaskStateState,
*args: Any,
**kwargs: Any,
) -> None:
if finish in ("memory", "erred"):
ts = self.scheduler.tasks.get(key)
if ts is not None and ts.key in self.keys:
self.metadata[key] = ts.metadata
self.state[key] = finish
self.keys.discard(key)
def _materialize_graph(
expr: Expr,
global_annotations: dict[str, Any],
validate: bool,
) -> tuple[dict[Key, T_runspec], dict[Key, set[Key]], dict[str, dict[Key, Any]]]:
> dsk: dict = expr.__dask_graph__()
E AttributeError: 'dict' object has no attribute '__dask_graph__'
distributed/scheduler.py:9383: AttributeError
Check warning on line 0 in distributed.cli.tests.test_dask_worker
github-actions / Unit Test Results
1 out of 5 runs failed: test_contact_listen_address[tcp://127.0.0.2:---nanny] (distributed.cli.tests.test_dask_worker)
artifacts/ubuntu-latest-mindeps-default-notci1/pytest.xml [took 1s]
Raw output
AttributeError: 'dict' object has no attribute '__dask_graph__'
c = <Client: No scheduler connected>
s = <Scheduler 'tcp://127.0.0.1:37619', workers: 0, cores: 0, tasks: 0>
nanny = '--nanny', listen_address = 'tcp://127.0.0.2:38987'
@pytest.mark.slow
@pytest.mark.skipif(not LINUX, reason="Need 127.0.0.2 to mean localhost")
@pytest.mark.parametrize("nanny", ["--nanny", "--no-nanny"])
@pytest.mark.parametrize("listen_address", ["tcp://0.0.0.0:", "tcp://127.0.0.2:"])
@gen_cluster(client=True, nthreads=[])
async def test_contact_listen_address(c, s, nanny, listen_address):
port = open_port()
listen_address += str(port)
with popen(
[
sys.executable,
"-m",
"dask",
"worker",
s.address,
nanny,
"--no-dashboard",
"--contact-address",
f"tcp://127.0.0.2:{port}",
"--listen-address",
listen_address,
]
):
await c.wait_for_workers(1)
info = c.scheduler_info()
assert info["workers"].keys() == {f"tcp://127.0.0.2:{port}"}
# roundtrip works
> assert await c.submit(lambda x: x + 1, 10) == 11
distributed/cli/tests/test_dask_worker.py:500:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
distributed/client.py:408: in _result
raise exc.with_traceback(tb)
distributed/utils.py:1507: in run_in_executor_with_context
return await loop.run_in_executor(
../../../miniconda3/envs/dask-distributed/lib/python3.10/concurrent/futures/thread.py:58: in run
result = self.fn(*self.args, **self.kwargs)
distributed/utils.py:1508: in <lambda>
executor, lambda: context.run(func, *args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
from __future__ import annotations
import asyncio
import contextlib
import dataclasses
import heapq
import inspect
import itertools
import json
import logging
import math
import operator
import os
import pickle
import random
import textwrap
import uuid
import warnings
import weakref
from abc import abstractmethod
from collections import defaultdict, deque
from collections.abc import (
Callable,
Collection,
Container,
Hashable,
Iterable,
Iterator,
Mapping,
Sequence,
Set,
)
from contextlib import suppress
from functools import partial
from typing import TYPE_CHECKING, Any, ClassVar, Literal, NamedTuple, cast, overload
import psutil
import tornado.web
from sortedcontainers import SortedDict, SortedSet
from tlz import (
concat,
first,
groupby,
merge,
merge_sorted,
merge_with,
partition,
pluck,
second,
take,
valmap,
)
from tornado.ioloop import IOLoop
import dask
import dask.utils
from dask._task_spec import DependenciesMapping, GraphNode, convert_legacy_graph
from dask.core import istask, validate_key
from dask.typing import Key, no_default
from dask.utils import (
_deprecated,
_deprecated_kwarg,
format_bytes,
format_time,
key_split,
parse_bytes,
parse_timedelta,
tmpfile,
)
from dask.widgets import get_template
from distributed import cluster_dump, preloading, profile
from distributed import versions as version_module
from distributed._asyncio import RLock
from distributed._stories import scheduler_story
from distributed.active_memory_manager import ActiveMemoryManagerExtension, RetireWorker
from distributed.batched import BatchedSend
from distributed.broker import Broker
from distributed.client import SourceCode
from distributed.collections import HeapSet
from distributed.comm import (
Comm,
CommClosedError,
get_address_host,
normalize_address,
resolve_address,
unparse_host_port,
)
from distributed.comm.addressing import addresses_from_user_args
from distributed.compatibility import PeriodicCallback
from distributed.core import (
ErrorMessage,
OKMessage,
Status,
clean_exception,
error_message,
rpc,
send_recv,
)
from distributed.diagnostics.memory_sampler import MemorySamplerExtension
from distributed.diagnostics.plugin import SchedulerPlugin, _get_plugin_name
from distributed.event import EventExtension
from distributed.gc import disable_gc_diagnosis, enable_gc_diagnosis
from distributed.http import get_handlers
from distributed.metrics import monotonic, time
from distributed.multi_lock import MultiLockExtension
from distributed.node import ServerNode
from distributed.proctitle import setproctitle
from distributed.protocol import deserialize
from distributed.protocol.pickle import dumps, loads
from distributed.protocol.serialize import Serialized, ToPickle, serialize
from distributed.publish import PublishExtension
from distributed.pubsub import PubSubSchedulerExtension
from distributed.queues import QueueExtension
from distributed.recreate_tasks import ReplayTaskScheduler
from distributed.security import Security
from distributed.semaphore import SemaphoreExtension
from distributed.shuffle import ShuffleSchedulerPlugin
from distributed.spans import SpanMetadata, SpansSchedulerExtension
from distributed.stealing import WorkStealing
from distributed.utils import (
All,
Deadline,
TimeoutError,
format_dashboard_link,
get_fileno_limit,
key_split_group,
log_errors,
offload,
recursive_to_dict,
wait_for,
)
from distributed.utils_comm import (
gather_from_workers,
retry_operation,
scatter_to_workers,
)
from distributed.variable import VariableExtension
if TYPE_CHECKING:
# TODO import from typing (requires Python >=3.10)
# TODO import from typing (requires Python >=3.11)
from typing_extensions import Self, TypeAlias
from dask._expr import Expr
# Not to be confused with distributed.worker_state_machine.TaskStateState
TaskStateState: TypeAlias = Literal[
"released",
"waiting",
"no-worker",
"queued",
"processing",
"memory",
"erred",
"forgotten",
]
ALL_TASK_STATES: Set[TaskStateState] = set(TaskStateState.__args__) # type: ignore
# {task key -> finish state}
# Not to be confused with distributed.worker_state_machine.Recs
Recs: TypeAlias = dict[Key, TaskStateState]
# {client or worker address: [{op: <key>, ...}, ...]}
Msgs: TypeAlias = dict[str, list[dict[str, Any]]]
# (recommendations, client messages, worker messages)
RecsMsgs: TypeAlias = tuple[Recs, Msgs, Msgs]
T_runspec: TypeAlias = GraphNode
logger = logging.getLogger(__name__)
LOG_PDB = dask.config.get("distributed.admin.pdb-on-err")
DEFAULT_DATA_SIZE = parse_bytes(
dask.config.get("distributed.scheduler.default-data-size")
)
STIMULUS_ID_UNSET = "<stimulus_id unset>"
DEFAULT_EXTENSIONS = {
"multi_locks": MultiLockExtension,
"publish": PublishExtension,
"replay-tasks": ReplayTaskScheduler,
"queues": QueueExtension,
"variables": VariableExtension,
"pubsub": PubSubSchedulerExtension,
"semaphores": SemaphoreExtension,
"events": EventExtension,
"amm": ActiveMemoryManagerExtension,
"memory_sampler": MemorySamplerExtension,
"shuffle": ShuffleSchedulerPlugin,
"spans": SpansSchedulerExtension,
"stealing": WorkStealing,
}
class ClientState:
"""A simple object holding information about a client."""
#: A unique identifier for this client. This is generally an opaque
#: string generated by the client itself.
client_key: str
#: Cached hash of :attr:`~ClientState.client_key`
_hash: int
#: A set of tasks this client wants to be kept in memory, so that it can download
#: its result when desired. This is the reverse mapping of
#: :class:`TaskState.who_wants`. Tasks are typically removed from this set when the
#: corresponding object in the client's space (for example a ``Future`` or a Dask
#: collection) gets garbage-collected.
wants_what: set[TaskState]
#: The last time we received a heartbeat from this client, in local scheduler time.
last_seen: float
#: Output of :func:`distributed.versions.get_versions` on the client
versions: dict[str, Any]
__slots__ = tuple(__annotations__)
def __init__(self, client: str, *, versions: dict[str, Any] | None = None):
self.client_key = client
self._hash = hash(client)
self.wants_what = set()
self.last_seen = time()
self.versions = versions or {}
def __hash__(self) -> int:
return self._hash
def __eq__(self, other: object) -> bool:
if not isinstance(other, ClientState):
return False
return self.client_key == other.client_key
def __repr__(self) -> str:
return f"<Client {self.client_key!r}>"
def __str__(self) -> str:
return self.client_key
def _to_dict_no_nest(self, *, exclude: Container[str] = ()) -> dict:
"""Dictionary representation for debugging purposes.
Not type stable and not intended for roundtrips.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
TaskState._to_dict
"""
return recursive_to_dict(
self,
exclude=set(exclude) | {"versions"}, # type: ignore
members=True,
)
class MemoryState:
"""Memory readings on a worker or on the whole cluster.
See :doc:`worker-memory`.
Attributes / properties:
managed_total
Sum of the output of sizeof() for all dask keys held by the worker in memory,
plus number of bytes spilled to disk
managed
Sum of the output of sizeof() for the dask keys held in RAM. Note that this may
be inaccurate, which may cause inaccurate unmanaged memory (see below).
spilled
Number of bytes for the dask keys spilled to the hard drive.
Note that this is the size on disk; size in memory may be different due to
compression and inaccuracies in sizeof(). In other words, given the same keys,
'managed' will change depending on the keys being in memory or spilled.
process
Total RSS memory measured by the OS on the worker process.
This is always exactly equal to managed + unmanaged.
unmanaged
process - managed. This is the sum of
- Python interpreter and modules
- global variables
- memory temporarily allocated by the dask tasks that are currently running
- memory fragmentation
- memory leaks
- memory not yet garbage collected
- memory not yet free()'d by the Python memory manager to the OS
unmanaged_old
Minimum of the 'unmanaged' measures over the last
``distributed.memory.recent-to-old-time`` seconds
unmanaged_recent
unmanaged - unmanaged_old; in other words process memory that has been recently
allocated but is not accounted for by dask; hopefully it's mostly a temporary
spike.
optimistic
managed + unmanaged_old; in other words the memory held long-term by
the process under the hopeful assumption that all unmanaged_recent memory is a
temporary spike
"""
process: int
unmanaged_old: int
managed: int
spilled: int
__slots__ = tuple(__annotations__)
def __init__(
self,
*,
process: int,
unmanaged_old: int,
managed: int,
spilled: int,
):
# Some data arrives with the heartbeat, some other arrives in realtime as the
# tasks progress. Also, sizeof() is not guaranteed to return correct results.
# This can cause glitches where a partial measure is larger than the whole, so
# we need to force all numbers to add up exactly by definition.
self.process = process
self.managed = min(self.process, managed)
self.spilled = spilled
# Subtractions between unsigned ints guaranteed by construction to be >= 0
self.unmanaged_old = min(unmanaged_old, process - self.managed)
@staticmethod
def sum(*infos: MemoryState) -> MemoryState:
process = 0
unmanaged_old = 0
managed = 0
spilled = 0
for ms in infos:
process += ms.process
unmanaged_old += ms.unmanaged_old
spilled += ms.spilled
managed += ms.managed
return MemoryState(
process=process,
unmanaged_old=unmanaged_old,
managed=managed,
spilled=spilled,
)
@property
def managed_total(self) -> int:
return self.managed + self.spilled
@property
def unmanaged(self) -> int:
# This is never negative thanks to __init__
return self.process - self.managed
@property
def unmanaged_recent(self) -> int:
# This is never negative thanks to __init__
return self.process - self.managed - self.unmanaged_old
@property
def optimistic(self) -> int:
return self.managed + self.unmanaged_old
@property
def managed_in_memory(self) -> int:
warnings.warn("managed_in_memory has been renamed to managed", FutureWarning)
return self.managed
@property
def managed_spilled(self) -> int:
warnings.warn("managed_spilled has been renamed to spilled", FutureWarning)
return self.spilled
def __repr__(self) -> str:
return (
f"Process memory (RSS) : {format_bytes(self.process)}\n"
f" - managed by Dask : {format_bytes(self.managed)}\n"
f" - unmanaged (old) : {format_bytes(self.unmanaged_old)}\n"
f" - unmanaged (recent): {format_bytes(self.unmanaged_recent)}\n"
f"Spilled to disk : {format_bytes(self.spilled)}\n"
)
def _to_dict(self, *, exclude: Container[str] = ()) -> dict:
"""Dictionary representation for debugging purposes.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
"""
return {
k: getattr(self, k)
for k in dir(self)
if not k.startswith("_")
and k not in {"sum", "managed_in_memory", "managed_spilled"}
}
class WorkerState:
"""A simple object holding information about a worker.
Not to be confused with :class:`distributed.worker_state_machine.WorkerState`.
"""
#: This worker's unique key. This can be its connected address
#: (such as ``"tcp://127.0.0.1:8891"``) or an alias (such as ``"alice"``).
address: str
pid: int
name: Hashable
#: The number of CPU threads made available on this worker
nthreads: int
#: Memory available to the worker, in bytes
memory_limit: int
local_directory: str
services: dict[str, int]
#: Output of :meth:`distributed.versions.get_versions` on the worker
versions: dict[str, Any]
#: Address of the associated :class:`~distributed.nanny.Nanny`, if present
nanny: str | None
#: Read-only worker status, synced one way from the remote Worker object
status: Status
#: Cached hash of :attr:`~WorkerState.server_id`
_hash: int
#: The total memory size, in bytes, used by the tasks this worker holds in memory
#: (i.e. the tasks in this worker's :attr:`~WorkerState.has_what`).
nbytes: int
#: Worker memory unknown to the worker, in bytes, which has been there for more than
#: 30 seconds. See :class:`MemoryState`.
_memory_unmanaged_old: int
#: History of the last 30 seconds' worth of unmanaged memory. Used to differentiate
#: between "old" and "new" unmanaged memory.
#: Format: ``[(timestamp, bytes), (timestamp, bytes), ...]``
_memory_unmanaged_history: deque[tuple[float, int]]
metrics: dict[str, Any]
#: The last time we received a heartbeat from this worker, in local scheduler time.
last_seen: float
time_delay: float
bandwidth: float
#: A set of all TaskStates on this worker that are actors. This only includes those
#: actors whose state actually lives on this worker, not actors to which this worker
#: has a reference.
actors: set[TaskState]
#: Underlying data of :meth:`WorkerState.has_what`
_has_what: dict[TaskState, None]
#: A set of tasks that have been submitted to this worker. Multiple tasks may be
# submitted to a worker in advance and the worker will run them eventually,
# depending on its execution resources (but see :doc:`work-stealing`).
#:
#: All the tasks here are in the "processing" state.
#: This attribute is kept in sync with :attr:`TaskState.processing_on`.
processing: set[TaskState]
#: Running tasks that invoked :func:`distributed.secede`
long_running: set[TaskState]
#: A dictionary of tasks that are currently being run on this worker.
#: Each task state is associated with the duration in seconds which the task has
#: been running.
executing: dict[TaskState, float]
#: The available resources on this worker, e.g. ``{"GPU": 2}``.
#: These are abstract quantities that constrain certain tasks from running at the
#: same time on this worker.
resources: dict[str, float]
#: The sum of each resource used by all tasks allocated to this worker.
#: The numbers in this dictionary can only be less or equal than those in this
#: worker's :attr:`~WorkerState.resources`.
used_resources: dict[str, float]
#: Arbitrary additional metadata to be added to :meth:`~WorkerState.identity`
extra: dict[str, Any]
# The unique server ID this WorkerState is referencing
server_id: str
# Reference to scheduler task_groups
scheduler_ref: weakref.ref[SchedulerState] | None
task_prefix_count: defaultdict[str, int]
_network_occ: int
_occupancy_cache: float | None
#: Keys that may need to be fetched to this worker, and the number of tasks that need them.
#: All tasks are currently in `memory` on a worker other than this one.
#: Much like `processing`, this does not exactly reflect worker state:
#: keys here may be queued to fetch, in flight, or already in memory
#: on the worker.
needs_what: dict[TaskState, int]
__slots__ = tuple(__annotations__)
def __init__(
self,
*,
address: str,
status: Status,
pid: int,
name: object,
nthreads: int = 0,
memory_limit: int,
local_directory: str,
nanny: str | None,
server_id: str,
services: dict[str, int] | None = None,
versions: dict[str, Any] | None = None,
extra: dict[str, Any] | None = None,
scheduler: SchedulerState | None = None,
):
self.server_id = server_id
self.address = address
self.pid = pid
self.name = name
self.nthreads = nthreads
self.memory_limit = memory_limit
self.local_directory = local_directory
self.services = services or {}
self.versions = versions or {}
self.nanny = nanny
self.status = status
self._hash = hash(self.server_id)
self.nbytes = 0
self._memory_unmanaged_old = 0
self._memory_unmanaged_history = deque()
self.metrics = {}
self.last_seen = time()
self.time_delay = 0
self.bandwidth = parse_bytes(dask.config.get("distributed.scheduler.bandwidth"))
self.actors = set()
self._has_what = {}
self.processing = set()
self.long_running = set()
self.executing = {}
self.resources = {}
self.used_resources = {}
self.extra = extra or {}
self.scheduler_ref = weakref.ref(scheduler) if scheduler else None
self.task_prefix_count = defaultdict(int)
self.needs_what = {}
self._network_occ = 0
self._occupancy_cache = None
def __hash__(self) -> int:
return self._hash
def __eq__(self, other: object) -> bool:
return self is other or (
isinstance(other, WorkerState) and other.server_id == self.server_id
)
@property
def has_what(self) -> Set[TaskState]:
"""An insertion-sorted set-like of tasks which currently reside on this worker.
All the tasks here are in the "memory" state.
This is the reverse mapping of :attr:`TaskState.who_has`.
This is a read-only public accessor. The data is implemented as a dict without
values, because rebalance() relies on dicts being insertion-sorted.
"""
return self._has_what.keys()
@property
def host(self) -> str:
return get_address_host(self.address)
@property
def memory(self) -> MemoryState:
"""Polished memory metrics for the worker.
**Design note on managed memory**
There are two measures available for managed memory:
- ``self.nbytes``
- ``self.metrics["managed_bytes"]``
At rest, the two numbers must be identical. However, ``self.nbytes`` is
immediately updated through the batched comms as soon as each task lands in
memory on the worker; ``self.metrics["managed_bytes"]`` instead is updated by
the heartbeat, which can lag several seconds behind.
Below we are mixing likely newer managed memory info from ``self.nbytes`` with
process and spilled memory from the heartbeat. This is deliberate, so that
managed memory total is updated more frequently.
Managed memory directly and immediately contributes to optimistic memory, which
is in turn used in Active Memory Manager heuristics (at the moment of writing;
more uses will likely be added in the future). So it's important to have it
up to date; much more than it is for process memory.
Having up-to-date managed memory info as soon as the scheduler learns about
task completion also substantially simplifies unit tests.
The flip side of this design is that it may cause some noise in the
unmanaged_recent measure. e.g.:
1. Delete 100MB of managed data
2. The updated managed memory reaches the scheduler faster than the
updated process memory
3. There's a blip where the scheduler thinks that there's a sudden 100MB
increase in unmanaged_recent, since process memory hasn't changed but managed
memory has decreased by 100MB
4. When the heartbeat arrives, process memory goes down and so does the
unmanaged_recent.
This is OK - one of the main reasons for the unmanaged_recent / unmanaged_old
split is exactly to concentrate all the noise in unmanaged_recent and exclude it
from optimistic memory, which is used for heuristics.
Something that is less OK, but also less frequent, is that the sudden deletion
of spilled keys will cause a negative blip in managed memory:
1. Delete 100MB of spilled data
2. The updated managed memory *total* reaches the scheduler faster than the
updated spilled portion
3. This causes the managed memory to temporarily plummet and be replaced by
unmanaged_recent, while spilled memory remains unaltered
4. When the heartbeat arrives, managed goes back up, unmanaged_recent
goes back down, and spilled goes down by 100MB as it should have to
begin with.
:issue:`6002` will let us solve this.
"""
return MemoryState(
process=self.metrics["memory"],
managed=max(0, self.nbytes - self.metrics["spilled_bytes"]["memory"]),
spilled=self.metrics["spilled_bytes"]["disk"],
unmanaged_old=self._memory_unmanaged_old,
)
def clean(self) -> WorkerState:
"""Return a version of this object that is appropriate for serialization"""
ws = WorkerState(
address=self.address,
status=self.status,
pid=self.pid,
name=self.name,
nthreads=self.nthreads,
memory_limit=self.memory_limit,
local_directory=self.local_directory,
services=self.services,
nanny=self.nanny,
extra=self.extra,
server_id=self.server_id,
)
ws._occupancy_cache = self.occupancy
ws.executing = {ts.key: duration for ts, duration in self.executing.items()} # type: ignore
return ws
def __repr__(self) -> str:
name = f", name: {self.name}" if self.name != self.address else ""
return (
f"<WorkerState {self.address!r}{name}, "
f"status: {self.status.name}, "
f"memory: {len(self.has_what)}, "
f"processing: {len(self.processing)}>"
)
def _repr_html_(self) -> str:
return get_template("worker_state.html.j2").render(
address=self.address,
name=self.name,
status=self.status.name,
has_what=self.has_what,
processing=self.processing,
)
def identity(self) -> dict[str, Any]:
return {
"type": "Worker",
"id": self.name,
"host": self.host,
"resources": self.resources,
"local_directory": self.local_directory,
"name": self.name,
"nthreads": self.nthreads,
"memory_limit": self.memory_limit,
"last_seen": self.last_seen,
"services": self.services,
"metrics": self.metrics,
"status": self.status.name,
"nanny": self.nanny,
**self.extra,
}
def _to_dict_no_nest(self, *, exclude: Container[str] = ()) -> dict[str, Any]:
"""Dictionary representation for debugging purposes.
Not type stable and not intended for roundtrips.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
TaskState._to_dict
"""
return recursive_to_dict(
self,
exclude=set(exclude) | {"versions"}, # type: ignore
members=True,
)
@property
def scheduler(self) -> SchedulerState:
assert self.scheduler_ref
s = self.scheduler_ref()
assert s
return s
def add_to_processing(self, ts: TaskState) -> None:
"""Assign a task to this worker for compute."""
if self.scheduler.validate:
assert ts not in self.processing
tp = ts.prefix
self.task_prefix_count[tp.name] += 1
self.scheduler._task_prefix_count_global[tp.name] += 1
self.processing.add(ts)
for dts in ts.dependencies:
assert dts.who_has
if self not in dts.who_has:
self._inc_needs_replica(dts)
def add_to_long_running(self, ts: TaskState) -> None:
if self.scheduler.validate:
assert ts in self.processing
assert ts not in self.long_running
self._remove_from_task_prefix_count(ts)
# Cannot remove from processing since we're using this for things like
# idleness detection. Idle workers are typically targeted for
# downscaling but we should not downscale workers with long running
# tasks
self.long_running.add(ts)
def remove_from_processing(self, ts: TaskState) -> None:
"""Remove a task from a workers processing"""
if self.scheduler.validate:
assert ts in self.processing
if ts in self.long_running:
self.long_running.discard(ts)
else:
self._remove_from_task_prefix_count(ts)
self.processing.remove(ts)
for dts in ts.dependencies:
if dts in self.needs_what:
self._dec_needs_replica(dts)
def _remove_from_task_prefix_count(self, ts: TaskState) -> None:
prefix_name = ts.prefix.name
count = self.task_prefix_count[prefix_name] - 1
tp_count = self.task_prefix_count
tp_count_global = self.scheduler._task_prefix_count_global
if count:
tp_count[prefix_name] = count
else:
del tp_count[prefix_name]
count = tp_count_global[prefix_name] - 1
if count:
tp_count_global[prefix_name] = count
else:
del tp_count_global[prefix_name]
def remove_replica(self, ts: TaskState) -> None:
"""The worker no longer has a task in memory"""
if self.scheduler.validate:
assert ts.who_has
assert self in ts.who_has
assert ts in self.has_what
assert ts not in self.needs_…cheduler, title="Scheduler Profile (administrative)"
)
task_stream = TabPanel(child=task_stream, title="Task Stream")
bandwidth_workers = TabPanel(
child=bandwidth_workers.root, title="Bandwidth (Workers)"
)
bandwidth_types = TabPanel(
child=bandwidth_types.root, title="Bandwidth (Types)"
)
system = TabPanel(child=sysmon.root, title="System")
logs = TabPanel(child=logs.root, title="Scheduler Logs")
tabs = Tabs(
tabs=[
html,
task_stream,
system,
logs,
compute,
workers,
scheduler,
bandwidth_workers,
bandwidth_types,
],
sizing_mode="stretch_both",
)
from bokeh.core.templates import get_env
from bokeh.plotting import output_file, save
with tmpfile(extension=".html") as fn:
output_file(filename=fn, title="Dask Performance Report", mode=mode)
template_directory = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "dashboard", "templates"
)
template_environment = get_env()
template_environment.loader.searchpath.append(template_directory)
template = template_environment.get_template("performance_report.html")
save(tabs, filename=fn, template=template)
with open(fn) as f:
data = f.read()
return data
async def get_worker_logs(self, n=None, workers=None, nanny=False):
results = await self.broadcast(
msg={"op": "get_logs", "n": n}, workers=workers, nanny=nanny
)
return results
def log_event(self, topic: str | Collection[str], msg: Any) -> None:
"""Log an event under a given topic
Parameters
----------
topic : str, list[str]
Name of the topic under which to log an event. To log the same
event under multiple topics, pass a list of topic names.
msg
Event message to log. Note this must be msgpack serializable.
See also
--------
Client.log_event
"""
self._broker.publish(topic, msg)
def subscribe_topic(self, topic: str, client: str) -> None:
self._broker.subscribe(topic, client)
def unsubscribe_topic(self, topic: str, client: str) -> None:
self._broker.unsubscribe(topic, client)
@overload
def get_events(self, topic: str) -> tuple[tuple[float, Any], ...]: ...
@overload
def get_events(self) -> dict[str, tuple[tuple[float, Any], ...]]: ...
def get_events(
self, topic: str | None = None
) -> tuple[tuple[float, Any], ...] | dict[str, tuple[tuple[float, Any], ...]]:
return self._broker.get_events(topic)
async def get_worker_monitor_info(self, recent=False, starts=None):
if starts is None:
starts = {}
results = await asyncio.gather(
*(
self.rpc(w).get_monitor_info(recent=recent, start=starts.get(w, 0))
for w in self.workers
)
)
return dict(zip(self.workers, results))
###########
# Cleanup #
###########
@log_errors
async def check_worker_ttl(self) -> None:
now = time()
stimulus_id = f"check-worker-ttl-{now}"
assert self.worker_ttl
ttl = max(self.worker_ttl, 10 * heartbeat_interval(len(self.workers)))
to_restart = []
for ws in self.workers.values():
last_seen = now - ws.last_seen
if last_seen > ttl:
to_restart.append(ws.address)
logger.warning(
f"Worker failed to heartbeat for {last_seen:.0f}s; "
f"{'attempting restart' if ws.nanny else 'removing'}: {ws}"
)
if to_restart:
self.log_event(
"scheduler",
{
"action": "worker-ttl-timed-out",
"workers": to_restart.copy(),
"ttl": ttl,
},
)
await self.restart_workers(
to_restart,
wait_for_workers=False,
stimulus_id=stimulus_id,
)
def check_idle(self) -> float | None:
if self.status in (Status.closing, Status.closed):
return None # pragma: nocover
if self.transition_counter != self._idle_transition_counter:
self._idle_transition_counter = self.transition_counter
self.idle_since = None
return None
if self._active_graph_updates > 0:
self.idle_since = None
return None
if (
self.queued
or self.unrunnable
or any(ws.processing for ws in self.workers.values())
):
self.idle_since = None
return None
if not self.idle_since:
self.idle_since = time()
return self.idle_since
if self.jupyter:
last_activity = (
self._jupyter_server_application.web_app.last_activity().timestamp()
)
if last_activity > self.idle_since:
self.idle_since = last_activity
return self.idle_since
if self.idle_timeout:
if time() > self.idle_since + self.idle_timeout:
assert self.idle_since
logger.info(
"Scheduler closing after being idle for %s",
format_time(self.idle_timeout),
)
self._ongoing_background_tasks.call_soon(
self.close, reason="idle-timeout-exceeded"
)
return self.idle_since
def _check_no_workers(self) -> None:
if (
self.status in (Status.closing, Status.closed)
or self.no_workers_timeout is None
):
return
now = monotonic()
stimulus_id = f"check-no-workers-timeout-{time()}"
recommendations: Recs = {}
self._refresh_no_workers_since(now)
affected = self._check_unrunnable_task_timeouts(
now, recommendations=recommendations, stimulus_id=stimulus_id
)
affected.update(
self._check_queued_task_timeouts(
now, recommendations=recommendations, stimulus_id=stimulus_id
)
)
self.transitions(recommendations, stimulus_id=stimulus_id)
if affected:
self.log_event(
"scheduler",
{"action": "no-workers-timeout-exceeded", "keys": affected},
)
def _check_unrunnable_task_timeouts(
self, timestamp: float, recommendations: Recs, stimulus_id: str
) -> set[Key]:
assert self.no_workers_timeout
unsatisfied = []
no_workers = []
for ts, unrunnable_since in self.unrunnable.items():
if timestamp <= unrunnable_since + self.no_workers_timeout:
# unrunnable is insertion-ordered, which means that unrunnable_since will
# be monotonically increasing in this loop.
break
if (
self._no_workers_since is None
or self._no_workers_since >= unrunnable_since
):
unsatisfied.append(ts)
else:
no_workers.append(ts)
if not unsatisfied and not no_workers:
return set()
for ts in unsatisfied:
e = pickle.dumps(
NoValidWorkerError(
task=ts.key,
host_restrictions=(ts.host_restrictions or set()).copy(),
worker_restrictions=(ts.worker_restrictions or set()).copy(),
resource_restrictions=(ts.resource_restrictions or {}).copy(),
timeout=self.no_workers_timeout,
),
)
r = self.transition(
ts.key,
"erred",
exception=e,
cause=ts.key,
stimulus_id=stimulus_id,
)
recommendations.update(r)
logger.error(
"Task %s marked as failed because it timed out waiting "
"for its restrictions to become satisfied.",
ts.key,
)
self._fail_tasks_after_no_workers_timeout(
no_workers, recommendations, stimulus_id
)
return {ts.key for ts in concat([unsatisfied, no_workers])}
def _check_queued_task_timeouts(
self, timestamp: float, recommendations: Recs, stimulus_id: str
) -> set[Key]:
assert self.no_workers_timeout
if self._no_workers_since is None:
return set()
if timestamp <= self._no_workers_since + self.no_workers_timeout:
return set()
affected = list(self.queued)
self._fail_tasks_after_no_workers_timeout(
affected, recommendations, stimulus_id
)
return {ts.key for ts in affected}
def _fail_tasks_after_no_workers_timeout(
self, timed_out: Iterable[TaskState], recommendations: Recs, stimulus_id: str
) -> None:
assert self.no_workers_timeout
for ts in timed_out:
e = pickle.dumps(
NoWorkerError(
task=ts.key,
timeout=self.no_workers_timeout,
),
)
r = self.transition(
ts.key,
"erred",
exception=e,
cause=ts.key,
stimulus_id=stimulus_id,
)
recommendations.update(r)
logger.error(
"Task %s marked as failed because it timed out waiting "
"without any running workers.",
ts.key,
)
def _refresh_no_workers_since(self, timestamp: float | None = None) -> None:
if self.running or not (self.queued or self.unrunnable):
self._no_workers_since = None
return
if not self._no_workers_since:
self._no_workers_since = timestamp or monotonic()
return
def adaptive_target(self, target_duration=None):
"""Desired number of workers based on the current workload
This looks at the current running tasks and memory use, and returns a
number of desired workers. This is often used by adaptive scheduling.
Parameters
----------
target_duration : str
A desired duration of time for computations to take. This affects
how rapidly the scheduler will ask to scale.
See Also
--------
distributed.deploy.Adaptive
"""
if target_duration is None:
target_duration = dask.config.get("distributed.adaptive.target-duration")
target_duration = parse_timedelta(target_duration)
# CPU
queued = take(100, concat([self.queued, self.unrunnable.keys()]))
queued_occupancy = 0
for ts in queued:
queued_occupancy += self._get_prefix_duration(ts.prefix)
tasks_ready = len(self.queued) + len(self.unrunnable)
if tasks_ready > 100:
queued_occupancy *= tasks_ready / 100
cpu = math.ceil((self.total_occupancy + queued_occupancy) / target_duration)
# Avoid a few long tasks from asking for many cores
for ws in self.workers.values():
if tasks_ready > cpu:
break
tasks_ready += len(ws.processing)
else:
cpu = min(tasks_ready, cpu)
# Divide by average nthreads per worker
if self.workers:
nthreads = sum(ws.nthreads for ws in self.workers.values())
cpu = math.ceil(cpu / nthreads * len(self.workers))
if (self.unrunnable or self.queued) and not self.workers:
cpu = max(1, cpu)
# add more workers if more than 60% of memory is used
limit = sum(ws.memory_limit for ws in self.workers.values())
used = sum(ws.nbytes for ws in self.workers.values())
memory = 0
if used > 0.6 * limit and limit > 0:
memory = 2 * len(self.workers)
target = max(memory, cpu)
if target >= len(self.workers):
return target
else: # Scale down?
to_close = self.workers_to_close()
return len(self.workers) - len(to_close)
def request_acquire_replicas(
self, addr: str, keys: Iterable[Key], *, stimulus_id: str
) -> None:
"""Asynchronously ask a worker to acquire a replica of the listed keys from
other workers. This is a fire-and-forget operation which offers no feedback for
success or failure, and is intended for housekeeping and not for computation.
"""
who_has = {}
nbytes = {}
for key in keys:
ts = self.tasks[key]
assert ts.who_has
who_has[key] = [ws.address for ws in ts.who_has or ()]
nbytes[key] = ts.nbytes
self.stream_comms[addr].send(
{
"op": "acquire-replicas",
"who_has": who_has,
"nbytes": nbytes,
"stimulus_id": stimulus_id,
},
)
def request_remove_replicas(
self, addr: str, keys: list[Key], *, stimulus_id: str
) -> None:
"""Asynchronously ask a worker to discard its replica of the listed keys.
This must never be used to destroy the last replica of a key. This is a
fire-and-forget operation, intended for housekeeping and not for computation.
The replica disappears immediately from TaskState.who_has on the Scheduler side;
if the worker refuses to delete, e.g. because the task is a dependency of
another task running on it, it will (also asynchronously) inform the scheduler
to re-add itself to who_has. If the worker agrees to discard the task, there is
no feedback.
"""
ws = self.workers[addr]
# The scheduler immediately forgets about the replica and suggests the worker to
# drop it. The worker may refuse, at which point it will send back an add-keys
# message to reinstate it.
for key in keys:
ts = self.tasks[key]
if self.validate:
# Do not destroy the last copy
assert ts.who_has
assert len(ts.who_has) > 1
self.remove_replica(ts, ws)
self.stream_comms[addr].send(
{
"op": "remove-replicas",
"keys": keys,
"stimulus_id": stimulus_id,
}
)
def _task_to_report_msg(ts: TaskState) -> dict[str, Any] | None:
if ts.state == "forgotten":
return {"op": "cancelled-keys", "keys": [ts.key]}
elif ts.state == "memory":
return {"op": "key-in-memory", "key": ts.key}
elif ts.state == "erred":
failing_ts = ts.exception_blame
assert failing_ts
return {
"op": "task-erred",
"key": ts.key,
"exception": failing_ts.exception,
"traceback": failing_ts.traceback,
}
else:
return None
def _task_to_client_msgs(ts: TaskState) -> Msgs:
if ts.who_wants:
report_msg = _task_to_report_msg(ts)
if report_msg is not None:
return {cs.client_key: [report_msg] for cs in ts.who_wants}
return {}
def decide_worker(
ts: TaskState,
all_workers: set[WorkerState],
valid_workers: set[WorkerState] | None,
objective: Callable[[WorkerState], Any],
) -> WorkerState | None:
"""
Decide which worker should take task *ts*.
We choose the worker that has the data on which *ts* depends.
If several workers have dependencies then we choose the less-busy worker.
Optionally provide *valid_workers* of where jobs are allowed to occur
(if all workers are allowed to take the task, pass None instead).
If the task requires data communication because no eligible worker has
all the dependencies already, then we choose to minimize the number
of bytes sent between workers. This is determined by calling the
*objective* function.
"""
assert all(dts.who_has for dts in ts.dependencies)
if ts.actor:
candidates = all_workers.copy()
else:
candidates = {wws for dts in ts.dependencies for wws in dts.who_has or ()}
candidates &= all_workers
if valid_workers is None:
if not candidates:
candidates = all_workers.copy()
else:
candidates &= valid_workers
if not candidates:
candidates = valid_workers
if not candidates:
if ts.loose_restrictions:
return decide_worker(ts, all_workers, None, objective)
if not candidates:
return None
elif len(candidates) == 1:
return next(iter(candidates))
else:
return min(candidates, key=objective)
def validate_task_state(ts: TaskState) -> None:
"""Validate the given TaskState"""
assert ts.state in ALL_TASK_STATES, ts
if ts.waiting_on:
assert ts.waiting_on.issubset(ts.dependencies), (
"waiting not subset of dependencies",
str(ts.waiting_on),
str(ts.dependencies),
)
if ts.waiters:
assert ts.waiters.issubset(ts.dependents), (
"waiters not subset of dependents",
str(ts.waiters),
str(ts.dependents),
)
for dts in ts.waiting_on or ():
assert not dts.who_has, ("waiting on in-memory dep", str(ts), str(dts))
assert dts.state != "released", ("waiting on released dep", str(ts), str(dts))
for dts in ts.dependencies:
assert ts in dts.dependents, (
"not in dependency's dependents",
str(ts),
str(dts),
str(dts.dependents),
)
if ts.state in ("waiting", "queued", "processing", "no-worker"):
assert ts.waiting_on and dts in ts.waiting_on or dts.who_has, (
"dep missing",
str(ts),
str(dts),
)
assert dts.state != "forgotten"
for dts in ts.waiters or ():
assert dts.state in ("waiting", "queued", "processing", "no-worker"), (
"waiter not in play",
str(ts),
str(dts),
)
for dts in ts.dependents:
assert ts in dts.dependencies, (
"not in dependent's dependencies",
str(ts),
str(dts),
str(dts.dependencies),
)
assert dts.state != "forgotten"
assert (ts.processing_on is not None) == (ts.state == "processing")
assert bool(ts.who_has) == (ts.state == "memory"), (ts, ts.who_has, ts.state)
if ts.state == "queued":
assert not ts.processing_on
assert not ts.who_has
assert all(dts.who_has for dts in ts.dependencies), (
"task queued without all deps",
str(ts),
str(ts.dependencies),
)
if ts.state == "processing":
assert all(dts.who_has for dts in ts.dependencies), (
"task processing without all deps",
str(ts),
str(ts.dependencies),
)
assert not ts.waiting_on
if ts.who_has:
assert ts.waiters or ts.who_wants, (
"unneeded task in memory",
str(ts),
str(ts.who_has),
)
if ts.run_spec: # was computed
assert ts.type
assert isinstance(ts.type, str)
assert not any(
[
ts in dts.waiting_on
for dts in ts.dependents
if dts.waiting_on is not None
]
)
for ws in ts.who_has:
assert ts in ws.has_what, (
"not in who_has' has_what",
str(ts),
str(ws),
str(ws.has_what),
)
for cs in ts.who_wants or ():
assert ts in cs.wants_what, (
"not in who_wants' wants_what",
str(ts),
str(cs),
str(cs.wants_what),
)
if ts.actor:
if ts.state == "memory":
assert ts.who_has
assert sum(ts in ws.actors for ws in ts.who_has) == 1
if ts.state == "processing":
assert ts.processing_on
assert ts in ts.processing_on.actors
assert ts.state != "queued"
def validate_unrunnable(unrunnable: dict[TaskState, float]) -> None:
prev_unrunnable_since: float | None = None
prev_ts: TaskState | None = None
for ts, unrunnable_since in unrunnable.items():
assert ts.state == "no-worker"
if prev_ts is not None:
assert prev_unrunnable_since is not None
# Ensure that unrunnable_since is monotonically increasing when iterating over unrunnable.
# _check_no_workers relies on this.
assert prev_unrunnable_since <= unrunnable_since, (
prev_ts,
ts,
prev_unrunnable_since,
unrunnable_since,
)
prev_ts = ts
prev_unrunnable_since = unrunnable_since
def validate_worker_state(ws: WorkerState) -> None:
for ts in ws.has_what or ():
assert ts.who_has
assert ws in ts.who_has, (
"not in has_what' who_has",
str(ws),
str(ts),
str(ts.who_has),
)
for ts in ws.actors:
assert ts.state in ("memory", "processing")
def validate_state(
tasks: dict[Key, TaskState],
workers: dict[str, WorkerState],
clients: dict[str, ClientState],
) -> None:
"""Validate a current runtime state.
This performs a sequence of checks on the entire graph, running in about linear
time. This raises assert errors if anything doesn't check out.
"""
for ts in tasks.values():
validate_task_state(ts)
for ws in workers.values():
validate_worker_state(ws)
for cs in clients.values():
for ts in cs.wants_what or ():
assert ts.who_wants
assert cs in ts.who_wants, (
"not in wants_what' who_wants",
str(cs),
str(ts),
str(ts.who_wants),
)
def heartbeat_interval(n: int) -> float:
"""Interval in seconds that we desire heartbeats based on number of workers"""
if n <= 10:
return 0.5
elif n < 50:
return 1
elif n < 200:
return 2
else:
# No more than 200 heartbeats a second scaled by workers
return n / 200 + 1
def _task_slots_available(ws: WorkerState, saturation_factor: float) -> int:
"""Number of tasks that can be sent to this worker without oversaturating it"""
assert not math.isinf(saturation_factor)
return max(math.ceil(saturation_factor * ws.nthreads), 1) - (
len(ws.processing) - len(ws.long_running)
)
def _worker_full(ws: WorkerState, saturation_factor: float) -> bool:
if math.isinf(saturation_factor):
return False
return _task_slots_available(ws, saturation_factor) <= 0
class KilledWorker(Exception):
def __init__(self, task: Key, last_worker: WorkerState, allowed_failures: int):
super().__init__(task, last_worker, allowed_failures)
@property
def task(self) -> Key:
return self.args[0]
@property
def last_worker(self) -> WorkerState:
return self.args[1]
@property
def allowed_failures(self) -> int:
return self.args[2]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} on {self.allowed_failures + 1} "
"different workers, but all those workers died while running it. "
f"The last worker that attempt to run the task was {self.last_worker.address}. "
"Inspecting worker logs is often a good next step to diagnose what went wrong. "
"For more information see https://distributed.dask.org/en/stable/killed.html."
)
class NoValidWorkerError(Exception):
def __init__(
self,
task: Key,
host_restrictions: set[str],
worker_restrictions: set[str],
resource_restrictions: dict[str, float],
timeout: float,
):
super().__init__(
task, host_restrictions, worker_restrictions, resource_restrictions, timeout
)
@property
def task(self) -> Key:
return self.args[0]
@property
def host_restrictions(self) -> Any:
return self.args[1]
@property
def worker_restrictions(self) -> Any:
return self.args[2]
@property
def resource_restrictions(self) -> Any:
return self.args[3]
@property
def timeout(self) -> float:
return self.args[4]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} but timed out after {format_time(self.timeout)} "
"waiting for a valid worker matching all restrictions.\n\nRestrictions:\n"
f"host_restrictions={self.host_restrictions!s}\n"
f"worker_restrictions={self.worker_restrictions!s}\n"
f"resource_restrictions={self.resource_restrictions!s}\n"
)
class NoWorkerError(Exception):
def __init__(self, task: Key, timeout: float):
super().__init__(task, timeout)
@property
def task(self) -> Key:
return self.args[0]
@property
def timeout(self) -> float:
return self.args[1]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} but timed out after {format_time(self.timeout)} "
"waiting without any running workers."
)
class WorkerStatusPlugin(SchedulerPlugin):
"""A plugin to share worker status with a remote observer
This is used in cluster managers to keep updated about the status of the scheduler.
"""
name: ClassVar[str] = "worker-status"
bcomm: BatchedSend
def __init__(self, scheduler: Scheduler, comm: Comm):
self.bcomm = BatchedSend(interval="5ms")
self.bcomm.start(comm)
scheduler.add_plugin(self)
def add_worker(self, scheduler: Scheduler, worker: str) -> None:
ident = scheduler.workers[worker].identity()
del ident["metrics"]
del ident["last_seen"]
try:
self.bcomm.send(["add", {"workers": {worker: ident}}])
except CommClosedError:
scheduler.remove_plugin(name=self.name)
def remove_worker(self, scheduler: Scheduler, worker: str, **kwargs: Any) -> None:
try:
self.bcomm.send(["remove", worker])
except CommClosedError:
scheduler.remove_plugin(name=self.name)
def teardown(self) -> None:
self.bcomm.close()
class CollectTaskMetaDataPlugin(SchedulerPlugin):
scheduler: Scheduler
name: str
keys: set[Key]
metadata: dict[Key, Any]
state: dict[Key, TaskStateState]
def __init__(self, scheduler: Scheduler, name: str):
self.scheduler = scheduler
self.name = name
self.keys = set()
self.metadata = {}
self.state = {}
def update_graph(
self,
scheduler: Scheduler,
*,
keys: set[Key],
**kwargs: Any,
) -> None:
self.keys.update(keys)
def transition(
self,
key: Key,
start: TaskStateState,
finish: TaskStateState,
*args: Any,
**kwargs: Any,
) -> None:
if finish in ("memory", "erred"):
ts = self.scheduler.tasks.get(key)
if ts is not None and ts.key in self.keys:
self.metadata[key] = ts.metadata
self.state[key] = finish
self.keys.discard(key)
def _materialize_graph(
expr: Expr,
global_annotations: dict[str, Any],
validate: bool,
) -> tuple[dict[Key, T_runspec], dict[Key, set[Key]], dict[str, dict[Key, Any]]]:
> dsk: dict = expr.__dask_graph__()
E AttributeError: 'dict' object has no attribute '__dask_graph__'
distributed/scheduler.py:9383: AttributeError
Check warning on line 0 in distributed.cli.tests.test_dask_worker
github-actions / Unit Test Results
1 out of 5 runs failed: test_contact_listen_address[tcp://127.0.0.2:---no-nanny] (distributed.cli.tests.test_dask_worker)
artifacts/ubuntu-latest-mindeps-default-notci1/pytest.xml [took 0s]
Raw output
AttributeError: 'dict' object has no attribute '__dask_graph__'
c = <Client: No scheduler connected>
s = <Scheduler 'tcp://127.0.0.1:34705', workers: 0, cores: 0, tasks: 0>
nanny = '--no-nanny', listen_address = 'tcp://127.0.0.2:60837'
@pytest.mark.slow
@pytest.mark.skipif(not LINUX, reason="Need 127.0.0.2 to mean localhost")
@pytest.mark.parametrize("nanny", ["--nanny", "--no-nanny"])
@pytest.mark.parametrize("listen_address", ["tcp://0.0.0.0:", "tcp://127.0.0.2:"])
@gen_cluster(client=True, nthreads=[])
async def test_contact_listen_address(c, s, nanny, listen_address):
port = open_port()
listen_address += str(port)
with popen(
[
sys.executable,
"-m",
"dask",
"worker",
s.address,
nanny,
"--no-dashboard",
"--contact-address",
f"tcp://127.0.0.2:{port}",
"--listen-address",
listen_address,
]
):
await c.wait_for_workers(1)
info = c.scheduler_info()
assert info["workers"].keys() == {f"tcp://127.0.0.2:{port}"}
# roundtrip works
> assert await c.submit(lambda x: x + 1, 10) == 11
distributed/cli/tests/test_dask_worker.py:500:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
distributed/client.py:408: in _result
raise exc.with_traceback(tb)
distributed/utils.py:1507: in run_in_executor_with_context
return await loop.run_in_executor(
../../../miniconda3/envs/dask-distributed/lib/python3.10/concurrent/futures/thread.py:58: in run
result = self.fn(*self.args, **self.kwargs)
distributed/utils.py:1508: in <lambda>
executor, lambda: context.run(func, *args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
from __future__ import annotations
import asyncio
import contextlib
import dataclasses
import heapq
import inspect
import itertools
import json
import logging
import math
import operator
import os
import pickle
import random
import textwrap
import uuid
import warnings
import weakref
from abc import abstractmethod
from collections import defaultdict, deque
from collections.abc import (
Callable,
Collection,
Container,
Hashable,
Iterable,
Iterator,
Mapping,
Sequence,
Set,
)
from contextlib import suppress
from functools import partial
from typing import TYPE_CHECKING, Any, ClassVar, Literal, NamedTuple, cast, overload
import psutil
import tornado.web
from sortedcontainers import SortedDict, SortedSet
from tlz import (
concat,
first,
groupby,
merge,
merge_sorted,
merge_with,
partition,
pluck,
second,
take,
valmap,
)
from tornado.ioloop import IOLoop
import dask
import dask.utils
from dask._task_spec import DependenciesMapping, GraphNode, convert_legacy_graph
from dask.core import istask, validate_key
from dask.typing import Key, no_default
from dask.utils import (
_deprecated,
_deprecated_kwarg,
format_bytes,
format_time,
key_split,
parse_bytes,
parse_timedelta,
tmpfile,
)
from dask.widgets import get_template
from distributed import cluster_dump, preloading, profile
from distributed import versions as version_module
from distributed._asyncio import RLock
from distributed._stories import scheduler_story
from distributed.active_memory_manager import ActiveMemoryManagerExtension, RetireWorker
from distributed.batched import BatchedSend
from distributed.broker import Broker
from distributed.client import SourceCode
from distributed.collections import HeapSet
from distributed.comm import (
Comm,
CommClosedError,
get_address_host,
normalize_address,
resolve_address,
unparse_host_port,
)
from distributed.comm.addressing import addresses_from_user_args
from distributed.compatibility import PeriodicCallback
from distributed.core import (
ErrorMessage,
OKMessage,
Status,
clean_exception,
error_message,
rpc,
send_recv,
)
from distributed.diagnostics.memory_sampler import MemorySamplerExtension
from distributed.diagnostics.plugin import SchedulerPlugin, _get_plugin_name
from distributed.event import EventExtension
from distributed.gc import disable_gc_diagnosis, enable_gc_diagnosis
from distributed.http import get_handlers
from distributed.metrics import monotonic, time
from distributed.multi_lock import MultiLockExtension
from distributed.node import ServerNode
from distributed.proctitle import setproctitle
from distributed.protocol import deserialize
from distributed.protocol.pickle import dumps, loads
from distributed.protocol.serialize import Serialized, ToPickle, serialize
from distributed.publish import PublishExtension
from distributed.pubsub import PubSubSchedulerExtension
from distributed.queues import QueueExtension
from distributed.recreate_tasks import ReplayTaskScheduler
from distributed.security import Security
from distributed.semaphore import SemaphoreExtension
from distributed.shuffle import ShuffleSchedulerPlugin
from distributed.spans import SpanMetadata, SpansSchedulerExtension
from distributed.stealing import WorkStealing
from distributed.utils import (
All,
Deadline,
TimeoutError,
format_dashboard_link,
get_fileno_limit,
key_split_group,
log_errors,
offload,
recursive_to_dict,
wait_for,
)
from distributed.utils_comm import (
gather_from_workers,
retry_operation,
scatter_to_workers,
)
from distributed.variable import VariableExtension
if TYPE_CHECKING:
# TODO import from typing (requires Python >=3.10)
# TODO import from typing (requires Python >=3.11)
from typing_extensions import Self, TypeAlias
from dask._expr import Expr
# Not to be confused with distributed.worker_state_machine.TaskStateState
TaskStateState: TypeAlias = Literal[
"released",
"waiting",
"no-worker",
"queued",
"processing",
"memory",
"erred",
"forgotten",
]
ALL_TASK_STATES: Set[TaskStateState] = set(TaskStateState.__args__) # type: ignore
# {task key -> finish state}
# Not to be confused with distributed.worker_state_machine.Recs
Recs: TypeAlias = dict[Key, TaskStateState]
# {client or worker address: [{op: <key>, ...}, ...]}
Msgs: TypeAlias = dict[str, list[dict[str, Any]]]
# (recommendations, client messages, worker messages)
RecsMsgs: TypeAlias = tuple[Recs, Msgs, Msgs]
T_runspec: TypeAlias = GraphNode
logger = logging.getLogger(__name__)
LOG_PDB = dask.config.get("distributed.admin.pdb-on-err")
DEFAULT_DATA_SIZE = parse_bytes(
dask.config.get("distributed.scheduler.default-data-size")
)
STIMULUS_ID_UNSET = "<stimulus_id unset>"
DEFAULT_EXTENSIONS = {
"multi_locks": MultiLockExtension,
"publish": PublishExtension,
"replay-tasks": ReplayTaskScheduler,
"queues": QueueExtension,
"variables": VariableExtension,
"pubsub": PubSubSchedulerExtension,
"semaphores": SemaphoreExtension,
"events": EventExtension,
"amm": ActiveMemoryManagerExtension,
"memory_sampler": MemorySamplerExtension,
"shuffle": ShuffleSchedulerPlugin,
"spans": SpansSchedulerExtension,
"stealing": WorkStealing,
}
class ClientState:
"""A simple object holding information about a client."""
#: A unique identifier for this client. This is generally an opaque
#: string generated by the client itself.
client_key: str
#: Cached hash of :attr:`~ClientState.client_key`
_hash: int
#: A set of tasks this client wants to be kept in memory, so that it can download
#: its result when desired. This is the reverse mapping of
#: :class:`TaskState.who_wants`. Tasks are typically removed from this set when the
#: corresponding object in the client's space (for example a ``Future`` or a Dask
#: collection) gets garbage-collected.
wants_what: set[TaskState]
#: The last time we received a heartbeat from this client, in local scheduler time.
last_seen: float
#: Output of :func:`distributed.versions.get_versions` on the client
versions: dict[str, Any]
__slots__ = tuple(__annotations__)
def __init__(self, client: str, *, versions: dict[str, Any] | None = None):
self.client_key = client
self._hash = hash(client)
self.wants_what = set()
self.last_seen = time()
self.versions = versions or {}
def __hash__(self) -> int:
return self._hash
def __eq__(self, other: object) -> bool:
if not isinstance(other, ClientState):
return False
return self.client_key == other.client_key
def __repr__(self) -> str:
return f"<Client {self.client_key!r}>"
def __str__(self) -> str:
return self.client_key
def _to_dict_no_nest(self, *, exclude: Container[str] = ()) -> dict:
"""Dictionary representation for debugging purposes.
Not type stable and not intended for roundtrips.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
TaskState._to_dict
"""
return recursive_to_dict(
self,
exclude=set(exclude) | {"versions"}, # type: ignore
members=True,
)
class MemoryState:
"""Memory readings on a worker or on the whole cluster.
See :doc:`worker-memory`.
Attributes / properties:
managed_total
Sum of the output of sizeof() for all dask keys held by the worker in memory,
plus number of bytes spilled to disk
managed
Sum of the output of sizeof() for the dask keys held in RAM. Note that this may
be inaccurate, which may cause inaccurate unmanaged memory (see below).
spilled
Number of bytes for the dask keys spilled to the hard drive.
Note that this is the size on disk; size in memory may be different due to
compression and inaccuracies in sizeof(). In other words, given the same keys,
'managed' will change depending on the keys being in memory or spilled.
process
Total RSS memory measured by the OS on the worker process.
This is always exactly equal to managed + unmanaged.
unmanaged
process - managed. This is the sum of
- Python interpreter and modules
- global variables
- memory temporarily allocated by the dask tasks that are currently running
- memory fragmentation
- memory leaks
- memory not yet garbage collected
- memory not yet free()'d by the Python memory manager to the OS
unmanaged_old
Minimum of the 'unmanaged' measures over the last
``distributed.memory.recent-to-old-time`` seconds
unmanaged_recent
unmanaged - unmanaged_old; in other words process memory that has been recently
allocated but is not accounted for by dask; hopefully it's mostly a temporary
spike.
optimistic
managed + unmanaged_old; in other words the memory held long-term by
the process under the hopeful assumption that all unmanaged_recent memory is a
temporary spike
"""
process: int
unmanaged_old: int
managed: int
spilled: int
__slots__ = tuple(__annotations__)
def __init__(
self,
*,
process: int,
unmanaged_old: int,
managed: int,
spilled: int,
):
# Some data arrives with the heartbeat, some other arrives in realtime as the
# tasks progress. Also, sizeof() is not guaranteed to return correct results.
# This can cause glitches where a partial measure is larger than the whole, so
# we need to force all numbers to add up exactly by definition.
self.process = process
self.managed = min(self.process, managed)
self.spilled = spilled
# Subtractions between unsigned ints guaranteed by construction to be >= 0
self.unmanaged_old = min(unmanaged_old, process - self.managed)
@staticmethod
def sum(*infos: MemoryState) -> MemoryState:
process = 0
unmanaged_old = 0
managed = 0
spilled = 0
for ms in infos:
process += ms.process
unmanaged_old += ms.unmanaged_old
spilled += ms.spilled
managed += ms.managed
return MemoryState(
process=process,
unmanaged_old=unmanaged_old,
managed=managed,
spilled=spilled,
)
@property
def managed_total(self) -> int:
return self.managed + self.spilled
@property
def unmanaged(self) -> int:
# This is never negative thanks to __init__
return self.process - self.managed
@property
def unmanaged_recent(self) -> int:
# This is never negative thanks to __init__
return self.process - self.managed - self.unmanaged_old
@property
def optimistic(self) -> int:
return self.managed + self.unmanaged_old
@property
def managed_in_memory(self) -> int:
warnings.warn("managed_in_memory has been renamed to managed", FutureWarning)
return self.managed
@property
def managed_spilled(self) -> int:
warnings.warn("managed_spilled has been renamed to spilled", FutureWarning)
return self.spilled
def __repr__(self) -> str:
return (
f"Process memory (RSS) : {format_bytes(self.process)}\n"
f" - managed by Dask : {format_bytes(self.managed)}\n"
f" - unmanaged (old) : {format_bytes(self.unmanaged_old)}\n"
f" - unmanaged (recent): {format_bytes(self.unmanaged_recent)}\n"
f"Spilled to disk : {format_bytes(self.spilled)}\n"
)
def _to_dict(self, *, exclude: Container[str] = ()) -> dict:
"""Dictionary representation for debugging purposes.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
"""
return {
k: getattr(self, k)
for k in dir(self)
if not k.startswith("_")
and k not in {"sum", "managed_in_memory", "managed_spilled"}
}
class WorkerState:
"""A simple object holding information about a worker.
Not to be confused with :class:`distributed.worker_state_machine.WorkerState`.
"""
#: This worker's unique key. This can be its connected address
#: (such as ``"tcp://127.0.0.1:8891"``) or an alias (such as ``"alice"``).
address: str
pid: int
name: Hashable
#: The number of CPU threads made available on this worker
nthreads: int
#: Memory available to the worker, in bytes
memory_limit: int
local_directory: str
services: dict[str, int]
#: Output of :meth:`distributed.versions.get_versions` on the worker
versions: dict[str, Any]
#: Address of the associated :class:`~distributed.nanny.Nanny`, if present
nanny: str | None
#: Read-only worker status, synced one way from the remote Worker object
status: Status
#: Cached hash of :attr:`~WorkerState.server_id`
_hash: int
#: The total memory size, in bytes, used by the tasks this worker holds in memory
#: (i.e. the tasks in this worker's :attr:`~WorkerState.has_what`).
nbytes: int
#: Worker memory unknown to the worker, in bytes, which has been there for more than
#: 30 seconds. See :class:`MemoryState`.
_memory_unmanaged_old: int
#: History of the last 30 seconds' worth of unmanaged memory. Used to differentiate
#: between "old" and "new" unmanaged memory.
#: Format: ``[(timestamp, bytes), (timestamp, bytes), ...]``
_memory_unmanaged_history: deque[tuple[float, int]]
metrics: dict[str, Any]
#: The last time we received a heartbeat from this worker, in local scheduler time.
last_seen: float
time_delay: float
bandwidth: float
#: A set of all TaskStates on this worker that are actors. This only includes those
#: actors whose state actually lives on this worker, not actors to which this worker
#: has a reference.
actors: set[TaskState]
#: Underlying data of :meth:`WorkerState.has_what`
_has_what: dict[TaskState, None]
#: A set of tasks that have been submitted to this worker. Multiple tasks may be
# submitted to a worker in advance and the worker will run them eventually,
# depending on its execution resources (but see :doc:`work-stealing`).
#:
#: All the tasks here are in the "processing" state.
#: This attribute is kept in sync with :attr:`TaskState.processing_on`.
processing: set[TaskState]
#: Running tasks that invoked :func:`distributed.secede`
long_running: set[TaskState]
#: A dictionary of tasks that are currently being run on this worker.
#: Each task state is associated with the duration in seconds which the task has
#: been running.
executing: dict[TaskState, float]
#: The available resources on this worker, e.g. ``{"GPU": 2}``.
#: These are abstract quantities that constrain certain tasks from running at the
#: same time on this worker.
resources: dict[str, float]
#: The sum of each resource used by all tasks allocated to this worker.
#: The numbers in this dictionary can only be less or equal than those in this
#: worker's :attr:`~WorkerState.resources`.
used_resources: dict[str, float]
#: Arbitrary additional metadata to be added to :meth:`~WorkerState.identity`
extra: dict[str, Any]
# The unique server ID this WorkerState is referencing
server_id: str
# Reference to scheduler task_groups
scheduler_ref: weakref.ref[SchedulerState] | None
task_prefix_count: defaultdict[str, int]
_network_occ: int
_occupancy_cache: float | None
#: Keys that may need to be fetched to this worker, and the number of tasks that need them.
#: All tasks are currently in `memory` on a worker other than this one.
#: Much like `processing`, this does not exactly reflect worker state:
#: keys here may be queued to fetch, in flight, or already in memory
#: on the worker.
needs_what: dict[TaskState, int]
__slots__ = tuple(__annotations__)
def __init__(
self,
*,
address: str,
status: Status,
pid: int,
name: object,
nthreads: int = 0,
memory_limit: int,
local_directory: str,
nanny: str | None,
server_id: str,
services: dict[str, int] | None = None,
versions: dict[str, Any] | None = None,
extra: dict[str, Any] | None = None,
scheduler: SchedulerState | None = None,
):
self.server_id = server_id
self.address = address
self.pid = pid
self.name = name
self.nthreads = nthreads
self.memory_limit = memory_limit
self.local_directory = local_directory
self.services = services or {}
self.versions = versions or {}
self.nanny = nanny
self.status = status
self._hash = hash(self.server_id)
self.nbytes = 0
self._memory_unmanaged_old = 0
self._memory_unmanaged_history = deque()
self.metrics = {}
self.last_seen = time()
self.time_delay = 0
self.bandwidth = parse_bytes(dask.config.get("distributed.scheduler.bandwidth"))
self.actors = set()
self._has_what = {}
self.processing = set()
self.long_running = set()
self.executing = {}
self.resources = {}
self.used_resources = {}
self.extra = extra or {}
self.scheduler_ref = weakref.ref(scheduler) if scheduler else None
self.task_prefix_count = defaultdict(int)
self.needs_what = {}
self._network_occ = 0
self._occupancy_cache = None
def __hash__(self) -> int:
return self._hash
def __eq__(self, other: object) -> bool:
return self is other or (
isinstance(other, WorkerState) and other.server_id == self.server_id
)
@property
def has_what(self) -> Set[TaskState]:
"""An insertion-sorted set-like of tasks which currently reside on this worker.
All the tasks here are in the "memory" state.
This is the reverse mapping of :attr:`TaskState.who_has`.
This is a read-only public accessor. The data is implemented as a dict without
values, because rebalance() relies on dicts being insertion-sorted.
"""
return self._has_what.keys()
@property
def host(self) -> str:
return get_address_host(self.address)
@property
def memory(self) -> MemoryState:
"""Polished memory metrics for the worker.
**Design note on managed memory**
There are two measures available for managed memory:
- ``self.nbytes``
- ``self.metrics["managed_bytes"]``
At rest, the two numbers must be identical. However, ``self.nbytes`` is
immediately updated through the batched comms as soon as each task lands in
memory on the worker; ``self.metrics["managed_bytes"]`` instead is updated by
the heartbeat, which can lag several seconds behind.
Below we are mixing likely newer managed memory info from ``self.nbytes`` with
process and spilled memory from the heartbeat. This is deliberate, so that
managed memory total is updated more frequently.
Managed memory directly and immediately contributes to optimistic memory, which
is in turn used in Active Memory Manager heuristics (at the moment of writing;
more uses will likely be added in the future). So it's important to have it
up to date; much more than it is for process memory.
Having up-to-date managed memory info as soon as the scheduler learns about
task completion also substantially simplifies unit tests.
The flip side of this design is that it may cause some noise in the
unmanaged_recent measure. e.g.:
1. Delete 100MB of managed data
2. The updated managed memory reaches the scheduler faster than the
updated process memory
3. There's a blip where the scheduler thinks that there's a sudden 100MB
increase in unmanaged_recent, since process memory hasn't changed but managed
memory has decreased by 100MB
4. When the heartbeat arrives, process memory goes down and so does the
unmanaged_recent.
This is OK - one of the main reasons for the unmanaged_recent / unmanaged_old
split is exactly to concentrate all the noise in unmanaged_recent and exclude it
from optimistic memory, which is used for heuristics.
Something that is less OK, but also less frequent, is that the sudden deletion
of spilled keys will cause a negative blip in managed memory:
1. Delete 100MB of spilled data
2. The updated managed memory *total* reaches the scheduler faster than the
updated spilled portion
3. This causes the managed memory to temporarily plummet and be replaced by
unmanaged_recent, while spilled memory remains unaltered
4. When the heartbeat arrives, managed goes back up, unmanaged_recent
goes back down, and spilled goes down by 100MB as it should have to
begin with.
:issue:`6002` will let us solve this.
"""
return MemoryState(
process=self.metrics["memory"],
managed=max(0, self.nbytes - self.metrics["spilled_bytes"]["memory"]),
spilled=self.metrics["spilled_bytes"]["disk"],
unmanaged_old=self._memory_unmanaged_old,
)
def clean(self) -> WorkerState:
"""Return a version of this object that is appropriate for serialization"""
ws = WorkerState(
address=self.address,
status=self.status,
pid=self.pid,
name=self.name,
nthreads=self.nthreads,
memory_limit=self.memory_limit,
local_directory=self.local_directory,
services=self.services,
nanny=self.nanny,
extra=self.extra,
server_id=self.server_id,
)
ws._occupancy_cache = self.occupancy
ws.executing = {ts.key: duration for ts, duration in self.executing.items()} # type: ignore
return ws
def __repr__(self) -> str:
name = f", name: {self.name}" if self.name != self.address else ""
return (
f"<WorkerState {self.address!r}{name}, "
f"status: {self.status.name}, "
f"memory: {len(self.has_what)}, "
f"processing: {len(self.processing)}>"
)
def _repr_html_(self) -> str:
return get_template("worker_state.html.j2").render(
address=self.address,
name=self.name,
status=self.status.name,
has_what=self.has_what,
processing=self.processing,
)
def identity(self) -> dict[str, Any]:
return {
"type": "Worker",
"id": self.name,
"host": self.host,
"resources": self.resources,
"local_directory": self.local_directory,
"name": self.name,
"nthreads": self.nthreads,
"memory_limit": self.memory_limit,
"last_seen": self.last_seen,
"services": self.services,
"metrics": self.metrics,
"status": self.status.name,
"nanny": self.nanny,
**self.extra,
}
def _to_dict_no_nest(self, *, exclude: Container[str] = ()) -> dict[str, Any]:
"""Dictionary representation for debugging purposes.
Not type stable and not intended for roundtrips.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
TaskState._to_dict
"""
return recursive_to_dict(
self,
exclude=set(exclude) | {"versions"}, # type: ignore
members=True,
)
@property
def scheduler(self) -> SchedulerState:
assert self.scheduler_ref
s = self.scheduler_ref()
assert s
return s
def add_to_processing(self, ts: TaskState) -> None:
"""Assign a task to this worker for compute."""
if self.scheduler.validate:
assert ts not in self.processing
tp = ts.prefix
self.task_prefix_count[tp.name] += 1
self.scheduler._task_prefix_count_global[tp.name] += 1
self.processing.add(ts)
for dts in ts.dependencies:
assert dts.who_has
if self not in dts.who_has:
self._inc_needs_replica(dts)
def add_to_long_running(self, ts: TaskState) -> None:
if self.scheduler.validate:
assert ts in self.processing
assert ts not in self.long_running
self._remove_from_task_prefix_count(ts)
# Cannot remove from processing since we're using this for things like
# idleness detection. Idle workers are typically targeted for
# downscaling but we should not downscale workers with long running
# tasks
self.long_running.add(ts)
def remove_from_processing(self, ts: TaskState) -> None:
"""Remove a task from a workers processing"""
if self.scheduler.validate:
assert ts in self.processing
if ts in self.long_running:
self.long_running.discard(ts)
else:
self._remove_from_task_prefix_count(ts)
self.processing.remove(ts)
for dts in ts.dependencies:
if dts in self.needs_what:
self._dec_needs_replica(dts)
def _remove_from_task_prefix_count(self, ts: TaskState) -> None:
prefix_name = ts.prefix.name
count = self.task_prefix_count[prefix_name] - 1
tp_count = self.task_prefix_count
tp_count_global = self.scheduler._task_prefix_count_global
if count:
tp_count[prefix_name] = count
else:
del tp_count[prefix_name]
count = tp_count_global[prefix_name] - 1
if count:
tp_count_global[prefix_name] = count
else:
del tp_count_global[prefix_name]
def remove_replica(self, ts: TaskState) -> None:
"""The worker no longer has a task in memory"""
if self.scheduler.validate:
assert ts.who_has
assert self in ts.who_has
assert ts in self.has_what
assert ts not in self.nee…cheduler, title="Scheduler Profile (administrative)"
)
task_stream = TabPanel(child=task_stream, title="Task Stream")
bandwidth_workers = TabPanel(
child=bandwidth_workers.root, title="Bandwidth (Workers)"
)
bandwidth_types = TabPanel(
child=bandwidth_types.root, title="Bandwidth (Types)"
)
system = TabPanel(child=sysmon.root, title="System")
logs = TabPanel(child=logs.root, title="Scheduler Logs")
tabs = Tabs(
tabs=[
html,
task_stream,
system,
logs,
compute,
workers,
scheduler,
bandwidth_workers,
bandwidth_types,
],
sizing_mode="stretch_both",
)
from bokeh.core.templates import get_env
from bokeh.plotting import output_file, save
with tmpfile(extension=".html") as fn:
output_file(filename=fn, title="Dask Performance Report", mode=mode)
template_directory = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "dashboard", "templates"
)
template_environment = get_env()
template_environment.loader.searchpath.append(template_directory)
template = template_environment.get_template("performance_report.html")
save(tabs, filename=fn, template=template)
with open(fn) as f:
data = f.read()
return data
async def get_worker_logs(self, n=None, workers=None, nanny=False):
results = await self.broadcast(
msg={"op": "get_logs", "n": n}, workers=workers, nanny=nanny
)
return results
def log_event(self, topic: str | Collection[str], msg: Any) -> None:
"""Log an event under a given topic
Parameters
----------
topic : str, list[str]
Name of the topic under which to log an event. To log the same
event under multiple topics, pass a list of topic names.
msg
Event message to log. Note this must be msgpack serializable.
See also
--------
Client.log_event
"""
self._broker.publish(topic, msg)
def subscribe_topic(self, topic: str, client: str) -> None:
self._broker.subscribe(topic, client)
def unsubscribe_topic(self, topic: str, client: str) -> None:
self._broker.unsubscribe(topic, client)
@overload
def get_events(self, topic: str) -> tuple[tuple[float, Any], ...]: ...
@overload
def get_events(self) -> dict[str, tuple[tuple[float, Any], ...]]: ...
def get_events(
self, topic: str | None = None
) -> tuple[tuple[float, Any], ...] | dict[str, tuple[tuple[float, Any], ...]]:
return self._broker.get_events(topic)
async def get_worker_monitor_info(self, recent=False, starts=None):
if starts is None:
starts = {}
results = await asyncio.gather(
*(
self.rpc(w).get_monitor_info(recent=recent, start=starts.get(w, 0))
for w in self.workers
)
)
return dict(zip(self.workers, results))
###########
# Cleanup #
###########
@log_errors
async def check_worker_ttl(self) -> None:
now = time()
stimulus_id = f"check-worker-ttl-{now}"
assert self.worker_ttl
ttl = max(self.worker_ttl, 10 * heartbeat_interval(len(self.workers)))
to_restart = []
for ws in self.workers.values():
last_seen = now - ws.last_seen
if last_seen > ttl:
to_restart.append(ws.address)
logger.warning(
f"Worker failed to heartbeat for {last_seen:.0f}s; "
f"{'attempting restart' if ws.nanny else 'removing'}: {ws}"
)
if to_restart:
self.log_event(
"scheduler",
{
"action": "worker-ttl-timed-out",
"workers": to_restart.copy(),
"ttl": ttl,
},
)
await self.restart_workers(
to_restart,
wait_for_workers=False,
stimulus_id=stimulus_id,
)
def check_idle(self) -> float | None:
if self.status in (Status.closing, Status.closed):
return None # pragma: nocover
if self.transition_counter != self._idle_transition_counter:
self._idle_transition_counter = self.transition_counter
self.idle_since = None
return None
if self._active_graph_updates > 0:
self.idle_since = None
return None
if (
self.queued
or self.unrunnable
or any(ws.processing for ws in self.workers.values())
):
self.idle_since = None
return None
if not self.idle_since:
self.idle_since = time()
return self.idle_since
if self.jupyter:
last_activity = (
self._jupyter_server_application.web_app.last_activity().timestamp()
)
if last_activity > self.idle_since:
self.idle_since = last_activity
return self.idle_since
if self.idle_timeout:
if time() > self.idle_since + self.idle_timeout:
assert self.idle_since
logger.info(
"Scheduler closing after being idle for %s",
format_time(self.idle_timeout),
)
self._ongoing_background_tasks.call_soon(
self.close, reason="idle-timeout-exceeded"
)
return self.idle_since
def _check_no_workers(self) -> None:
if (
self.status in (Status.closing, Status.closed)
or self.no_workers_timeout is None
):
return
now = monotonic()
stimulus_id = f"check-no-workers-timeout-{time()}"
recommendations: Recs = {}
self._refresh_no_workers_since(now)
affected = self._check_unrunnable_task_timeouts(
now, recommendations=recommendations, stimulus_id=stimulus_id
)
affected.update(
self._check_queued_task_timeouts(
now, recommendations=recommendations, stimulus_id=stimulus_id
)
)
self.transitions(recommendations, stimulus_id=stimulus_id)
if affected:
self.log_event(
"scheduler",
{"action": "no-workers-timeout-exceeded", "keys": affected},
)
def _check_unrunnable_task_timeouts(
self, timestamp: float, recommendations: Recs, stimulus_id: str
) -> set[Key]:
assert self.no_workers_timeout
unsatisfied = []
no_workers = []
for ts, unrunnable_since in self.unrunnable.items():
if timestamp <= unrunnable_since + self.no_workers_timeout:
# unrunnable is insertion-ordered, which means that unrunnable_since will
# be monotonically increasing in this loop.
break
if (
self._no_workers_since is None
or self._no_workers_since >= unrunnable_since
):
unsatisfied.append(ts)
else:
no_workers.append(ts)
if not unsatisfied and not no_workers:
return set()
for ts in unsatisfied:
e = pickle.dumps(
NoValidWorkerError(
task=ts.key,
host_restrictions=(ts.host_restrictions or set()).copy(),
worker_restrictions=(ts.worker_restrictions or set()).copy(),
resource_restrictions=(ts.resource_restrictions or {}).copy(),
timeout=self.no_workers_timeout,
),
)
r = self.transition(
ts.key,
"erred",
exception=e,
cause=ts.key,
stimulus_id=stimulus_id,
)
recommendations.update(r)
logger.error(
"Task %s marked as failed because it timed out waiting "
"for its restrictions to become satisfied.",
ts.key,
)
self._fail_tasks_after_no_workers_timeout(
no_workers, recommendations, stimulus_id
)
return {ts.key for ts in concat([unsatisfied, no_workers])}
def _check_queued_task_timeouts(
self, timestamp: float, recommendations: Recs, stimulus_id: str
) -> set[Key]:
assert self.no_workers_timeout
if self._no_workers_since is None:
return set()
if timestamp <= self._no_workers_since + self.no_workers_timeout:
return set()
affected = list(self.queued)
self._fail_tasks_after_no_workers_timeout(
affected, recommendations, stimulus_id
)
return {ts.key for ts in affected}
def _fail_tasks_after_no_workers_timeout(
self, timed_out: Iterable[TaskState], recommendations: Recs, stimulus_id: str
) -> None:
assert self.no_workers_timeout
for ts in timed_out:
e = pickle.dumps(
NoWorkerError(
task=ts.key,
timeout=self.no_workers_timeout,
),
)
r = self.transition(
ts.key,
"erred",
exception=e,
cause=ts.key,
stimulus_id=stimulus_id,
)
recommendations.update(r)
logger.error(
"Task %s marked as failed because it timed out waiting "
"without any running workers.",
ts.key,
)
def _refresh_no_workers_since(self, timestamp: float | None = None) -> None:
if self.running or not (self.queued or self.unrunnable):
self._no_workers_since = None
return
if not self._no_workers_since:
self._no_workers_since = timestamp or monotonic()
return
def adaptive_target(self, target_duration=None):
"""Desired number of workers based on the current workload
This looks at the current running tasks and memory use, and returns a
number of desired workers. This is often used by adaptive scheduling.
Parameters
----------
target_duration : str
A desired duration of time for computations to take. This affects
how rapidly the scheduler will ask to scale.
See Also
--------
distributed.deploy.Adaptive
"""
if target_duration is None:
target_duration = dask.config.get("distributed.adaptive.target-duration")
target_duration = parse_timedelta(target_duration)
# CPU
queued = take(100, concat([self.queued, self.unrunnable.keys()]))
queued_occupancy = 0
for ts in queued:
queued_occupancy += self._get_prefix_duration(ts.prefix)
tasks_ready = len(self.queued) + len(self.unrunnable)
if tasks_ready > 100:
queued_occupancy *= tasks_ready / 100
cpu = math.ceil((self.total_occupancy + queued_occupancy) / target_duration)
# Avoid a few long tasks from asking for many cores
for ws in self.workers.values():
if tasks_ready > cpu:
break
tasks_ready += len(ws.processing)
else:
cpu = min(tasks_ready, cpu)
# Divide by average nthreads per worker
if self.workers:
nthreads = sum(ws.nthreads for ws in self.workers.values())
cpu = math.ceil(cpu / nthreads * len(self.workers))
if (self.unrunnable or self.queued) and not self.workers:
cpu = max(1, cpu)
# add more workers if more than 60% of memory is used
limit = sum(ws.memory_limit for ws in self.workers.values())
used = sum(ws.nbytes for ws in self.workers.values())
memory = 0
if used > 0.6 * limit and limit > 0:
memory = 2 * len(self.workers)
target = max(memory, cpu)
if target >= len(self.workers):
return target
else: # Scale down?
to_close = self.workers_to_close()
return len(self.workers) - len(to_close)
def request_acquire_replicas(
self, addr: str, keys: Iterable[Key], *, stimulus_id: str
) -> None:
"""Asynchronously ask a worker to acquire a replica of the listed keys from
other workers. This is a fire-and-forget operation which offers no feedback for
success or failure, and is intended for housekeeping and not for computation.
"""
who_has = {}
nbytes = {}
for key in keys:
ts = self.tasks[key]
assert ts.who_has
who_has[key] = [ws.address for ws in ts.who_has or ()]
nbytes[key] = ts.nbytes
self.stream_comms[addr].send(
{
"op": "acquire-replicas",
"who_has": who_has,
"nbytes": nbytes,
"stimulus_id": stimulus_id,
},
)
def request_remove_replicas(
self, addr: str, keys: list[Key], *, stimulus_id: str
) -> None:
"""Asynchronously ask a worker to discard its replica of the listed keys.
This must never be used to destroy the last replica of a key. This is a
fire-and-forget operation, intended for housekeeping and not for computation.
The replica disappears immediately from TaskState.who_has on the Scheduler side;
if the worker refuses to delete, e.g. because the task is a dependency of
another task running on it, it will (also asynchronously) inform the scheduler
to re-add itself to who_has. If the worker agrees to discard the task, there is
no feedback.
"""
ws = self.workers[addr]
# The scheduler immediately forgets about the replica and suggests the worker to
# drop it. The worker may refuse, at which point it will send back an add-keys
# message to reinstate it.
for key in keys:
ts = self.tasks[key]
if self.validate:
# Do not destroy the last copy
assert ts.who_has
assert len(ts.who_has) > 1
self.remove_replica(ts, ws)
self.stream_comms[addr].send(
{
"op": "remove-replicas",
"keys": keys,
"stimulus_id": stimulus_id,
}
)
def _task_to_report_msg(ts: TaskState) -> dict[str, Any] | None:
if ts.state == "forgotten":
return {"op": "cancelled-keys", "keys": [ts.key]}
elif ts.state == "memory":
return {"op": "key-in-memory", "key": ts.key}
elif ts.state == "erred":
failing_ts = ts.exception_blame
assert failing_ts
return {
"op": "task-erred",
"key": ts.key,
"exception": failing_ts.exception,
"traceback": failing_ts.traceback,
}
else:
return None
def _task_to_client_msgs(ts: TaskState) -> Msgs:
if ts.who_wants:
report_msg = _task_to_report_msg(ts)
if report_msg is not None:
return {cs.client_key: [report_msg] for cs in ts.who_wants}
return {}
def decide_worker(
ts: TaskState,
all_workers: set[WorkerState],
valid_workers: set[WorkerState] | None,
objective: Callable[[WorkerState], Any],
) -> WorkerState | None:
"""
Decide which worker should take task *ts*.
We choose the worker that has the data on which *ts* depends.
If several workers have dependencies then we choose the less-busy worker.
Optionally provide *valid_workers* of where jobs are allowed to occur
(if all workers are allowed to take the task, pass None instead).
If the task requires data communication because no eligible worker has
all the dependencies already, then we choose to minimize the number
of bytes sent between workers. This is determined by calling the
*objective* function.
"""
assert all(dts.who_has for dts in ts.dependencies)
if ts.actor:
candidates = all_workers.copy()
else:
candidates = {wws for dts in ts.dependencies for wws in dts.who_has or ()}
candidates &= all_workers
if valid_workers is None:
if not candidates:
candidates = all_workers.copy()
else:
candidates &= valid_workers
if not candidates:
candidates = valid_workers
if not candidates:
if ts.loose_restrictions:
return decide_worker(ts, all_workers, None, objective)
if not candidates:
return None
elif len(candidates) == 1:
return next(iter(candidates))
else:
return min(candidates, key=objective)
def validate_task_state(ts: TaskState) -> None:
"""Validate the given TaskState"""
assert ts.state in ALL_TASK_STATES, ts
if ts.waiting_on:
assert ts.waiting_on.issubset(ts.dependencies), (
"waiting not subset of dependencies",
str(ts.waiting_on),
str(ts.dependencies),
)
if ts.waiters:
assert ts.waiters.issubset(ts.dependents), (
"waiters not subset of dependents",
str(ts.waiters),
str(ts.dependents),
)
for dts in ts.waiting_on or ():
assert not dts.who_has, ("waiting on in-memory dep", str(ts), str(dts))
assert dts.state != "released", ("waiting on released dep", str(ts), str(dts))
for dts in ts.dependencies:
assert ts in dts.dependents, (
"not in dependency's dependents",
str(ts),
str(dts),
str(dts.dependents),
)
if ts.state in ("waiting", "queued", "processing", "no-worker"):
assert ts.waiting_on and dts in ts.waiting_on or dts.who_has, (
"dep missing",
str(ts),
str(dts),
)
assert dts.state != "forgotten"
for dts in ts.waiters or ():
assert dts.state in ("waiting", "queued", "processing", "no-worker"), (
"waiter not in play",
str(ts),
str(dts),
)
for dts in ts.dependents:
assert ts in dts.dependencies, (
"not in dependent's dependencies",
str(ts),
str(dts),
str(dts.dependencies),
)
assert dts.state != "forgotten"
assert (ts.processing_on is not None) == (ts.state == "processing")
assert bool(ts.who_has) == (ts.state == "memory"), (ts, ts.who_has, ts.state)
if ts.state == "queued":
assert not ts.processing_on
assert not ts.who_has
assert all(dts.who_has for dts in ts.dependencies), (
"task queued without all deps",
str(ts),
str(ts.dependencies),
)
if ts.state == "processing":
assert all(dts.who_has for dts in ts.dependencies), (
"task processing without all deps",
str(ts),
str(ts.dependencies),
)
assert not ts.waiting_on
if ts.who_has:
assert ts.waiters or ts.who_wants, (
"unneeded task in memory",
str(ts),
str(ts.who_has),
)
if ts.run_spec: # was computed
assert ts.type
assert isinstance(ts.type, str)
assert not any(
[
ts in dts.waiting_on
for dts in ts.dependents
if dts.waiting_on is not None
]
)
for ws in ts.who_has:
assert ts in ws.has_what, (
"not in who_has' has_what",
str(ts),
str(ws),
str(ws.has_what),
)
for cs in ts.who_wants or ():
assert ts in cs.wants_what, (
"not in who_wants' wants_what",
str(ts),
str(cs),
str(cs.wants_what),
)
if ts.actor:
if ts.state == "memory":
assert ts.who_has
assert sum(ts in ws.actors for ws in ts.who_has) == 1
if ts.state == "processing":
assert ts.processing_on
assert ts in ts.processing_on.actors
assert ts.state != "queued"
def validate_unrunnable(unrunnable: dict[TaskState, float]) -> None:
prev_unrunnable_since: float | None = None
prev_ts: TaskState | None = None
for ts, unrunnable_since in unrunnable.items():
assert ts.state == "no-worker"
if prev_ts is not None:
assert prev_unrunnable_since is not None
# Ensure that unrunnable_since is monotonically increasing when iterating over unrunnable.
# _check_no_workers relies on this.
assert prev_unrunnable_since <= unrunnable_since, (
prev_ts,
ts,
prev_unrunnable_since,
unrunnable_since,
)
prev_ts = ts
prev_unrunnable_since = unrunnable_since
def validate_worker_state(ws: WorkerState) -> None:
for ts in ws.has_what or ():
assert ts.who_has
assert ws in ts.who_has, (
"not in has_what' who_has",
str(ws),
str(ts),
str(ts.who_has),
)
for ts in ws.actors:
assert ts.state in ("memory", "processing")
def validate_state(
tasks: dict[Key, TaskState],
workers: dict[str, WorkerState],
clients: dict[str, ClientState],
) -> None:
"""Validate a current runtime state.
This performs a sequence of checks on the entire graph, running in about linear
time. This raises assert errors if anything doesn't check out.
"""
for ts in tasks.values():
validate_task_state(ts)
for ws in workers.values():
validate_worker_state(ws)
for cs in clients.values():
for ts in cs.wants_what or ():
assert ts.who_wants
assert cs in ts.who_wants, (
"not in wants_what' who_wants",
str(cs),
str(ts),
str(ts.who_wants),
)
def heartbeat_interval(n: int) -> float:
"""Interval in seconds that we desire heartbeats based on number of workers"""
if n <= 10:
return 0.5
elif n < 50:
return 1
elif n < 200:
return 2
else:
# No more than 200 heartbeats a second scaled by workers
return n / 200 + 1
def _task_slots_available(ws: WorkerState, saturation_factor: float) -> int:
"""Number of tasks that can be sent to this worker without oversaturating it"""
assert not math.isinf(saturation_factor)
return max(math.ceil(saturation_factor * ws.nthreads), 1) - (
len(ws.processing) - len(ws.long_running)
)
def _worker_full(ws: WorkerState, saturation_factor: float) -> bool:
if math.isinf(saturation_factor):
return False
return _task_slots_available(ws, saturation_factor) <= 0
class KilledWorker(Exception):
def __init__(self, task: Key, last_worker: WorkerState, allowed_failures: int):
super().__init__(task, last_worker, allowed_failures)
@property
def task(self) -> Key:
return self.args[0]
@property
def last_worker(self) -> WorkerState:
return self.args[1]
@property
def allowed_failures(self) -> int:
return self.args[2]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} on {self.allowed_failures + 1} "
"different workers, but all those workers died while running it. "
f"The last worker that attempt to run the task was {self.last_worker.address}. "
"Inspecting worker logs is often a good next step to diagnose what went wrong. "
"For more information see https://distributed.dask.org/en/stable/killed.html."
)
class NoValidWorkerError(Exception):
def __init__(
self,
task: Key,
host_restrictions: set[str],
worker_restrictions: set[str],
resource_restrictions: dict[str, float],
timeout: float,
):
super().__init__(
task, host_restrictions, worker_restrictions, resource_restrictions, timeout
)
@property
def task(self) -> Key:
return self.args[0]
@property
def host_restrictions(self) -> Any:
return self.args[1]
@property
def worker_restrictions(self) -> Any:
return self.args[2]
@property
def resource_restrictions(self) -> Any:
return self.args[3]
@property
def timeout(self) -> float:
return self.args[4]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} but timed out after {format_time(self.timeout)} "
"waiting for a valid worker matching all restrictions.\n\nRestrictions:\n"
f"host_restrictions={self.host_restrictions!s}\n"
f"worker_restrictions={self.worker_restrictions!s}\n"
f"resource_restrictions={self.resource_restrictions!s}\n"
)
class NoWorkerError(Exception):
def __init__(self, task: Key, timeout: float):
super().__init__(task, timeout)
@property
def task(self) -> Key:
return self.args[0]
@property
def timeout(self) -> float:
return self.args[1]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} but timed out after {format_time(self.timeout)} "
"waiting without any running workers."
)
class WorkerStatusPlugin(SchedulerPlugin):
"""A plugin to share worker status with a remote observer
This is used in cluster managers to keep updated about the status of the scheduler.
"""
name: ClassVar[str] = "worker-status"
bcomm: BatchedSend
def __init__(self, scheduler: Scheduler, comm: Comm):
self.bcomm = BatchedSend(interval="5ms")
self.bcomm.start(comm)
scheduler.add_plugin(self)
def add_worker(self, scheduler: Scheduler, worker: str) -> None:
ident = scheduler.workers[worker].identity()
del ident["metrics"]
del ident["last_seen"]
try:
self.bcomm.send(["add", {"workers": {worker: ident}}])
except CommClosedError:
scheduler.remove_plugin(name=self.name)
def remove_worker(self, scheduler: Scheduler, worker: str, **kwargs: Any) -> None:
try:
self.bcomm.send(["remove", worker])
except CommClosedError:
scheduler.remove_plugin(name=self.name)
def teardown(self) -> None:
self.bcomm.close()
class CollectTaskMetaDataPlugin(SchedulerPlugin):
scheduler: Scheduler
name: str
keys: set[Key]
metadata: dict[Key, Any]
state: dict[Key, TaskStateState]
def __init__(self, scheduler: Scheduler, name: str):
self.scheduler = scheduler
self.name = name
self.keys = set()
self.metadata = {}
self.state = {}
def update_graph(
self,
scheduler: Scheduler,
*,
keys: set[Key],
**kwargs: Any,
) -> None:
self.keys.update(keys)
def transition(
self,
key: Key,
start: TaskStateState,
finish: TaskStateState,
*args: Any,
**kwargs: Any,
) -> None:
if finish in ("memory", "erred"):
ts = self.scheduler.tasks.get(key)
if ts is not None and ts.key in self.keys:
self.metadata[key] = ts.metadata
self.state[key] = finish
self.keys.discard(key)
def _materialize_graph(
expr: Expr,
global_annotations: dict[str, Any],
validate: bool,
) -> tuple[dict[Key, T_runspec], dict[Key, set[Key]], dict[str, dict[Key, Any]]]:
> dsk: dict = expr.__dask_graph__()
E AttributeError: 'dict' object has no attribute '__dask_graph__'
distributed/scheduler.py:9383: AttributeError
Check warning on line 0 in distributed.cli.tests.test_dask_worker
github-actions / Unit Test Results
1 out of 5 runs failed: test_respect_host_listen_address[127.0.0.2---nanny] (distributed.cli.tests.test_dask_worker)
artifacts/ubuntu-latest-mindeps-default-notci1/pytest.xml [took 1s]
Raw output
AttributeError: 'dict' object has no attribute '__dask_graph__'
c = <Client: No scheduler connected>
s = <Scheduler 'tcp://127.0.0.1:43649', workers: 0, cores: 0, tasks: 0>
nanny = '--nanny', host = '127.0.0.2'
@pytest.mark.slow
@pytest.mark.skipif(not LINUX, reason="Need 127.0.0.2 to mean localhost")
@pytest.mark.parametrize("nanny", ["--nanny", "--no-nanny"])
@pytest.mark.parametrize("host", ["127.0.0.2", "0.0.0.0"])
@gen_cluster(client=True, nthreads=[])
async def test_respect_host_listen_address(c, s, nanny, host):
with popen(
[
sys.executable,
"-m",
"dask",
"worker",
s.address,
nanny,
"--no-dashboard",
"--host",
host,
]
):
await c.wait_for_workers(1)
# roundtrip works
> assert await c.submit(lambda x: x + 1, 10) == 11
distributed/cli/tests/test_dask_worker.py:569:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
distributed/client.py:408: in _result
raise exc.with_traceback(tb)
distributed/utils.py:1507: in run_in_executor_with_context
return await loop.run_in_executor(
../../../miniconda3/envs/dask-distributed/lib/python3.10/concurrent/futures/thread.py:58: in run
result = self.fn(*self.args, **self.kwargs)
distributed/utils.py:1508: in <lambda>
executor, lambda: context.run(func, *args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
from __future__ import annotations
import asyncio
import contextlib
import dataclasses
import heapq
import inspect
import itertools
import json
import logging
import math
import operator
import os
import pickle
import random
import textwrap
import uuid
import warnings
import weakref
from abc import abstractmethod
from collections import defaultdict, deque
from collections.abc import (
Callable,
Collection,
Container,
Hashable,
Iterable,
Iterator,
Mapping,
Sequence,
Set,
)
from contextlib import suppress
from functools import partial
from typing import TYPE_CHECKING, Any, ClassVar, Literal, NamedTuple, cast, overload
import psutil
import tornado.web
from sortedcontainers import SortedDict, SortedSet
from tlz import (
concat,
first,
groupby,
merge,
merge_sorted,
merge_with,
partition,
pluck,
second,
take,
valmap,
)
from tornado.ioloop import IOLoop
import dask
import dask.utils
from dask._task_spec import DependenciesMapping, GraphNode, convert_legacy_graph
from dask.core import istask, validate_key
from dask.typing import Key, no_default
from dask.utils import (
_deprecated,
_deprecated_kwarg,
format_bytes,
format_time,
key_split,
parse_bytes,
parse_timedelta,
tmpfile,
)
from dask.widgets import get_template
from distributed import cluster_dump, preloading, profile
from distributed import versions as version_module
from distributed._asyncio import RLock
from distributed._stories import scheduler_story
from distributed.active_memory_manager import ActiveMemoryManagerExtension, RetireWorker
from distributed.batched import BatchedSend
from distributed.broker import Broker
from distributed.client import SourceCode
from distributed.collections import HeapSet
from distributed.comm import (
Comm,
CommClosedError,
get_address_host,
normalize_address,
resolve_address,
unparse_host_port,
)
from distributed.comm.addressing import addresses_from_user_args
from distributed.compatibility import PeriodicCallback
from distributed.core import (
ErrorMessage,
OKMessage,
Status,
clean_exception,
error_message,
rpc,
send_recv,
)
from distributed.diagnostics.memory_sampler import MemorySamplerExtension
from distributed.diagnostics.plugin import SchedulerPlugin, _get_plugin_name
from distributed.event import EventExtension
from distributed.gc import disable_gc_diagnosis, enable_gc_diagnosis
from distributed.http import get_handlers
from distributed.metrics import monotonic, time
from distributed.multi_lock import MultiLockExtension
from distributed.node import ServerNode
from distributed.proctitle import setproctitle
from distributed.protocol import deserialize
from distributed.protocol.pickle import dumps, loads
from distributed.protocol.serialize import Serialized, ToPickle, serialize
from distributed.publish import PublishExtension
from distributed.pubsub import PubSubSchedulerExtension
from distributed.queues import QueueExtension
from distributed.recreate_tasks import ReplayTaskScheduler
from distributed.security import Security
from distributed.semaphore import SemaphoreExtension
from distributed.shuffle import ShuffleSchedulerPlugin
from distributed.spans import SpanMetadata, SpansSchedulerExtension
from distributed.stealing import WorkStealing
from distributed.utils import (
All,
Deadline,
TimeoutError,
format_dashboard_link,
get_fileno_limit,
key_split_group,
log_errors,
offload,
recursive_to_dict,
wait_for,
)
from distributed.utils_comm import (
gather_from_workers,
retry_operation,
scatter_to_workers,
)
from distributed.variable import VariableExtension
if TYPE_CHECKING:
# TODO import from typing (requires Python >=3.10)
# TODO import from typing (requires Python >=3.11)
from typing_extensions import Self, TypeAlias
from dask._expr import Expr
# Not to be confused with distributed.worker_state_machine.TaskStateState
TaskStateState: TypeAlias = Literal[
"released",
"waiting",
"no-worker",
"queued",
"processing",
"memory",
"erred",
"forgotten",
]
ALL_TASK_STATES: Set[TaskStateState] = set(TaskStateState.__args__) # type: ignore
# {task key -> finish state}
# Not to be confused with distributed.worker_state_machine.Recs
Recs: TypeAlias = dict[Key, TaskStateState]
# {client or worker address: [{op: <key>, ...}, ...]}
Msgs: TypeAlias = dict[str, list[dict[str, Any]]]
# (recommendations, client messages, worker messages)
RecsMsgs: TypeAlias = tuple[Recs, Msgs, Msgs]
T_runspec: TypeAlias = GraphNode
logger = logging.getLogger(__name__)
LOG_PDB = dask.config.get("distributed.admin.pdb-on-err")
DEFAULT_DATA_SIZE = parse_bytes(
dask.config.get("distributed.scheduler.default-data-size")
)
STIMULUS_ID_UNSET = "<stimulus_id unset>"
DEFAULT_EXTENSIONS = {
"multi_locks": MultiLockExtension,
"publish": PublishExtension,
"replay-tasks": ReplayTaskScheduler,
"queues": QueueExtension,
"variables": VariableExtension,
"pubsub": PubSubSchedulerExtension,
"semaphores": SemaphoreExtension,
"events": EventExtension,
"amm": ActiveMemoryManagerExtension,
"memory_sampler": MemorySamplerExtension,
"shuffle": ShuffleSchedulerPlugin,
"spans": SpansSchedulerExtension,
"stealing": WorkStealing,
}
class ClientState:
"""A simple object holding information about a client."""
#: A unique identifier for this client. This is generally an opaque
#: string generated by the client itself.
client_key: str
#: Cached hash of :attr:`~ClientState.client_key`
_hash: int
#: A set of tasks this client wants to be kept in memory, so that it can download
#: its result when desired. This is the reverse mapping of
#: :class:`TaskState.who_wants`. Tasks are typically removed from this set when the
#: corresponding object in the client's space (for example a ``Future`` or a Dask
#: collection) gets garbage-collected.
wants_what: set[TaskState]
#: The last time we received a heartbeat from this client, in local scheduler time.
last_seen: float
#: Output of :func:`distributed.versions.get_versions` on the client
versions: dict[str, Any]
__slots__ = tuple(__annotations__)
def __init__(self, client: str, *, versions: dict[str, Any] | None = None):
self.client_key = client
self._hash = hash(client)
self.wants_what = set()
self.last_seen = time()
self.versions = versions or {}
def __hash__(self) -> int:
return self._hash
def __eq__(self, other: object) -> bool:
if not isinstance(other, ClientState):
return False
return self.client_key == other.client_key
def __repr__(self) -> str:
return f"<Client {self.client_key!r}>"
def __str__(self) -> str:
return self.client_key
def _to_dict_no_nest(self, *, exclude: Container[str] = ()) -> dict:
"""Dictionary representation for debugging purposes.
Not type stable and not intended for roundtrips.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
TaskState._to_dict
"""
return recursive_to_dict(
self,
exclude=set(exclude) | {"versions"}, # type: ignore
members=True,
)
class MemoryState:
"""Memory readings on a worker or on the whole cluster.
See :doc:`worker-memory`.
Attributes / properties:
managed_total
Sum of the output of sizeof() for all dask keys held by the worker in memory,
plus number of bytes spilled to disk
managed
Sum of the output of sizeof() for the dask keys held in RAM. Note that this may
be inaccurate, which may cause inaccurate unmanaged memory (see below).
spilled
Number of bytes for the dask keys spilled to the hard drive.
Note that this is the size on disk; size in memory may be different due to
compression and inaccuracies in sizeof(). In other words, given the same keys,
'managed' will change depending on the keys being in memory or spilled.
process
Total RSS memory measured by the OS on the worker process.
This is always exactly equal to managed + unmanaged.
unmanaged
process - managed. This is the sum of
- Python interpreter and modules
- global variables
- memory temporarily allocated by the dask tasks that are currently running
- memory fragmentation
- memory leaks
- memory not yet garbage collected
- memory not yet free()'d by the Python memory manager to the OS
unmanaged_old
Minimum of the 'unmanaged' measures over the last
``distributed.memory.recent-to-old-time`` seconds
unmanaged_recent
unmanaged - unmanaged_old; in other words process memory that has been recently
allocated but is not accounted for by dask; hopefully it's mostly a temporary
spike.
optimistic
managed + unmanaged_old; in other words the memory held long-term by
the process under the hopeful assumption that all unmanaged_recent memory is a
temporary spike
"""
process: int
unmanaged_old: int
managed: int
spilled: int
__slots__ = tuple(__annotations__)
def __init__(
self,
*,
process: int,
unmanaged_old: int,
managed: int,
spilled: int,
):
# Some data arrives with the heartbeat, some other arrives in realtime as the
# tasks progress. Also, sizeof() is not guaranteed to return correct results.
# This can cause glitches where a partial measure is larger than the whole, so
# we need to force all numbers to add up exactly by definition.
self.process = process
self.managed = min(self.process, managed)
self.spilled = spilled
# Subtractions between unsigned ints guaranteed by construction to be >= 0
self.unmanaged_old = min(unmanaged_old, process - self.managed)
@staticmethod
def sum(*infos: MemoryState) -> MemoryState:
process = 0
unmanaged_old = 0
managed = 0
spilled = 0
for ms in infos:
process += ms.process
unmanaged_old += ms.unmanaged_old
spilled += ms.spilled
managed += ms.managed
return MemoryState(
process=process,
unmanaged_old=unmanaged_old,
managed=managed,
spilled=spilled,
)
@property
def managed_total(self) -> int:
return self.managed + self.spilled
@property
def unmanaged(self) -> int:
# This is never negative thanks to __init__
return self.process - self.managed
@property
def unmanaged_recent(self) -> int:
# This is never negative thanks to __init__
return self.process - self.managed - self.unmanaged_old
@property
def optimistic(self) -> int:
return self.managed + self.unmanaged_old
@property
def managed_in_memory(self) -> int:
warnings.warn("managed_in_memory has been renamed to managed", FutureWarning)
return self.managed
@property
def managed_spilled(self) -> int:
warnings.warn("managed_spilled has been renamed to spilled", FutureWarning)
return self.spilled
def __repr__(self) -> str:
return (
f"Process memory (RSS) : {format_bytes(self.process)}\n"
f" - managed by Dask : {format_bytes(self.managed)}\n"
f" - unmanaged (old) : {format_bytes(self.unmanaged_old)}\n"
f" - unmanaged (recent): {format_bytes(self.unmanaged_recent)}\n"
f"Spilled to disk : {format_bytes(self.spilled)}\n"
)
def _to_dict(self, *, exclude: Container[str] = ()) -> dict:
"""Dictionary representation for debugging purposes.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
"""
return {
k: getattr(self, k)
for k in dir(self)
if not k.startswith("_")
and k not in {"sum", "managed_in_memory", "managed_spilled"}
}
class WorkerState:
"""A simple object holding information about a worker.
Not to be confused with :class:`distributed.worker_state_machine.WorkerState`.
"""
#: This worker's unique key. This can be its connected address
#: (such as ``"tcp://127.0.0.1:8891"``) or an alias (such as ``"alice"``).
address: str
pid: int
name: Hashable
#: The number of CPU threads made available on this worker
nthreads: int
#: Memory available to the worker, in bytes
memory_limit: int
local_directory: str
services: dict[str, int]
#: Output of :meth:`distributed.versions.get_versions` on the worker
versions: dict[str, Any]
#: Address of the associated :class:`~distributed.nanny.Nanny`, if present
nanny: str | None
#: Read-only worker status, synced one way from the remote Worker object
status: Status
#: Cached hash of :attr:`~WorkerState.server_id`
_hash: int
#: The total memory size, in bytes, used by the tasks this worker holds in memory
#: (i.e. the tasks in this worker's :attr:`~WorkerState.has_what`).
nbytes: int
#: Worker memory unknown to the worker, in bytes, which has been there for more than
#: 30 seconds. See :class:`MemoryState`.
_memory_unmanaged_old: int
#: History of the last 30 seconds' worth of unmanaged memory. Used to differentiate
#: between "old" and "new" unmanaged memory.
#: Format: ``[(timestamp, bytes), (timestamp, bytes), ...]``
_memory_unmanaged_history: deque[tuple[float, int]]
metrics: dict[str, Any]
#: The last time we received a heartbeat from this worker, in local scheduler time.
last_seen: float
time_delay: float
bandwidth: float
#: A set of all TaskStates on this worker that are actors. This only includes those
#: actors whose state actually lives on this worker, not actors to which this worker
#: has a reference.
actors: set[TaskState]
#: Underlying data of :meth:`WorkerState.has_what`
_has_what: dict[TaskState, None]
#: A set of tasks that have been submitted to this worker. Multiple tasks may be
# submitted to a worker in advance and the worker will run them eventually,
# depending on its execution resources (but see :doc:`work-stealing`).
#:
#: All the tasks here are in the "processing" state.
#: This attribute is kept in sync with :attr:`TaskState.processing_on`.
processing: set[TaskState]
#: Running tasks that invoked :func:`distributed.secede`
long_running: set[TaskState]
#: A dictionary of tasks that are currently being run on this worker.
#: Each task state is associated with the duration in seconds which the task has
#: been running.
executing: dict[TaskState, float]
#: The available resources on this worker, e.g. ``{"GPU": 2}``.
#: These are abstract quantities that constrain certain tasks from running at the
#: same time on this worker.
resources: dict[str, float]
#: The sum of each resource used by all tasks allocated to this worker.
#: The numbers in this dictionary can only be less or equal than those in this
#: worker's :attr:`~WorkerState.resources`.
used_resources: dict[str, float]
#: Arbitrary additional metadata to be added to :meth:`~WorkerState.identity`
extra: dict[str, Any]
# The unique server ID this WorkerState is referencing
server_id: str
# Reference to scheduler task_groups
scheduler_ref: weakref.ref[SchedulerState] | None
task_prefix_count: defaultdict[str, int]
_network_occ: int
_occupancy_cache: float | None
#: Keys that may need to be fetched to this worker, and the number of tasks that need them.
#: All tasks are currently in `memory` on a worker other than this one.
#: Much like `processing`, this does not exactly reflect worker state:
#: keys here may be queued to fetch, in flight, or already in memory
#: on the worker.
needs_what: dict[TaskState, int]
__slots__ = tuple(__annotations__)
def __init__(
self,
*,
address: str,
status: Status,
pid: int,
name: object,
nthreads: int = 0,
memory_limit: int,
local_directory: str,
nanny: str | None,
server_id: str,
services: dict[str, int] | None = None,
versions: dict[str, Any] | None = None,
extra: dict[str, Any] | None = None,
scheduler: SchedulerState | None = None,
):
self.server_id = server_id
self.address = address
self.pid = pid
self.name = name
self.nthreads = nthreads
self.memory_limit = memory_limit
self.local_directory = local_directory
self.services = services or {}
self.versions = versions or {}
self.nanny = nanny
self.status = status
self._hash = hash(self.server_id)
self.nbytes = 0
self._memory_unmanaged_old = 0
self._memory_unmanaged_history = deque()
self.metrics = {}
self.last_seen = time()
self.time_delay = 0
self.bandwidth = parse_bytes(dask.config.get("distributed.scheduler.bandwidth"))
self.actors = set()
self._has_what = {}
self.processing = set()
self.long_running = set()
self.executing = {}
self.resources = {}
self.used_resources = {}
self.extra = extra or {}
self.scheduler_ref = weakref.ref(scheduler) if scheduler else None
self.task_prefix_count = defaultdict(int)
self.needs_what = {}
self._network_occ = 0
self._occupancy_cache = None
def __hash__(self) -> int:
return self._hash
def __eq__(self, other: object) -> bool:
return self is other or (
isinstance(other, WorkerState) and other.server_id == self.server_id
)
@property
def has_what(self) -> Set[TaskState]:
"""An insertion-sorted set-like of tasks which currently reside on this worker.
All the tasks here are in the "memory" state.
This is the reverse mapping of :attr:`TaskState.who_has`.
This is a read-only public accessor. The data is implemented as a dict without
values, because rebalance() relies on dicts being insertion-sorted.
"""
return self._has_what.keys()
@property
def host(self) -> str:
return get_address_host(self.address)
@property
def memory(self) -> MemoryState:
"""Polished memory metrics for the worker.
**Design note on managed memory**
There are two measures available for managed memory:
- ``self.nbytes``
- ``self.metrics["managed_bytes"]``
At rest, the two numbers must be identical. However, ``self.nbytes`` is
immediately updated through the batched comms as soon as each task lands in
memory on the worker; ``self.metrics["managed_bytes"]`` instead is updated by
the heartbeat, which can lag several seconds behind.
Below we are mixing likely newer managed memory info from ``self.nbytes`` with
process and spilled memory from the heartbeat. This is deliberate, so that
managed memory total is updated more frequently.
Managed memory directly and immediately contributes to optimistic memory, which
is in turn used in Active Memory Manager heuristics (at the moment of writing;
more uses will likely be added in the future). So it's important to have it
up to date; much more than it is for process memory.
Having up-to-date managed memory info as soon as the scheduler learns about
task completion also substantially simplifies unit tests.
The flip side of this design is that it may cause some noise in the
unmanaged_recent measure. e.g.:
1. Delete 100MB of managed data
2. The updated managed memory reaches the scheduler faster than the
updated process memory
3. There's a blip where the scheduler thinks that there's a sudden 100MB
increase in unmanaged_recent, since process memory hasn't changed but managed
memory has decreased by 100MB
4. When the heartbeat arrives, process memory goes down and so does the
unmanaged_recent.
This is OK - one of the main reasons for the unmanaged_recent / unmanaged_old
split is exactly to concentrate all the noise in unmanaged_recent and exclude it
from optimistic memory, which is used for heuristics.
Something that is less OK, but also less frequent, is that the sudden deletion
of spilled keys will cause a negative blip in managed memory:
1. Delete 100MB of spilled data
2. The updated managed memory *total* reaches the scheduler faster than the
updated spilled portion
3. This causes the managed memory to temporarily plummet and be replaced by
unmanaged_recent, while spilled memory remains unaltered
4. When the heartbeat arrives, managed goes back up, unmanaged_recent
goes back down, and spilled goes down by 100MB as it should have to
begin with.
:issue:`6002` will let us solve this.
"""
return MemoryState(
process=self.metrics["memory"],
managed=max(0, self.nbytes - self.metrics["spilled_bytes"]["memory"]),
spilled=self.metrics["spilled_bytes"]["disk"],
unmanaged_old=self._memory_unmanaged_old,
)
def clean(self) -> WorkerState:
"""Return a version of this object that is appropriate for serialization"""
ws = WorkerState(
address=self.address,
status=self.status,
pid=self.pid,
name=self.name,
nthreads=self.nthreads,
memory_limit=self.memory_limit,
local_directory=self.local_directory,
services=self.services,
nanny=self.nanny,
extra=self.extra,
server_id=self.server_id,
)
ws._occupancy_cache = self.occupancy
ws.executing = {ts.key: duration for ts, duration in self.executing.items()} # type: ignore
return ws
def __repr__(self) -> str:
name = f", name: {self.name}" if self.name != self.address else ""
return (
f"<WorkerState {self.address!r}{name}, "
f"status: {self.status.name}, "
f"memory: {len(self.has_what)}, "
f"processing: {len(self.processing)}>"
)
def _repr_html_(self) -> str:
return get_template("worker_state.html.j2").render(
address=self.address,
name=self.name,
status=self.status.name,
has_what=self.has_what,
processing=self.processing,
)
def identity(self) -> dict[str, Any]:
return {
"type": "Worker",
"id": self.name,
"host": self.host,
"resources": self.resources,
"local_directory": self.local_directory,
"name": self.name,
"nthreads": self.nthreads,
"memory_limit": self.memory_limit,
"last_seen": self.last_seen,
"services": self.services,
"metrics": self.metrics,
"status": self.status.name,
"nanny": self.nanny,
**self.extra,
}
def _to_dict_no_nest(self, *, exclude: Container[str] = ()) -> dict[str, Any]:
"""Dictionary representation for debugging purposes.
Not type stable and not intended for roundtrips.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
TaskState._to_dict
"""
return recursive_to_dict(
self,
exclude=set(exclude) | {"versions"}, # type: ignore
members=True,
)
@property
def scheduler(self) -> SchedulerState:
assert self.scheduler_ref
s = self.scheduler_ref()
assert s
return s
def add_to_processing(self, ts: TaskState) -> None:
"""Assign a task to this worker for compute."""
if self.scheduler.validate:
assert ts not in self.processing
tp = ts.prefix
self.task_prefix_count[tp.name] += 1
self.scheduler._task_prefix_count_global[tp.name] += 1
self.processing.add(ts)
for dts in ts.dependencies:
assert dts.who_has
if self not in dts.who_has:
self._inc_needs_replica(dts)
def add_to_long_running(self, ts: TaskState) -> None:
if self.scheduler.validate:
assert ts in self.processing
assert ts not in self.long_running
self._remove_from_task_prefix_count(ts)
# Cannot remove from processing since we're using this for things like
# idleness detection. Idle workers are typically targeted for
# downscaling but we should not downscale workers with long running
# tasks
self.long_running.add(ts)
def remove_from_processing(self, ts: TaskState) -> None:
"""Remove a task from a workers processing"""
if self.scheduler.validate:
assert ts in self.processing
if ts in self.long_running:
self.long_running.discard(ts)
else:
self._remove_from_task_prefix_count(ts)
self.processing.remove(ts)
for dts in ts.dependencies:
if dts in self.needs_what:
self._dec_needs_replica(dts)
def _remove_from_task_prefix_count(self, ts: TaskState) -> None:
prefix_name = ts.prefix.name
count = self.task_prefix_count[prefix_name] - 1
tp_count = self.task_prefix_count
tp_count_global = self.scheduler._task_prefix_count_global
if count:
tp_count[prefix_name] = count
else:
del tp_count[prefix_name]
count = tp_count_global[prefix_name] - 1
if count:
tp_count_global[prefix_name] = count
else:
del tp_count_global[prefix_name]
def remove_replica(self, ts: TaskState) -> None:
"""The worker no longer has a task in memory"""
if self.scheduler.validate:
assert ts.who_has
assert self in ts.who_has
assert ts in self.has_what
assert ts not in self.needs_what
self.nbytes -= ts.get_nbytes()
del self._has_what[ts]
ts.who_has.remove(self) # type: ignore
if not ts.who_has:
ts.who_has = None
def _inc_needs_replica(self, ts: TaskState) -> None:
"""Assign a task fetch to this worker and …cheduler, title="Scheduler Profile (administrative)"
)
task_stream = TabPanel(child=task_stream, title="Task Stream")
bandwidth_workers = TabPanel(
child=bandwidth_workers.root, title="Bandwidth (Workers)"
)
bandwidth_types = TabPanel(
child=bandwidth_types.root, title="Bandwidth (Types)"
)
system = TabPanel(child=sysmon.root, title="System")
logs = TabPanel(child=logs.root, title="Scheduler Logs")
tabs = Tabs(
tabs=[
html,
task_stream,
system,
logs,
compute,
workers,
scheduler,
bandwidth_workers,
bandwidth_types,
],
sizing_mode="stretch_both",
)
from bokeh.core.templates import get_env
from bokeh.plotting import output_file, save
with tmpfile(extension=".html") as fn:
output_file(filename=fn, title="Dask Performance Report", mode=mode)
template_directory = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "dashboard", "templates"
)
template_environment = get_env()
template_environment.loader.searchpath.append(template_directory)
template = template_environment.get_template("performance_report.html")
save(tabs, filename=fn, template=template)
with open(fn) as f:
data = f.read()
return data
async def get_worker_logs(self, n=None, workers=None, nanny=False):
results = await self.broadcast(
msg={"op": "get_logs", "n": n}, workers=workers, nanny=nanny
)
return results
def log_event(self, topic: str | Collection[str], msg: Any) -> None:
"""Log an event under a given topic
Parameters
----------
topic : str, list[str]
Name of the topic under which to log an event. To log the same
event under multiple topics, pass a list of topic names.
msg
Event message to log. Note this must be msgpack serializable.
See also
--------
Client.log_event
"""
self._broker.publish(topic, msg)
def subscribe_topic(self, topic: str, client: str) -> None:
self._broker.subscribe(topic, client)
def unsubscribe_topic(self, topic: str, client: str) -> None:
self._broker.unsubscribe(topic, client)
@overload
def get_events(self, topic: str) -> tuple[tuple[float, Any], ...]: ...
@overload
def get_events(self) -> dict[str, tuple[tuple[float, Any], ...]]: ...
def get_events(
self, topic: str | None = None
) -> tuple[tuple[float, Any], ...] | dict[str, tuple[tuple[float, Any], ...]]:
return self._broker.get_events(topic)
async def get_worker_monitor_info(self, recent=False, starts=None):
if starts is None:
starts = {}
results = await asyncio.gather(
*(
self.rpc(w).get_monitor_info(recent=recent, start=starts.get(w, 0))
for w in self.workers
)
)
return dict(zip(self.workers, results))
###########
# Cleanup #
###########
@log_errors
async def check_worker_ttl(self) -> None:
now = time()
stimulus_id = f"check-worker-ttl-{now}"
assert self.worker_ttl
ttl = max(self.worker_ttl, 10 * heartbeat_interval(len(self.workers)))
to_restart = []
for ws in self.workers.values():
last_seen = now - ws.last_seen
if last_seen > ttl:
to_restart.append(ws.address)
logger.warning(
f"Worker failed to heartbeat for {last_seen:.0f}s; "
f"{'attempting restart' if ws.nanny else 'removing'}: {ws}"
)
if to_restart:
self.log_event(
"scheduler",
{
"action": "worker-ttl-timed-out",
"workers": to_restart.copy(),
"ttl": ttl,
},
)
await self.restart_workers(
to_restart,
wait_for_workers=False,
stimulus_id=stimulus_id,
)
def check_idle(self) -> float | None:
if self.status in (Status.closing, Status.closed):
return None # pragma: nocover
if self.transition_counter != self._idle_transition_counter:
self._idle_transition_counter = self.transition_counter
self.idle_since = None
return None
if self._active_graph_updates > 0:
self.idle_since = None
return None
if (
self.queued
or self.unrunnable
or any(ws.processing for ws in self.workers.values())
):
self.idle_since = None
return None
if not self.idle_since:
self.idle_since = time()
return self.idle_since
if self.jupyter:
last_activity = (
self._jupyter_server_application.web_app.last_activity().timestamp()
)
if last_activity > self.idle_since:
self.idle_since = last_activity
return self.idle_since
if self.idle_timeout:
if time() > self.idle_since + self.idle_timeout:
assert self.idle_since
logger.info(
"Scheduler closing after being idle for %s",
format_time(self.idle_timeout),
)
self._ongoing_background_tasks.call_soon(
self.close, reason="idle-timeout-exceeded"
)
return self.idle_since
def _check_no_workers(self) -> None:
if (
self.status in (Status.closing, Status.closed)
or self.no_workers_timeout is None
):
return
now = monotonic()
stimulus_id = f"check-no-workers-timeout-{time()}"
recommendations: Recs = {}
self._refresh_no_workers_since(now)
affected = self._check_unrunnable_task_timeouts(
now, recommendations=recommendations, stimulus_id=stimulus_id
)
affected.update(
self._check_queued_task_timeouts(
now, recommendations=recommendations, stimulus_id=stimulus_id
)
)
self.transitions(recommendations, stimulus_id=stimulus_id)
if affected:
self.log_event(
"scheduler",
{"action": "no-workers-timeout-exceeded", "keys": affected},
)
def _check_unrunnable_task_timeouts(
self, timestamp: float, recommendations: Recs, stimulus_id: str
) -> set[Key]:
assert self.no_workers_timeout
unsatisfied = []
no_workers = []
for ts, unrunnable_since in self.unrunnable.items():
if timestamp <= unrunnable_since + self.no_workers_timeout:
# unrunnable is insertion-ordered, which means that unrunnable_since will
# be monotonically increasing in this loop.
break
if (
self._no_workers_since is None
or self._no_workers_since >= unrunnable_since
):
unsatisfied.append(ts)
else:
no_workers.append(ts)
if not unsatisfied and not no_workers:
return set()
for ts in unsatisfied:
e = pickle.dumps(
NoValidWorkerError(
task=ts.key,
host_restrictions=(ts.host_restrictions or set()).copy(),
worker_restrictions=(ts.worker_restrictions or set()).copy(),
resource_restrictions=(ts.resource_restrictions or {}).copy(),
timeout=self.no_workers_timeout,
),
)
r = self.transition(
ts.key,
"erred",
exception=e,
cause=ts.key,
stimulus_id=stimulus_id,
)
recommendations.update(r)
logger.error(
"Task %s marked as failed because it timed out waiting "
"for its restrictions to become satisfied.",
ts.key,
)
self._fail_tasks_after_no_workers_timeout(
no_workers, recommendations, stimulus_id
)
return {ts.key for ts in concat([unsatisfied, no_workers])}
def _check_queued_task_timeouts(
self, timestamp: float, recommendations: Recs, stimulus_id: str
) -> set[Key]:
assert self.no_workers_timeout
if self._no_workers_since is None:
return set()
if timestamp <= self._no_workers_since + self.no_workers_timeout:
return set()
affected = list(self.queued)
self._fail_tasks_after_no_workers_timeout(
affected, recommendations, stimulus_id
)
return {ts.key for ts in affected}
def _fail_tasks_after_no_workers_timeout(
self, timed_out: Iterable[TaskState], recommendations: Recs, stimulus_id: str
) -> None:
assert self.no_workers_timeout
for ts in timed_out:
e = pickle.dumps(
NoWorkerError(
task=ts.key,
timeout=self.no_workers_timeout,
),
)
r = self.transition(
ts.key,
"erred",
exception=e,
cause=ts.key,
stimulus_id=stimulus_id,
)
recommendations.update(r)
logger.error(
"Task %s marked as failed because it timed out waiting "
"without any running workers.",
ts.key,
)
def _refresh_no_workers_since(self, timestamp: float | None = None) -> None:
if self.running or not (self.queued or self.unrunnable):
self._no_workers_since = None
return
if not self._no_workers_since:
self._no_workers_since = timestamp or monotonic()
return
def adaptive_target(self, target_duration=None):
"""Desired number of workers based on the current workload
This looks at the current running tasks and memory use, and returns a
number of desired workers. This is often used by adaptive scheduling.
Parameters
----------
target_duration : str
A desired duration of time for computations to take. This affects
how rapidly the scheduler will ask to scale.
See Also
--------
distributed.deploy.Adaptive
"""
if target_duration is None:
target_duration = dask.config.get("distributed.adaptive.target-duration")
target_duration = parse_timedelta(target_duration)
# CPU
queued = take(100, concat([self.queued, self.unrunnable.keys()]))
queued_occupancy = 0
for ts in queued:
queued_occupancy += self._get_prefix_duration(ts.prefix)
tasks_ready = len(self.queued) + len(self.unrunnable)
if tasks_ready > 100:
queued_occupancy *= tasks_ready / 100
cpu = math.ceil((self.total_occupancy + queued_occupancy) / target_duration)
# Avoid a few long tasks from asking for many cores
for ws in self.workers.values():
if tasks_ready > cpu:
break
tasks_ready += len(ws.processing)
else:
cpu = min(tasks_ready, cpu)
# Divide by average nthreads per worker
if self.workers:
nthreads = sum(ws.nthreads for ws in self.workers.values())
cpu = math.ceil(cpu / nthreads * len(self.workers))
if (self.unrunnable or self.queued) and not self.workers:
cpu = max(1, cpu)
# add more workers if more than 60% of memory is used
limit = sum(ws.memory_limit for ws in self.workers.values())
used = sum(ws.nbytes for ws in self.workers.values())
memory = 0
if used > 0.6 * limit and limit > 0:
memory = 2 * len(self.workers)
target = max(memory, cpu)
if target >= len(self.workers):
return target
else: # Scale down?
to_close = self.workers_to_close()
return len(self.workers) - len(to_close)
def request_acquire_replicas(
self, addr: str, keys: Iterable[Key], *, stimulus_id: str
) -> None:
"""Asynchronously ask a worker to acquire a replica of the listed keys from
other workers. This is a fire-and-forget operation which offers no feedback for
success or failure, and is intended for housekeeping and not for computation.
"""
who_has = {}
nbytes = {}
for key in keys:
ts = self.tasks[key]
assert ts.who_has
who_has[key] = [ws.address for ws in ts.who_has or ()]
nbytes[key] = ts.nbytes
self.stream_comms[addr].send(
{
"op": "acquire-replicas",
"who_has": who_has,
"nbytes": nbytes,
"stimulus_id": stimulus_id,
},
)
def request_remove_replicas(
self, addr: str, keys: list[Key], *, stimulus_id: str
) -> None:
"""Asynchronously ask a worker to discard its replica of the listed keys.
This must never be used to destroy the last replica of a key. This is a
fire-and-forget operation, intended for housekeeping and not for computation.
The replica disappears immediately from TaskState.who_has on the Scheduler side;
if the worker refuses to delete, e.g. because the task is a dependency of
another task running on it, it will (also asynchronously) inform the scheduler
to re-add itself to who_has. If the worker agrees to discard the task, there is
no feedback.
"""
ws = self.workers[addr]
# The scheduler immediately forgets about the replica and suggests the worker to
# drop it. The worker may refuse, at which point it will send back an add-keys
# message to reinstate it.
for key in keys:
ts = self.tasks[key]
if self.validate:
# Do not destroy the last copy
assert ts.who_has
assert len(ts.who_has) > 1
self.remove_replica(ts, ws)
self.stream_comms[addr].send(
{
"op": "remove-replicas",
"keys": keys,
"stimulus_id": stimulus_id,
}
)
def _task_to_report_msg(ts: TaskState) -> dict[str, Any] | None:
if ts.state == "forgotten":
return {"op": "cancelled-keys", "keys": [ts.key]}
elif ts.state == "memory":
return {"op": "key-in-memory", "key": ts.key}
elif ts.state == "erred":
failing_ts = ts.exception_blame
assert failing_ts
return {
"op": "task-erred",
"key": ts.key,
"exception": failing_ts.exception,
"traceback": failing_ts.traceback,
}
else:
return None
def _task_to_client_msgs(ts: TaskState) -> Msgs:
if ts.who_wants:
report_msg = _task_to_report_msg(ts)
if report_msg is not None:
return {cs.client_key: [report_msg] for cs in ts.who_wants}
return {}
def decide_worker(
ts: TaskState,
all_workers: set[WorkerState],
valid_workers: set[WorkerState] | None,
objective: Callable[[WorkerState], Any],
) -> WorkerState | None:
"""
Decide which worker should take task *ts*.
We choose the worker that has the data on which *ts* depends.
If several workers have dependencies then we choose the less-busy worker.
Optionally provide *valid_workers* of where jobs are allowed to occur
(if all workers are allowed to take the task, pass None instead).
If the task requires data communication because no eligible worker has
all the dependencies already, then we choose to minimize the number
of bytes sent between workers. This is determined by calling the
*objective* function.
"""
assert all(dts.who_has for dts in ts.dependencies)
if ts.actor:
candidates = all_workers.copy()
else:
candidates = {wws for dts in ts.dependencies for wws in dts.who_has or ()}
candidates &= all_workers
if valid_workers is None:
if not candidates:
candidates = all_workers.copy()
else:
candidates &= valid_workers
if not candidates:
candidates = valid_workers
if not candidates:
if ts.loose_restrictions:
return decide_worker(ts, all_workers, None, objective)
if not candidates:
return None
elif len(candidates) == 1:
return next(iter(candidates))
else:
return min(candidates, key=objective)
def validate_task_state(ts: TaskState) -> None:
"""Validate the given TaskState"""
assert ts.state in ALL_TASK_STATES, ts
if ts.waiting_on:
assert ts.waiting_on.issubset(ts.dependencies), (
"waiting not subset of dependencies",
str(ts.waiting_on),
str(ts.dependencies),
)
if ts.waiters:
assert ts.waiters.issubset(ts.dependents), (
"waiters not subset of dependents",
str(ts.waiters),
str(ts.dependents),
)
for dts in ts.waiting_on or ():
assert not dts.who_has, ("waiting on in-memory dep", str(ts), str(dts))
assert dts.state != "released", ("waiting on released dep", str(ts), str(dts))
for dts in ts.dependencies:
assert ts in dts.dependents, (
"not in dependency's dependents",
str(ts),
str(dts),
str(dts.dependents),
)
if ts.state in ("waiting", "queued", "processing", "no-worker"):
assert ts.waiting_on and dts in ts.waiting_on or dts.who_has, (
"dep missing",
str(ts),
str(dts),
)
assert dts.state != "forgotten"
for dts in ts.waiters or ():
assert dts.state in ("waiting", "queued", "processing", "no-worker"), (
"waiter not in play",
str(ts),
str(dts),
)
for dts in ts.dependents:
assert ts in dts.dependencies, (
"not in dependent's dependencies",
str(ts),
str(dts),
str(dts.dependencies),
)
assert dts.state != "forgotten"
assert (ts.processing_on is not None) == (ts.state == "processing")
assert bool(ts.who_has) == (ts.state == "memory"), (ts, ts.who_has, ts.state)
if ts.state == "queued":
assert not ts.processing_on
assert not ts.who_has
assert all(dts.who_has for dts in ts.dependencies), (
"task queued without all deps",
str(ts),
str(ts.dependencies),
)
if ts.state == "processing":
assert all(dts.who_has for dts in ts.dependencies), (
"task processing without all deps",
str(ts),
str(ts.dependencies),
)
assert not ts.waiting_on
if ts.who_has:
assert ts.waiters or ts.who_wants, (
"unneeded task in memory",
str(ts),
str(ts.who_has),
)
if ts.run_spec: # was computed
assert ts.type
assert isinstance(ts.type, str)
assert not any(
[
ts in dts.waiting_on
for dts in ts.dependents
if dts.waiting_on is not None
]
)
for ws in ts.who_has:
assert ts in ws.has_what, (
"not in who_has' has_what",
str(ts),
str(ws),
str(ws.has_what),
)
for cs in ts.who_wants or ():
assert ts in cs.wants_what, (
"not in who_wants' wants_what",
str(ts),
str(cs),
str(cs.wants_what),
)
if ts.actor:
if ts.state == "memory":
assert ts.who_has
assert sum(ts in ws.actors for ws in ts.who_has) == 1
if ts.state == "processing":
assert ts.processing_on
assert ts in ts.processing_on.actors
assert ts.state != "queued"
def validate_unrunnable(unrunnable: dict[TaskState, float]) -> None:
prev_unrunnable_since: float | None = None
prev_ts: TaskState | None = None
for ts, unrunnable_since in unrunnable.items():
assert ts.state == "no-worker"
if prev_ts is not None:
assert prev_unrunnable_since is not None
# Ensure that unrunnable_since is monotonically increasing when iterating over unrunnable.
# _check_no_workers relies on this.
assert prev_unrunnable_since <= unrunnable_since, (
prev_ts,
ts,
prev_unrunnable_since,
unrunnable_since,
)
prev_ts = ts
prev_unrunnable_since = unrunnable_since
def validate_worker_state(ws: WorkerState) -> None:
for ts in ws.has_what or ():
assert ts.who_has
assert ws in ts.who_has, (
"not in has_what' who_has",
str(ws),
str(ts),
str(ts.who_has),
)
for ts in ws.actors:
assert ts.state in ("memory", "processing")
def validate_state(
tasks: dict[Key, TaskState],
workers: dict[str, WorkerState],
clients: dict[str, ClientState],
) -> None:
"""Validate a current runtime state.
This performs a sequence of checks on the entire graph, running in about linear
time. This raises assert errors if anything doesn't check out.
"""
for ts in tasks.values():
validate_task_state(ts)
for ws in workers.values():
validate_worker_state(ws)
for cs in clients.values():
for ts in cs.wants_what or ():
assert ts.who_wants
assert cs in ts.who_wants, (
"not in wants_what' who_wants",
str(cs),
str(ts),
str(ts.who_wants),
)
def heartbeat_interval(n: int) -> float:
"""Interval in seconds that we desire heartbeats based on number of workers"""
if n <= 10:
return 0.5
elif n < 50:
return 1
elif n < 200:
return 2
else:
# No more than 200 heartbeats a second scaled by workers
return n / 200 + 1
def _task_slots_available(ws: WorkerState, saturation_factor: float) -> int:
"""Number of tasks that can be sent to this worker without oversaturating it"""
assert not math.isinf(saturation_factor)
return max(math.ceil(saturation_factor * ws.nthreads), 1) - (
len(ws.processing) - len(ws.long_running)
)
def _worker_full(ws: WorkerState, saturation_factor: float) -> bool:
if math.isinf(saturation_factor):
return False
return _task_slots_available(ws, saturation_factor) <= 0
class KilledWorker(Exception):
def __init__(self, task: Key, last_worker: WorkerState, allowed_failures: int):
super().__init__(task, last_worker, allowed_failures)
@property
def task(self) -> Key:
return self.args[0]
@property
def last_worker(self) -> WorkerState:
return self.args[1]
@property
def allowed_failures(self) -> int:
return self.args[2]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} on {self.allowed_failures + 1} "
"different workers, but all those workers died while running it. "
f"The last worker that attempt to run the task was {self.last_worker.address}. "
"Inspecting worker logs is often a good next step to diagnose what went wrong. "
"For more information see https://distributed.dask.org/en/stable/killed.html."
)
class NoValidWorkerError(Exception):
def __init__(
self,
task: Key,
host_restrictions: set[str],
worker_restrictions: set[str],
resource_restrictions: dict[str, float],
timeout: float,
):
super().__init__(
task, host_restrictions, worker_restrictions, resource_restrictions, timeout
)
@property
def task(self) -> Key:
return self.args[0]
@property
def host_restrictions(self) -> Any:
return self.args[1]
@property
def worker_restrictions(self) -> Any:
return self.args[2]
@property
def resource_restrictions(self) -> Any:
return self.args[3]
@property
def timeout(self) -> float:
return self.args[4]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} but timed out after {format_time(self.timeout)} "
"waiting for a valid worker matching all restrictions.\n\nRestrictions:\n"
f"host_restrictions={self.host_restrictions!s}\n"
f"worker_restrictions={self.worker_restrictions!s}\n"
f"resource_restrictions={self.resource_restrictions!s}\n"
)
class NoWorkerError(Exception):
def __init__(self, task: Key, timeout: float):
super().__init__(task, timeout)
@property
def task(self) -> Key:
return self.args[0]
@property
def timeout(self) -> float:
return self.args[1]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} but timed out after {format_time(self.timeout)} "
"waiting without any running workers."
)
class WorkerStatusPlugin(SchedulerPlugin):
"""A plugin to share worker status with a remote observer
This is used in cluster managers to keep updated about the status of the scheduler.
"""
name: ClassVar[str] = "worker-status"
bcomm: BatchedSend
def __init__(self, scheduler: Scheduler, comm: Comm):
self.bcomm = BatchedSend(interval="5ms")
self.bcomm.start(comm)
scheduler.add_plugin(self)
def add_worker(self, scheduler: Scheduler, worker: str) -> None:
ident = scheduler.workers[worker].identity()
del ident["metrics"]
del ident["last_seen"]
try:
self.bcomm.send(["add", {"workers": {worker: ident}}])
except CommClosedError:
scheduler.remove_plugin(name=self.name)
def remove_worker(self, scheduler: Scheduler, worker: str, **kwargs: Any) -> None:
try:
self.bcomm.send(["remove", worker])
except CommClosedError:
scheduler.remove_plugin(name=self.name)
def teardown(self) -> None:
self.bcomm.close()
class CollectTaskMetaDataPlugin(SchedulerPlugin):
scheduler: Scheduler
name: str
keys: set[Key]
metadata: dict[Key, Any]
state: dict[Key, TaskStateState]
def __init__(self, scheduler: Scheduler, name: str):
self.scheduler = scheduler
self.name = name
self.keys = set()
self.metadata = {}
self.state = {}
def update_graph(
self,
scheduler: Scheduler,
*,
keys: set[Key],
**kwargs: Any,
) -> None:
self.keys.update(keys)
def transition(
self,
key: Key,
start: TaskStateState,
finish: TaskStateState,
*args: Any,
**kwargs: Any,
) -> None:
if finish in ("memory", "erred"):
ts = self.scheduler.tasks.get(key)
if ts is not None and ts.key in self.keys:
self.metadata[key] = ts.metadata
self.state[key] = finish
self.keys.discard(key)
def _materialize_graph(
expr: Expr,
global_annotations: dict[str, Any],
validate: bool,
) -> tuple[dict[Key, T_runspec], dict[Key, set[Key]], dict[str, dict[Key, Any]]]:
> dsk: dict = expr.__dask_graph__()
E AttributeError: 'dict' object has no attribute '__dask_graph__'
distributed/scheduler.py:9383: AttributeError
Check warning on line 0 in distributed.cli.tests.test_dask_worker
github-actions / Unit Test Results
1 out of 5 runs failed: test_respect_host_listen_address[127.0.0.2---no-nanny] (distributed.cli.tests.test_dask_worker)
artifacts/ubuntu-latest-mindeps-default-notci1/pytest.xml [took 0s]
Raw output
AttributeError: 'dict' object has no attribute '__dask_graph__'
c = <Client: No scheduler connected>
s = <Scheduler 'tcp://127.0.0.1:41191', workers: 0, cores: 0, tasks: 0>
nanny = '--no-nanny', host = '127.0.0.2'
@pytest.mark.slow
@pytest.mark.skipif(not LINUX, reason="Need 127.0.0.2 to mean localhost")
@pytest.mark.parametrize("nanny", ["--nanny", "--no-nanny"])
@pytest.mark.parametrize("host", ["127.0.0.2", "0.0.0.0"])
@gen_cluster(client=True, nthreads=[])
async def test_respect_host_listen_address(c, s, nanny, host):
with popen(
[
sys.executable,
"-m",
"dask",
"worker",
s.address,
nanny,
"--no-dashboard",
"--host",
host,
]
):
await c.wait_for_workers(1)
# roundtrip works
> assert await c.submit(lambda x: x + 1, 10) == 11
distributed/cli/tests/test_dask_worker.py:569:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
distributed/client.py:408: in _result
raise exc.with_traceback(tb)
distributed/utils.py:1507: in run_in_executor_with_context
return await loop.run_in_executor(
../../../miniconda3/envs/dask-distributed/lib/python3.10/concurrent/futures/thread.py:58: in run
result = self.fn(*self.args, **self.kwargs)
distributed/utils.py:1508: in <lambda>
executor, lambda: context.run(func, *args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
from __future__ import annotations
import asyncio
import contextlib
import dataclasses
import heapq
import inspect
import itertools
import json
import logging
import math
import operator
import os
import pickle
import random
import textwrap
import uuid
import warnings
import weakref
from abc import abstractmethod
from collections import defaultdict, deque
from collections.abc import (
Callable,
Collection,
Container,
Hashable,
Iterable,
Iterator,
Mapping,
Sequence,
Set,
)
from contextlib import suppress
from functools import partial
from typing import TYPE_CHECKING, Any, ClassVar, Literal, NamedTuple, cast, overload
import psutil
import tornado.web
from sortedcontainers import SortedDict, SortedSet
from tlz import (
concat,
first,
groupby,
merge,
merge_sorted,
merge_with,
partition,
pluck,
second,
take,
valmap,
)
from tornado.ioloop import IOLoop
import dask
import dask.utils
from dask._task_spec import DependenciesMapping, GraphNode, convert_legacy_graph
from dask.core import istask, validate_key
from dask.typing import Key, no_default
from dask.utils import (
_deprecated,
_deprecated_kwarg,
format_bytes,
format_time,
key_split,
parse_bytes,
parse_timedelta,
tmpfile,
)
from dask.widgets import get_template
from distributed import cluster_dump, preloading, profile
from distributed import versions as version_module
from distributed._asyncio import RLock
from distributed._stories import scheduler_story
from distributed.active_memory_manager import ActiveMemoryManagerExtension, RetireWorker
from distributed.batched import BatchedSend
from distributed.broker import Broker
from distributed.client import SourceCode
from distributed.collections import HeapSet
from distributed.comm import (
Comm,
CommClosedError,
get_address_host,
normalize_address,
resolve_address,
unparse_host_port,
)
from distributed.comm.addressing import addresses_from_user_args
from distributed.compatibility import PeriodicCallback
from distributed.core import (
ErrorMessage,
OKMessage,
Status,
clean_exception,
error_message,
rpc,
send_recv,
)
from distributed.diagnostics.memory_sampler import MemorySamplerExtension
from distributed.diagnostics.plugin import SchedulerPlugin, _get_plugin_name
from distributed.event import EventExtension
from distributed.gc import disable_gc_diagnosis, enable_gc_diagnosis
from distributed.http import get_handlers
from distributed.metrics import monotonic, time
from distributed.multi_lock import MultiLockExtension
from distributed.node import ServerNode
from distributed.proctitle import setproctitle
from distributed.protocol import deserialize
from distributed.protocol.pickle import dumps, loads
from distributed.protocol.serialize import Serialized, ToPickle, serialize
from distributed.publish import PublishExtension
from distributed.pubsub import PubSubSchedulerExtension
from distributed.queues import QueueExtension
from distributed.recreate_tasks import ReplayTaskScheduler
from distributed.security import Security
from distributed.semaphore import SemaphoreExtension
from distributed.shuffle import ShuffleSchedulerPlugin
from distributed.spans import SpanMetadata, SpansSchedulerExtension
from distributed.stealing import WorkStealing
from distributed.utils import (
All,
Deadline,
TimeoutError,
format_dashboard_link,
get_fileno_limit,
key_split_group,
log_errors,
offload,
recursive_to_dict,
wait_for,
)
from distributed.utils_comm import (
gather_from_workers,
retry_operation,
scatter_to_workers,
)
from distributed.variable import VariableExtension
if TYPE_CHECKING:
# TODO import from typing (requires Python >=3.10)
# TODO import from typing (requires Python >=3.11)
from typing_extensions import Self, TypeAlias
from dask._expr import Expr
# Not to be confused with distributed.worker_state_machine.TaskStateState
TaskStateState: TypeAlias = Literal[
"released",
"waiting",
"no-worker",
"queued",
"processing",
"memory",
"erred",
"forgotten",
]
ALL_TASK_STATES: Set[TaskStateState] = set(TaskStateState.__args__) # type: ignore
# {task key -> finish state}
# Not to be confused with distributed.worker_state_machine.Recs
Recs: TypeAlias = dict[Key, TaskStateState]
# {client or worker address: [{op: <key>, ...}, ...]}
Msgs: TypeAlias = dict[str, list[dict[str, Any]]]
# (recommendations, client messages, worker messages)
RecsMsgs: TypeAlias = tuple[Recs, Msgs, Msgs]
T_runspec: TypeAlias = GraphNode
logger = logging.getLogger(__name__)
LOG_PDB = dask.config.get("distributed.admin.pdb-on-err")
DEFAULT_DATA_SIZE = parse_bytes(
dask.config.get("distributed.scheduler.default-data-size")
)
STIMULUS_ID_UNSET = "<stimulus_id unset>"
DEFAULT_EXTENSIONS = {
"multi_locks": MultiLockExtension,
"publish": PublishExtension,
"replay-tasks": ReplayTaskScheduler,
"queues": QueueExtension,
"variables": VariableExtension,
"pubsub": PubSubSchedulerExtension,
"semaphores": SemaphoreExtension,
"events": EventExtension,
"amm": ActiveMemoryManagerExtension,
"memory_sampler": MemorySamplerExtension,
"shuffle": ShuffleSchedulerPlugin,
"spans": SpansSchedulerExtension,
"stealing": WorkStealing,
}
class ClientState:
"""A simple object holding information about a client."""
#: A unique identifier for this client. This is generally an opaque
#: string generated by the client itself.
client_key: str
#: Cached hash of :attr:`~ClientState.client_key`
_hash: int
#: A set of tasks this client wants to be kept in memory, so that it can download
#: its result when desired. This is the reverse mapping of
#: :class:`TaskState.who_wants`. Tasks are typically removed from this set when the
#: corresponding object in the client's space (for example a ``Future`` or a Dask
#: collection) gets garbage-collected.
wants_what: set[TaskState]
#: The last time we received a heartbeat from this client, in local scheduler time.
last_seen: float
#: Output of :func:`distributed.versions.get_versions` on the client
versions: dict[str, Any]
__slots__ = tuple(__annotations__)
def __init__(self, client: str, *, versions: dict[str, Any] | None = None):
self.client_key = client
self._hash = hash(client)
self.wants_what = set()
self.last_seen = time()
self.versions = versions or {}
def __hash__(self) -> int:
return self._hash
def __eq__(self, other: object) -> bool:
if not isinstance(other, ClientState):
return False
return self.client_key == other.client_key
def __repr__(self) -> str:
return f"<Client {self.client_key!r}>"
def __str__(self) -> str:
return self.client_key
def _to_dict_no_nest(self, *, exclude: Container[str] = ()) -> dict:
"""Dictionary representation for debugging purposes.
Not type stable and not intended for roundtrips.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
TaskState._to_dict
"""
return recursive_to_dict(
self,
exclude=set(exclude) | {"versions"}, # type: ignore
members=True,
)
class MemoryState:
"""Memory readings on a worker or on the whole cluster.
See :doc:`worker-memory`.
Attributes / properties:
managed_total
Sum of the output of sizeof() for all dask keys held by the worker in memory,
plus number of bytes spilled to disk
managed
Sum of the output of sizeof() for the dask keys held in RAM. Note that this may
be inaccurate, which may cause inaccurate unmanaged memory (see below).
spilled
Number of bytes for the dask keys spilled to the hard drive.
Note that this is the size on disk; size in memory may be different due to
compression and inaccuracies in sizeof(). In other words, given the same keys,
'managed' will change depending on the keys being in memory or spilled.
process
Total RSS memory measured by the OS on the worker process.
This is always exactly equal to managed + unmanaged.
unmanaged
process - managed. This is the sum of
- Python interpreter and modules
- global variables
- memory temporarily allocated by the dask tasks that are currently running
- memory fragmentation
- memory leaks
- memory not yet garbage collected
- memory not yet free()'d by the Python memory manager to the OS
unmanaged_old
Minimum of the 'unmanaged' measures over the last
``distributed.memory.recent-to-old-time`` seconds
unmanaged_recent
unmanaged - unmanaged_old; in other words process memory that has been recently
allocated but is not accounted for by dask; hopefully it's mostly a temporary
spike.
optimistic
managed + unmanaged_old; in other words the memory held long-term by
the process under the hopeful assumption that all unmanaged_recent memory is a
temporary spike
"""
process: int
unmanaged_old: int
managed: int
spilled: int
__slots__ = tuple(__annotations__)
def __init__(
self,
*,
process: int,
unmanaged_old: int,
managed: int,
spilled: int,
):
# Some data arrives with the heartbeat, some other arrives in realtime as the
# tasks progress. Also, sizeof() is not guaranteed to return correct results.
# This can cause glitches where a partial measure is larger than the whole, so
# we need to force all numbers to add up exactly by definition.
self.process = process
self.managed = min(self.process, managed)
self.spilled = spilled
# Subtractions between unsigned ints guaranteed by construction to be >= 0
self.unmanaged_old = min(unmanaged_old, process - self.managed)
@staticmethod
def sum(*infos: MemoryState) -> MemoryState:
process = 0
unmanaged_old = 0
managed = 0
spilled = 0
for ms in infos:
process += ms.process
unmanaged_old += ms.unmanaged_old
spilled += ms.spilled
managed += ms.managed
return MemoryState(
process=process,
unmanaged_old=unmanaged_old,
managed=managed,
spilled=spilled,
)
@property
def managed_total(self) -> int:
return self.managed + self.spilled
@property
def unmanaged(self) -> int:
# This is never negative thanks to __init__
return self.process - self.managed
@property
def unmanaged_recent(self) -> int:
# This is never negative thanks to __init__
return self.process - self.managed - self.unmanaged_old
@property
def optimistic(self) -> int:
return self.managed + self.unmanaged_old
@property
def managed_in_memory(self) -> int:
warnings.warn("managed_in_memory has been renamed to managed", FutureWarning)
return self.managed
@property
def managed_spilled(self) -> int:
warnings.warn("managed_spilled has been renamed to spilled", FutureWarning)
return self.spilled
def __repr__(self) -> str:
return (
f"Process memory (RSS) : {format_bytes(self.process)}\n"
f" - managed by Dask : {format_bytes(self.managed)}\n"
f" - unmanaged (old) : {format_bytes(self.unmanaged_old)}\n"
f" - unmanaged (recent): {format_bytes(self.unmanaged_recent)}\n"
f"Spilled to disk : {format_bytes(self.spilled)}\n"
)
def _to_dict(self, *, exclude: Container[str] = ()) -> dict:
"""Dictionary representation for debugging purposes.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
"""
return {
k: getattr(self, k)
for k in dir(self)
if not k.startswith("_")
and k not in {"sum", "managed_in_memory", "managed_spilled"}
}
class WorkerState:
"""A simple object holding information about a worker.
Not to be confused with :class:`distributed.worker_state_machine.WorkerState`.
"""
#: This worker's unique key. This can be its connected address
#: (such as ``"tcp://127.0.0.1:8891"``) or an alias (such as ``"alice"``).
address: str
pid: int
name: Hashable
#: The number of CPU threads made available on this worker
nthreads: int
#: Memory available to the worker, in bytes
memory_limit: int
local_directory: str
services: dict[str, int]
#: Output of :meth:`distributed.versions.get_versions` on the worker
versions: dict[str, Any]
#: Address of the associated :class:`~distributed.nanny.Nanny`, if present
nanny: str | None
#: Read-only worker status, synced one way from the remote Worker object
status: Status
#: Cached hash of :attr:`~WorkerState.server_id`
_hash: int
#: The total memory size, in bytes, used by the tasks this worker holds in memory
#: (i.e. the tasks in this worker's :attr:`~WorkerState.has_what`).
nbytes: int
#: Worker memory unknown to the worker, in bytes, which has been there for more than
#: 30 seconds. See :class:`MemoryState`.
_memory_unmanaged_old: int
#: History of the last 30 seconds' worth of unmanaged memory. Used to differentiate
#: between "old" and "new" unmanaged memory.
#: Format: ``[(timestamp, bytes), (timestamp, bytes), ...]``
_memory_unmanaged_history: deque[tuple[float, int]]
metrics: dict[str, Any]
#: The last time we received a heartbeat from this worker, in local scheduler time.
last_seen: float
time_delay: float
bandwidth: float
#: A set of all TaskStates on this worker that are actors. This only includes those
#: actors whose state actually lives on this worker, not actors to which this worker
#: has a reference.
actors: set[TaskState]
#: Underlying data of :meth:`WorkerState.has_what`
_has_what: dict[TaskState, None]
#: A set of tasks that have been submitted to this worker. Multiple tasks may be
# submitted to a worker in advance and the worker will run them eventually,
# depending on its execution resources (but see :doc:`work-stealing`).
#:
#: All the tasks here are in the "processing" state.
#: This attribute is kept in sync with :attr:`TaskState.processing_on`.
processing: set[TaskState]
#: Running tasks that invoked :func:`distributed.secede`
long_running: set[TaskState]
#: A dictionary of tasks that are currently being run on this worker.
#: Each task state is associated with the duration in seconds which the task has
#: been running.
executing: dict[TaskState, float]
#: The available resources on this worker, e.g. ``{"GPU": 2}``.
#: These are abstract quantities that constrain certain tasks from running at the
#: same time on this worker.
resources: dict[str, float]
#: The sum of each resource used by all tasks allocated to this worker.
#: The numbers in this dictionary can only be less or equal than those in this
#: worker's :attr:`~WorkerState.resources`.
used_resources: dict[str, float]
#: Arbitrary additional metadata to be added to :meth:`~WorkerState.identity`
extra: dict[str, Any]
# The unique server ID this WorkerState is referencing
server_id: str
# Reference to scheduler task_groups
scheduler_ref: weakref.ref[SchedulerState] | None
task_prefix_count: defaultdict[str, int]
_network_occ: int
_occupancy_cache: float | None
#: Keys that may need to be fetched to this worker, and the number of tasks that need them.
#: All tasks are currently in `memory` on a worker other than this one.
#: Much like `processing`, this does not exactly reflect worker state:
#: keys here may be queued to fetch, in flight, or already in memory
#: on the worker.
needs_what: dict[TaskState, int]
__slots__ = tuple(__annotations__)
def __init__(
self,
*,
address: str,
status: Status,
pid: int,
name: object,
nthreads: int = 0,
memory_limit: int,
local_directory: str,
nanny: str | None,
server_id: str,
services: dict[str, int] | None = None,
versions: dict[str, Any] | None = None,
extra: dict[str, Any] | None = None,
scheduler: SchedulerState | None = None,
):
self.server_id = server_id
self.address = address
self.pid = pid
self.name = name
self.nthreads = nthreads
self.memory_limit = memory_limit
self.local_directory = local_directory
self.services = services or {}
self.versions = versions or {}
self.nanny = nanny
self.status = status
self._hash = hash(self.server_id)
self.nbytes = 0
self._memory_unmanaged_old = 0
self._memory_unmanaged_history = deque()
self.metrics = {}
self.last_seen = time()
self.time_delay = 0
self.bandwidth = parse_bytes(dask.config.get("distributed.scheduler.bandwidth"))
self.actors = set()
self._has_what = {}
self.processing = set()
self.long_running = set()
self.executing = {}
self.resources = {}
self.used_resources = {}
self.extra = extra or {}
self.scheduler_ref = weakref.ref(scheduler) if scheduler else None
self.task_prefix_count = defaultdict(int)
self.needs_what = {}
self._network_occ = 0
self._occupancy_cache = None
def __hash__(self) -> int:
return self._hash
def __eq__(self, other: object) -> bool:
return self is other or (
isinstance(other, WorkerState) and other.server_id == self.server_id
)
@property
def has_what(self) -> Set[TaskState]:
"""An insertion-sorted set-like of tasks which currently reside on this worker.
All the tasks here are in the "memory" state.
This is the reverse mapping of :attr:`TaskState.who_has`.
This is a read-only public accessor. The data is implemented as a dict without
values, because rebalance() relies on dicts being insertion-sorted.
"""
return self._has_what.keys()
@property
def host(self) -> str:
return get_address_host(self.address)
@property
def memory(self) -> MemoryState:
"""Polished memory metrics for the worker.
**Design note on managed memory**
There are two measures available for managed memory:
- ``self.nbytes``
- ``self.metrics["managed_bytes"]``
At rest, the two numbers must be identical. However, ``self.nbytes`` is
immediately updated through the batched comms as soon as each task lands in
memory on the worker; ``self.metrics["managed_bytes"]`` instead is updated by
the heartbeat, which can lag several seconds behind.
Below we are mixing likely newer managed memory info from ``self.nbytes`` with
process and spilled memory from the heartbeat. This is deliberate, so that
managed memory total is updated more frequently.
Managed memory directly and immediately contributes to optimistic memory, which
is in turn used in Active Memory Manager heuristics (at the moment of writing;
more uses will likely be added in the future). So it's important to have it
up to date; much more than it is for process memory.
Having up-to-date managed memory info as soon as the scheduler learns about
task completion also substantially simplifies unit tests.
The flip side of this design is that it may cause some noise in the
unmanaged_recent measure. e.g.:
1. Delete 100MB of managed data
2. The updated managed memory reaches the scheduler faster than the
updated process memory
3. There's a blip where the scheduler thinks that there's a sudden 100MB
increase in unmanaged_recent, since process memory hasn't changed but managed
memory has decreased by 100MB
4. When the heartbeat arrives, process memory goes down and so does the
unmanaged_recent.
This is OK - one of the main reasons for the unmanaged_recent / unmanaged_old
split is exactly to concentrate all the noise in unmanaged_recent and exclude it
from optimistic memory, which is used for heuristics.
Something that is less OK, but also less frequent, is that the sudden deletion
of spilled keys will cause a negative blip in managed memory:
1. Delete 100MB of spilled data
2. The updated managed memory *total* reaches the scheduler faster than the
updated spilled portion
3. This causes the managed memory to temporarily plummet and be replaced by
unmanaged_recent, while spilled memory remains unaltered
4. When the heartbeat arrives, managed goes back up, unmanaged_recent
goes back down, and spilled goes down by 100MB as it should have to
begin with.
:issue:`6002` will let us solve this.
"""
return MemoryState(
process=self.metrics["memory"],
managed=max(0, self.nbytes - self.metrics["spilled_bytes"]["memory"]),
spilled=self.metrics["spilled_bytes"]["disk"],
unmanaged_old=self._memory_unmanaged_old,
)
def clean(self) -> WorkerState:
"""Return a version of this object that is appropriate for serialization"""
ws = WorkerState(
address=self.address,
status=self.status,
pid=self.pid,
name=self.name,
nthreads=self.nthreads,
memory_limit=self.memory_limit,
local_directory=self.local_directory,
services=self.services,
nanny=self.nanny,
extra=self.extra,
server_id=self.server_id,
)
ws._occupancy_cache = self.occupancy
ws.executing = {ts.key: duration for ts, duration in self.executing.items()} # type: ignore
return ws
def __repr__(self) -> str:
name = f", name: {self.name}" if self.name != self.address else ""
return (
f"<WorkerState {self.address!r}{name}, "
f"status: {self.status.name}, "
f"memory: {len(self.has_what)}, "
f"processing: {len(self.processing)}>"
)
def _repr_html_(self) -> str:
return get_template("worker_state.html.j2").render(
address=self.address,
name=self.name,
status=self.status.name,
has_what=self.has_what,
processing=self.processing,
)
def identity(self) -> dict[str, Any]:
return {
"type": "Worker",
"id": self.name,
"host": self.host,
"resources": self.resources,
"local_directory": self.local_directory,
"name": self.name,
"nthreads": self.nthreads,
"memory_limit": self.memory_limit,
"last_seen": self.last_seen,
"services": self.services,
"metrics": self.metrics,
"status": self.status.name,
"nanny": self.nanny,
**self.extra,
}
def _to_dict_no_nest(self, *, exclude: Container[str] = ()) -> dict[str, Any]:
"""Dictionary representation for debugging purposes.
Not type stable and not intended for roundtrips.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
TaskState._to_dict
"""
return recursive_to_dict(
self,
exclude=set(exclude) | {"versions"}, # type: ignore
members=True,
)
@property
def scheduler(self) -> SchedulerState:
assert self.scheduler_ref
s = self.scheduler_ref()
assert s
return s
def add_to_processing(self, ts: TaskState) -> None:
"""Assign a task to this worker for compute."""
if self.scheduler.validate:
assert ts not in self.processing
tp = ts.prefix
self.task_prefix_count[tp.name] += 1
self.scheduler._task_prefix_count_global[tp.name] += 1
self.processing.add(ts)
for dts in ts.dependencies:
assert dts.who_has
if self not in dts.who_has:
self._inc_needs_replica(dts)
def add_to_long_running(self, ts: TaskState) -> None:
if self.scheduler.validate:
assert ts in self.processing
assert ts not in self.long_running
self._remove_from_task_prefix_count(ts)
# Cannot remove from processing since we're using this for things like
# idleness detection. Idle workers are typically targeted for
# downscaling but we should not downscale workers with long running
# tasks
self.long_running.add(ts)
def remove_from_processing(self, ts: TaskState) -> None:
"""Remove a task from a workers processing"""
if self.scheduler.validate:
assert ts in self.processing
if ts in self.long_running:
self.long_running.discard(ts)
else:
self._remove_from_task_prefix_count(ts)
self.processing.remove(ts)
for dts in ts.dependencies:
if dts in self.needs_what:
self._dec_needs_replica(dts)
def _remove_from_task_prefix_count(self, ts: TaskState) -> None:
prefix_name = ts.prefix.name
count = self.task_prefix_count[prefix_name] - 1
tp_count = self.task_prefix_count
tp_count_global = self.scheduler._task_prefix_count_global
if count:
tp_count[prefix_name] = count
else:
del tp_count[prefix_name]
count = tp_count_global[prefix_name] - 1
if count:
tp_count_global[prefix_name] = count
else:
del tp_count_global[prefix_name]
def remove_replica(self, ts: TaskState) -> None:
"""The worker no longer has a task in memory"""
if self.scheduler.validate:
assert ts.who_has
assert self in ts.who_has
assert ts in self.has_what
assert ts not in self.needs_what
self.nbytes -= ts.get_nbytes()
del self._has_what[ts]
ts.who_has.remove(self) # type: ignore
if not ts.who_has:
ts.who_has = None
def _inc_needs_replica(self, ts: TaskState) -> None:
"""Assign a task fetch to this worker a…cheduler, title="Scheduler Profile (administrative)"
)
task_stream = TabPanel(child=task_stream, title="Task Stream")
bandwidth_workers = TabPanel(
child=bandwidth_workers.root, title="Bandwidth (Workers)"
)
bandwidth_types = TabPanel(
child=bandwidth_types.root, title="Bandwidth (Types)"
)
system = TabPanel(child=sysmon.root, title="System")
logs = TabPanel(child=logs.root, title="Scheduler Logs")
tabs = Tabs(
tabs=[
html,
task_stream,
system,
logs,
compute,
workers,
scheduler,
bandwidth_workers,
bandwidth_types,
],
sizing_mode="stretch_both",
)
from bokeh.core.templates import get_env
from bokeh.plotting import output_file, save
with tmpfile(extension=".html") as fn:
output_file(filename=fn, title="Dask Performance Report", mode=mode)
template_directory = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "dashboard", "templates"
)
template_environment = get_env()
template_environment.loader.searchpath.append(template_directory)
template = template_environment.get_template("performance_report.html")
save(tabs, filename=fn, template=template)
with open(fn) as f:
data = f.read()
return data
async def get_worker_logs(self, n=None, workers=None, nanny=False):
results = await self.broadcast(
msg={"op": "get_logs", "n": n}, workers=workers, nanny=nanny
)
return results
def log_event(self, topic: str | Collection[str], msg: Any) -> None:
"""Log an event under a given topic
Parameters
----------
topic : str, list[str]
Name of the topic under which to log an event. To log the same
event under multiple topics, pass a list of topic names.
msg
Event message to log. Note this must be msgpack serializable.
See also
--------
Client.log_event
"""
self._broker.publish(topic, msg)
def subscribe_topic(self, topic: str, client: str) -> None:
self._broker.subscribe(topic, client)
def unsubscribe_topic(self, topic: str, client: str) -> None:
self._broker.unsubscribe(topic, client)
@overload
def get_events(self, topic: str) -> tuple[tuple[float, Any], ...]: ...
@overload
def get_events(self) -> dict[str, tuple[tuple[float, Any], ...]]: ...
def get_events(
self, topic: str | None = None
) -> tuple[tuple[float, Any], ...] | dict[str, tuple[tuple[float, Any], ...]]:
return self._broker.get_events(topic)
async def get_worker_monitor_info(self, recent=False, starts=None):
if starts is None:
starts = {}
results = await asyncio.gather(
*(
self.rpc(w).get_monitor_info(recent=recent, start=starts.get(w, 0))
for w in self.workers
)
)
return dict(zip(self.workers, results))
###########
# Cleanup #
###########
@log_errors
async def check_worker_ttl(self) -> None:
now = time()
stimulus_id = f"check-worker-ttl-{now}"
assert self.worker_ttl
ttl = max(self.worker_ttl, 10 * heartbeat_interval(len(self.workers)))
to_restart = []
for ws in self.workers.values():
last_seen = now - ws.last_seen
if last_seen > ttl:
to_restart.append(ws.address)
logger.warning(
f"Worker failed to heartbeat for {last_seen:.0f}s; "
f"{'attempting restart' if ws.nanny else 'removing'}: {ws}"
)
if to_restart:
self.log_event(
"scheduler",
{
"action": "worker-ttl-timed-out",
"workers": to_restart.copy(),
"ttl": ttl,
},
)
await self.restart_workers(
to_restart,
wait_for_workers=False,
stimulus_id=stimulus_id,
)
def check_idle(self) -> float | None:
if self.status in (Status.closing, Status.closed):
return None # pragma: nocover
if self.transition_counter != self._idle_transition_counter:
self._idle_transition_counter = self.transition_counter
self.idle_since = None
return None
if self._active_graph_updates > 0:
self.idle_since = None
return None
if (
self.queued
or self.unrunnable
or any(ws.processing for ws in self.workers.values())
):
self.idle_since = None
return None
if not self.idle_since:
self.idle_since = time()
return self.idle_since
if self.jupyter:
last_activity = (
self._jupyter_server_application.web_app.last_activity().timestamp()
)
if last_activity > self.idle_since:
self.idle_since = last_activity
return self.idle_since
if self.idle_timeout:
if time() > self.idle_since + self.idle_timeout:
assert self.idle_since
logger.info(
"Scheduler closing after being idle for %s",
format_time(self.idle_timeout),
)
self._ongoing_background_tasks.call_soon(
self.close, reason="idle-timeout-exceeded"
)
return self.idle_since
def _check_no_workers(self) -> None:
if (
self.status in (Status.closing, Status.closed)
or self.no_workers_timeout is None
):
return
now = monotonic()
stimulus_id = f"check-no-workers-timeout-{time()}"
recommendations: Recs = {}
self._refresh_no_workers_since(now)
affected = self._check_unrunnable_task_timeouts(
now, recommendations=recommendations, stimulus_id=stimulus_id
)
affected.update(
self._check_queued_task_timeouts(
now, recommendations=recommendations, stimulus_id=stimulus_id
)
)
self.transitions(recommendations, stimulus_id=stimulus_id)
if affected:
self.log_event(
"scheduler",
{"action": "no-workers-timeout-exceeded", "keys": affected},
)
def _check_unrunnable_task_timeouts(
self, timestamp: float, recommendations: Recs, stimulus_id: str
) -> set[Key]:
assert self.no_workers_timeout
unsatisfied = []
no_workers = []
for ts, unrunnable_since in self.unrunnable.items():
if timestamp <= unrunnable_since + self.no_workers_timeout:
# unrunnable is insertion-ordered, which means that unrunnable_since will
# be monotonically increasing in this loop.
break
if (
self._no_workers_since is None
or self._no_workers_since >= unrunnable_since
):
unsatisfied.append(ts)
else:
no_workers.append(ts)
if not unsatisfied and not no_workers:
return set()
for ts in unsatisfied:
e = pickle.dumps(
NoValidWorkerError(
task=ts.key,
host_restrictions=(ts.host_restrictions or set()).copy(),
worker_restrictions=(ts.worker_restrictions or set()).copy(),
resource_restrictions=(ts.resource_restrictions or {}).copy(),
timeout=self.no_workers_timeout,
),
)
r = self.transition(
ts.key,
"erred",
exception=e,
cause=ts.key,
stimulus_id=stimulus_id,
)
recommendations.update(r)
logger.error(
"Task %s marked as failed because it timed out waiting "
"for its restrictions to become satisfied.",
ts.key,
)
self._fail_tasks_after_no_workers_timeout(
no_workers, recommendations, stimulus_id
)
return {ts.key for ts in concat([unsatisfied, no_workers])}
def _check_queued_task_timeouts(
self, timestamp: float, recommendations: Recs, stimulus_id: str
) -> set[Key]:
assert self.no_workers_timeout
if self._no_workers_since is None:
return set()
if timestamp <= self._no_workers_since + self.no_workers_timeout:
return set()
affected = list(self.queued)
self._fail_tasks_after_no_workers_timeout(
affected, recommendations, stimulus_id
)
return {ts.key for ts in affected}
def _fail_tasks_after_no_workers_timeout(
self, timed_out: Iterable[TaskState], recommendations: Recs, stimulus_id: str
) -> None:
assert self.no_workers_timeout
for ts in timed_out:
e = pickle.dumps(
NoWorkerError(
task=ts.key,
timeout=self.no_workers_timeout,
),
)
r = self.transition(
ts.key,
"erred",
exception=e,
cause=ts.key,
stimulus_id=stimulus_id,
)
recommendations.update(r)
logger.error(
"Task %s marked as failed because it timed out waiting "
"without any running workers.",
ts.key,
)
def _refresh_no_workers_since(self, timestamp: float | None = None) -> None:
if self.running or not (self.queued or self.unrunnable):
self._no_workers_since = None
return
if not self._no_workers_since:
self._no_workers_since = timestamp or monotonic()
return
def adaptive_target(self, target_duration=None):
"""Desired number of workers based on the current workload
This looks at the current running tasks and memory use, and returns a
number of desired workers. This is often used by adaptive scheduling.
Parameters
----------
target_duration : str
A desired duration of time for computations to take. This affects
how rapidly the scheduler will ask to scale.
See Also
--------
distributed.deploy.Adaptive
"""
if target_duration is None:
target_duration = dask.config.get("distributed.adaptive.target-duration")
target_duration = parse_timedelta(target_duration)
# CPU
queued = take(100, concat([self.queued, self.unrunnable.keys()]))
queued_occupancy = 0
for ts in queued:
queued_occupancy += self._get_prefix_duration(ts.prefix)
tasks_ready = len(self.queued) + len(self.unrunnable)
if tasks_ready > 100:
queued_occupancy *= tasks_ready / 100
cpu = math.ceil((self.total_occupancy + queued_occupancy) / target_duration)
# Avoid a few long tasks from asking for many cores
for ws in self.workers.values():
if tasks_ready > cpu:
break
tasks_ready += len(ws.processing)
else:
cpu = min(tasks_ready, cpu)
# Divide by average nthreads per worker
if self.workers:
nthreads = sum(ws.nthreads for ws in self.workers.values())
cpu = math.ceil(cpu / nthreads * len(self.workers))
if (self.unrunnable or self.queued) and not self.workers:
cpu = max(1, cpu)
# add more workers if more than 60% of memory is used
limit = sum(ws.memory_limit for ws in self.workers.values())
used = sum(ws.nbytes for ws in self.workers.values())
memory = 0
if used > 0.6 * limit and limit > 0:
memory = 2 * len(self.workers)
target = max(memory, cpu)
if target >= len(self.workers):
return target
else: # Scale down?
to_close = self.workers_to_close()
return len(self.workers) - len(to_close)
def request_acquire_replicas(
self, addr: str, keys: Iterable[Key], *, stimulus_id: str
) -> None:
"""Asynchronously ask a worker to acquire a replica of the listed keys from
other workers. This is a fire-and-forget operation which offers no feedback for
success or failure, and is intended for housekeeping and not for computation.
"""
who_has = {}
nbytes = {}
for key in keys:
ts = self.tasks[key]
assert ts.who_has
who_has[key] = [ws.address for ws in ts.who_has or ()]
nbytes[key] = ts.nbytes
self.stream_comms[addr].send(
{
"op": "acquire-replicas",
"who_has": who_has,
"nbytes": nbytes,
"stimulus_id": stimulus_id,
},
)
def request_remove_replicas(
self, addr: str, keys: list[Key], *, stimulus_id: str
) -> None:
"""Asynchronously ask a worker to discard its replica of the listed keys.
This must never be used to destroy the last replica of a key. This is a
fire-and-forget operation, intended for housekeeping and not for computation.
The replica disappears immediately from TaskState.who_has on the Scheduler side;
if the worker refuses to delete, e.g. because the task is a dependency of
another task running on it, it will (also asynchronously) inform the scheduler
to re-add itself to who_has. If the worker agrees to discard the task, there is
no feedback.
"""
ws = self.workers[addr]
# The scheduler immediately forgets about the replica and suggests the worker to
# drop it. The worker may refuse, at which point it will send back an add-keys
# message to reinstate it.
for key in keys:
ts = self.tasks[key]
if self.validate:
# Do not destroy the last copy
assert ts.who_has
assert len(ts.who_has) > 1
self.remove_replica(ts, ws)
self.stream_comms[addr].send(
{
"op": "remove-replicas",
"keys": keys,
"stimulus_id": stimulus_id,
}
)
def _task_to_report_msg(ts: TaskState) -> dict[str, Any] | None:
if ts.state == "forgotten":
return {"op": "cancelled-keys", "keys": [ts.key]}
elif ts.state == "memory":
return {"op": "key-in-memory", "key": ts.key}
elif ts.state == "erred":
failing_ts = ts.exception_blame
assert failing_ts
return {
"op": "task-erred",
"key": ts.key,
"exception": failing_ts.exception,
"traceback": failing_ts.traceback,
}
else:
return None
def _task_to_client_msgs(ts: TaskState) -> Msgs:
if ts.who_wants:
report_msg = _task_to_report_msg(ts)
if report_msg is not None:
return {cs.client_key: [report_msg] for cs in ts.who_wants}
return {}
def decide_worker(
ts: TaskState,
all_workers: set[WorkerState],
valid_workers: set[WorkerState] | None,
objective: Callable[[WorkerState], Any],
) -> WorkerState | None:
"""
Decide which worker should take task *ts*.
We choose the worker that has the data on which *ts* depends.
If several workers have dependencies then we choose the less-busy worker.
Optionally provide *valid_workers* of where jobs are allowed to occur
(if all workers are allowed to take the task, pass None instead).
If the task requires data communication because no eligible worker has
all the dependencies already, then we choose to minimize the number
of bytes sent between workers. This is determined by calling the
*objective* function.
"""
assert all(dts.who_has for dts in ts.dependencies)
if ts.actor:
candidates = all_workers.copy()
else:
candidates = {wws for dts in ts.dependencies for wws in dts.who_has or ()}
candidates &= all_workers
if valid_workers is None:
if not candidates:
candidates = all_workers.copy()
else:
candidates &= valid_workers
if not candidates:
candidates = valid_workers
if not candidates:
if ts.loose_restrictions:
return decide_worker(ts, all_workers, None, objective)
if not candidates:
return None
elif len(candidates) == 1:
return next(iter(candidates))
else:
return min(candidates, key=objective)
def validate_task_state(ts: TaskState) -> None:
"""Validate the given TaskState"""
assert ts.state in ALL_TASK_STATES, ts
if ts.waiting_on:
assert ts.waiting_on.issubset(ts.dependencies), (
"waiting not subset of dependencies",
str(ts.waiting_on),
str(ts.dependencies),
)
if ts.waiters:
assert ts.waiters.issubset(ts.dependents), (
"waiters not subset of dependents",
str(ts.waiters),
str(ts.dependents),
)
for dts in ts.waiting_on or ():
assert not dts.who_has, ("waiting on in-memory dep", str(ts), str(dts))
assert dts.state != "released", ("waiting on released dep", str(ts), str(dts))
for dts in ts.dependencies:
assert ts in dts.dependents, (
"not in dependency's dependents",
str(ts),
str(dts),
str(dts.dependents),
)
if ts.state in ("waiting", "queued", "processing", "no-worker"):
assert ts.waiting_on and dts in ts.waiting_on or dts.who_has, (
"dep missing",
str(ts),
str(dts),
)
assert dts.state != "forgotten"
for dts in ts.waiters or ():
assert dts.state in ("waiting", "queued", "processing", "no-worker"), (
"waiter not in play",
str(ts),
str(dts),
)
for dts in ts.dependents:
assert ts in dts.dependencies, (
"not in dependent's dependencies",
str(ts),
str(dts),
str(dts.dependencies),
)
assert dts.state != "forgotten"
assert (ts.processing_on is not None) == (ts.state == "processing")
assert bool(ts.who_has) == (ts.state == "memory"), (ts, ts.who_has, ts.state)
if ts.state == "queued":
assert not ts.processing_on
assert not ts.who_has
assert all(dts.who_has for dts in ts.dependencies), (
"task queued without all deps",
str(ts),
str(ts.dependencies),
)
if ts.state == "processing":
assert all(dts.who_has for dts in ts.dependencies), (
"task processing without all deps",
str(ts),
str(ts.dependencies),
)
assert not ts.waiting_on
if ts.who_has:
assert ts.waiters or ts.who_wants, (
"unneeded task in memory",
str(ts),
str(ts.who_has),
)
if ts.run_spec: # was computed
assert ts.type
assert isinstance(ts.type, str)
assert not any(
[
ts in dts.waiting_on
for dts in ts.dependents
if dts.waiting_on is not None
]
)
for ws in ts.who_has:
assert ts in ws.has_what, (
"not in who_has' has_what",
str(ts),
str(ws),
str(ws.has_what),
)
for cs in ts.who_wants or ():
assert ts in cs.wants_what, (
"not in who_wants' wants_what",
str(ts),
str(cs),
str(cs.wants_what),
)
if ts.actor:
if ts.state == "memory":
assert ts.who_has
assert sum(ts in ws.actors for ws in ts.who_has) == 1
if ts.state == "processing":
assert ts.processing_on
assert ts in ts.processing_on.actors
assert ts.state != "queued"
def validate_unrunnable(unrunnable: dict[TaskState, float]) -> None:
prev_unrunnable_since: float | None = None
prev_ts: TaskState | None = None
for ts, unrunnable_since in unrunnable.items():
assert ts.state == "no-worker"
if prev_ts is not None:
assert prev_unrunnable_since is not None
# Ensure that unrunnable_since is monotonically increasing when iterating over unrunnable.
# _check_no_workers relies on this.
assert prev_unrunnable_since <= unrunnable_since, (
prev_ts,
ts,
prev_unrunnable_since,
unrunnable_since,
)
prev_ts = ts
prev_unrunnable_since = unrunnable_since
def validate_worker_state(ws: WorkerState) -> None:
for ts in ws.has_what or ():
assert ts.who_has
assert ws in ts.who_has, (
"not in has_what' who_has",
str(ws),
str(ts),
str(ts.who_has),
)
for ts in ws.actors:
assert ts.state in ("memory", "processing")
def validate_state(
tasks: dict[Key, TaskState],
workers: dict[str, WorkerState],
clients: dict[str, ClientState],
) -> None:
"""Validate a current runtime state.
This performs a sequence of checks on the entire graph, running in about linear
time. This raises assert errors if anything doesn't check out.
"""
for ts in tasks.values():
validate_task_state(ts)
for ws in workers.values():
validate_worker_state(ws)
for cs in clients.values():
for ts in cs.wants_what or ():
assert ts.who_wants
assert cs in ts.who_wants, (
"not in wants_what' who_wants",
str(cs),
str(ts),
str(ts.who_wants),
)
def heartbeat_interval(n: int) -> float:
"""Interval in seconds that we desire heartbeats based on number of workers"""
if n <= 10:
return 0.5
elif n < 50:
return 1
elif n < 200:
return 2
else:
# No more than 200 heartbeats a second scaled by workers
return n / 200 + 1
def _task_slots_available(ws: WorkerState, saturation_factor: float) -> int:
"""Number of tasks that can be sent to this worker without oversaturating it"""
assert not math.isinf(saturation_factor)
return max(math.ceil(saturation_factor * ws.nthreads), 1) - (
len(ws.processing) - len(ws.long_running)
)
def _worker_full(ws: WorkerState, saturation_factor: float) -> bool:
if math.isinf(saturation_factor):
return False
return _task_slots_available(ws, saturation_factor) <= 0
class KilledWorker(Exception):
def __init__(self, task: Key, last_worker: WorkerState, allowed_failures: int):
super().__init__(task, last_worker, allowed_failures)
@property
def task(self) -> Key:
return self.args[0]
@property
def last_worker(self) -> WorkerState:
return self.args[1]
@property
def allowed_failures(self) -> int:
return self.args[2]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} on {self.allowed_failures + 1} "
"different workers, but all those workers died while running it. "
f"The last worker that attempt to run the task was {self.last_worker.address}. "
"Inspecting worker logs is often a good next step to diagnose what went wrong. "
"For more information see https://distributed.dask.org/en/stable/killed.html."
)
class NoValidWorkerError(Exception):
def __init__(
self,
task: Key,
host_restrictions: set[str],
worker_restrictions: set[str],
resource_restrictions: dict[str, float],
timeout: float,
):
super().__init__(
task, host_restrictions, worker_restrictions, resource_restrictions, timeout
)
@property
def task(self) -> Key:
return self.args[0]
@property
def host_restrictions(self) -> Any:
return self.args[1]
@property
def worker_restrictions(self) -> Any:
return self.args[2]
@property
def resource_restrictions(self) -> Any:
return self.args[3]
@property
def timeout(self) -> float:
return self.args[4]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} but timed out after {format_time(self.timeout)} "
"waiting for a valid worker matching all restrictions.\n\nRestrictions:\n"
f"host_restrictions={self.host_restrictions!s}\n"
f"worker_restrictions={self.worker_restrictions!s}\n"
f"resource_restrictions={self.resource_restrictions!s}\n"
)
class NoWorkerError(Exception):
def __init__(self, task: Key, timeout: float):
super().__init__(task, timeout)
@property
def task(self) -> Key:
return self.args[0]
@property
def timeout(self) -> float:
return self.args[1]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} but timed out after {format_time(self.timeout)} "
"waiting without any running workers."
)
class WorkerStatusPlugin(SchedulerPlugin):
"""A plugin to share worker status with a remote observer
This is used in cluster managers to keep updated about the status of the scheduler.
"""
name: ClassVar[str] = "worker-status"
bcomm: BatchedSend
def __init__(self, scheduler: Scheduler, comm: Comm):
self.bcomm = BatchedSend(interval="5ms")
self.bcomm.start(comm)
scheduler.add_plugin(self)
def add_worker(self, scheduler: Scheduler, worker: str) -> None:
ident = scheduler.workers[worker].identity()
del ident["metrics"]
del ident["last_seen"]
try:
self.bcomm.send(["add", {"workers": {worker: ident}}])
except CommClosedError:
scheduler.remove_plugin(name=self.name)
def remove_worker(self, scheduler: Scheduler, worker: str, **kwargs: Any) -> None:
try:
self.bcomm.send(["remove", worker])
except CommClosedError:
scheduler.remove_plugin(name=self.name)
def teardown(self) -> None:
self.bcomm.close()
class CollectTaskMetaDataPlugin(SchedulerPlugin):
scheduler: Scheduler
name: str
keys: set[Key]
metadata: dict[Key, Any]
state: dict[Key, TaskStateState]
def __init__(self, scheduler: Scheduler, name: str):
self.scheduler = scheduler
self.name = name
self.keys = set()
self.metadata = {}
self.state = {}
def update_graph(
self,
scheduler: Scheduler,
*,
keys: set[Key],
**kwargs: Any,
) -> None:
self.keys.update(keys)
def transition(
self,
key: Key,
start: TaskStateState,
finish: TaskStateState,
*args: Any,
**kwargs: Any,
) -> None:
if finish in ("memory", "erred"):
ts = self.scheduler.tasks.get(key)
if ts is not None and ts.key in self.keys:
self.metadata[key] = ts.metadata
self.state[key] = finish
self.keys.discard(key)
def _materialize_graph(
expr: Expr,
global_annotations: dict[str, Any],
validate: bool,
) -> tuple[dict[Key, T_runspec], dict[Key, set[Key]], dict[str, dict[Key, Any]]]:
> dsk: dict = expr.__dask_graph__()
E AttributeError: 'dict' object has no attribute '__dask_graph__'
distributed/scheduler.py:9383: AttributeError
Check warning on line 0 in distributed.cli.tests.test_dask_worker
github-actions / Unit Test Results
1 out of 5 runs failed: test_respect_host_listen_address[0.0.0.0---nanny] (distributed.cli.tests.test_dask_worker)
artifacts/ubuntu-latest-mindeps-default-notci1/pytest.xml [took 1s]
Raw output
AttributeError: 'dict' object has no attribute '__dask_graph__'
c = <Client: No scheduler connected>
s = <Scheduler 'tcp://127.0.0.1:43723', workers: 0, cores: 0, tasks: 0>
nanny = '--nanny', host = '0.0.0.0'
@pytest.mark.slow
@pytest.mark.skipif(not LINUX, reason="Need 127.0.0.2 to mean localhost")
@pytest.mark.parametrize("nanny", ["--nanny", "--no-nanny"])
@pytest.mark.parametrize("host", ["127.0.0.2", "0.0.0.0"])
@gen_cluster(client=True, nthreads=[])
async def test_respect_host_listen_address(c, s, nanny, host):
with popen(
[
sys.executable,
"-m",
"dask",
"worker",
s.address,
nanny,
"--no-dashboard",
"--host",
host,
]
):
await c.wait_for_workers(1)
# roundtrip works
> assert await c.submit(lambda x: x + 1, 10) == 11
distributed/cli/tests/test_dask_worker.py:569:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
distributed/client.py:408: in _result
raise exc.with_traceback(tb)
distributed/utils.py:1507: in run_in_executor_with_context
return await loop.run_in_executor(
../../../miniconda3/envs/dask-distributed/lib/python3.10/concurrent/futures/thread.py:58: in run
result = self.fn(*self.args, **self.kwargs)
distributed/utils.py:1508: in <lambda>
executor, lambda: context.run(func, *args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
from __future__ import annotations
import asyncio
import contextlib
import dataclasses
import heapq
import inspect
import itertools
import json
import logging
import math
import operator
import os
import pickle
import random
import textwrap
import uuid
import warnings
import weakref
from abc import abstractmethod
from collections import defaultdict, deque
from collections.abc import (
Callable,
Collection,
Container,
Hashable,
Iterable,
Iterator,
Mapping,
Sequence,
Set,
)
from contextlib import suppress
from functools import partial
from typing import TYPE_CHECKING, Any, ClassVar, Literal, NamedTuple, cast, overload
import psutil
import tornado.web
from sortedcontainers import SortedDict, SortedSet
from tlz import (
concat,
first,
groupby,
merge,
merge_sorted,
merge_with,
partition,
pluck,
second,
take,
valmap,
)
from tornado.ioloop import IOLoop
import dask
import dask.utils
from dask._task_spec import DependenciesMapping, GraphNode, convert_legacy_graph
from dask.core import istask, validate_key
from dask.typing import Key, no_default
from dask.utils import (
_deprecated,
_deprecated_kwarg,
format_bytes,
format_time,
key_split,
parse_bytes,
parse_timedelta,
tmpfile,
)
from dask.widgets import get_template
from distributed import cluster_dump, preloading, profile
from distributed import versions as version_module
from distributed._asyncio import RLock
from distributed._stories import scheduler_story
from distributed.active_memory_manager import ActiveMemoryManagerExtension, RetireWorker
from distributed.batched import BatchedSend
from distributed.broker import Broker
from distributed.client import SourceCode
from distributed.collections import HeapSet
from distributed.comm import (
Comm,
CommClosedError,
get_address_host,
normalize_address,
resolve_address,
unparse_host_port,
)
from distributed.comm.addressing import addresses_from_user_args
from distributed.compatibility import PeriodicCallback
from distributed.core import (
ErrorMessage,
OKMessage,
Status,
clean_exception,
error_message,
rpc,
send_recv,
)
from distributed.diagnostics.memory_sampler import MemorySamplerExtension
from distributed.diagnostics.plugin import SchedulerPlugin, _get_plugin_name
from distributed.event import EventExtension
from distributed.gc import disable_gc_diagnosis, enable_gc_diagnosis
from distributed.http import get_handlers
from distributed.metrics import monotonic, time
from distributed.multi_lock import MultiLockExtension
from distributed.node import ServerNode
from distributed.proctitle import setproctitle
from distributed.protocol import deserialize
from distributed.protocol.pickle import dumps, loads
from distributed.protocol.serialize import Serialized, ToPickle, serialize
from distributed.publish import PublishExtension
from distributed.pubsub import PubSubSchedulerExtension
from distributed.queues import QueueExtension
from distributed.recreate_tasks import ReplayTaskScheduler
from distributed.security import Security
from distributed.semaphore import SemaphoreExtension
from distributed.shuffle import ShuffleSchedulerPlugin
from distributed.spans import SpanMetadata, SpansSchedulerExtension
from distributed.stealing import WorkStealing
from distributed.utils import (
All,
Deadline,
TimeoutError,
format_dashboard_link,
get_fileno_limit,
key_split_group,
log_errors,
offload,
recursive_to_dict,
wait_for,
)
from distributed.utils_comm import (
gather_from_workers,
retry_operation,
scatter_to_workers,
)
from distributed.variable import VariableExtension
if TYPE_CHECKING:
# TODO import from typing (requires Python >=3.10)
# TODO import from typing (requires Python >=3.11)
from typing_extensions import Self, TypeAlias
from dask._expr import Expr
# Not to be confused with distributed.worker_state_machine.TaskStateState
TaskStateState: TypeAlias = Literal[
"released",
"waiting",
"no-worker",
"queued",
"processing",
"memory",
"erred",
"forgotten",
]
ALL_TASK_STATES: Set[TaskStateState] = set(TaskStateState.__args__) # type: ignore
# {task key -> finish state}
# Not to be confused with distributed.worker_state_machine.Recs
Recs: TypeAlias = dict[Key, TaskStateState]
# {client or worker address: [{op: <key>, ...}, ...]}
Msgs: TypeAlias = dict[str, list[dict[str, Any]]]
# (recommendations, client messages, worker messages)
RecsMsgs: TypeAlias = tuple[Recs, Msgs, Msgs]
T_runspec: TypeAlias = GraphNode
logger = logging.getLogger(__name__)
LOG_PDB = dask.config.get("distributed.admin.pdb-on-err")
DEFAULT_DATA_SIZE = parse_bytes(
dask.config.get("distributed.scheduler.default-data-size")
)
STIMULUS_ID_UNSET = "<stimulus_id unset>"
DEFAULT_EXTENSIONS = {
"multi_locks": MultiLockExtension,
"publish": PublishExtension,
"replay-tasks": ReplayTaskScheduler,
"queues": QueueExtension,
"variables": VariableExtension,
"pubsub": PubSubSchedulerExtension,
"semaphores": SemaphoreExtension,
"events": EventExtension,
"amm": ActiveMemoryManagerExtension,
"memory_sampler": MemorySamplerExtension,
"shuffle": ShuffleSchedulerPlugin,
"spans": SpansSchedulerExtension,
"stealing": WorkStealing,
}
class ClientState:
"""A simple object holding information about a client."""
#: A unique identifier for this client. This is generally an opaque
#: string generated by the client itself.
client_key: str
#: Cached hash of :attr:`~ClientState.client_key`
_hash: int
#: A set of tasks this client wants to be kept in memory, so that it can download
#: its result when desired. This is the reverse mapping of
#: :class:`TaskState.who_wants`. Tasks are typically removed from this set when the
#: corresponding object in the client's space (for example a ``Future`` or a Dask
#: collection) gets garbage-collected.
wants_what: set[TaskState]
#: The last time we received a heartbeat from this client, in local scheduler time.
last_seen: float
#: Output of :func:`distributed.versions.get_versions` on the client
versions: dict[str, Any]
__slots__ = tuple(__annotations__)
def __init__(self, client: str, *, versions: dict[str, Any] | None = None):
self.client_key = client
self._hash = hash(client)
self.wants_what = set()
self.last_seen = time()
self.versions = versions or {}
def __hash__(self) -> int:
return self._hash
def __eq__(self, other: object) -> bool:
if not isinstance(other, ClientState):
return False
return self.client_key == other.client_key
def __repr__(self) -> str:
return f"<Client {self.client_key!r}>"
def __str__(self) -> str:
return self.client_key
def _to_dict_no_nest(self, *, exclude: Container[str] = ()) -> dict:
"""Dictionary representation for debugging purposes.
Not type stable and not intended for roundtrips.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
TaskState._to_dict
"""
return recursive_to_dict(
self,
exclude=set(exclude) | {"versions"}, # type: ignore
members=True,
)
class MemoryState:
"""Memory readings on a worker or on the whole cluster.
See :doc:`worker-memory`.
Attributes / properties:
managed_total
Sum of the output of sizeof() for all dask keys held by the worker in memory,
plus number of bytes spilled to disk
managed
Sum of the output of sizeof() for the dask keys held in RAM. Note that this may
be inaccurate, which may cause inaccurate unmanaged memory (see below).
spilled
Number of bytes for the dask keys spilled to the hard drive.
Note that this is the size on disk; size in memory may be different due to
compression and inaccuracies in sizeof(). In other words, given the same keys,
'managed' will change depending on the keys being in memory or spilled.
process
Total RSS memory measured by the OS on the worker process.
This is always exactly equal to managed + unmanaged.
unmanaged
process - managed. This is the sum of
- Python interpreter and modules
- global variables
- memory temporarily allocated by the dask tasks that are currently running
- memory fragmentation
- memory leaks
- memory not yet garbage collected
- memory not yet free()'d by the Python memory manager to the OS
unmanaged_old
Minimum of the 'unmanaged' measures over the last
``distributed.memory.recent-to-old-time`` seconds
unmanaged_recent
unmanaged - unmanaged_old; in other words process memory that has been recently
allocated but is not accounted for by dask; hopefully it's mostly a temporary
spike.
optimistic
managed + unmanaged_old; in other words the memory held long-term by
the process under the hopeful assumption that all unmanaged_recent memory is a
temporary spike
"""
process: int
unmanaged_old: int
managed: int
spilled: int
__slots__ = tuple(__annotations__)
def __init__(
self,
*,
process: int,
unmanaged_old: int,
managed: int,
spilled: int,
):
# Some data arrives with the heartbeat, some other arrives in realtime as the
# tasks progress. Also, sizeof() is not guaranteed to return correct results.
# This can cause glitches where a partial measure is larger than the whole, so
# we need to force all numbers to add up exactly by definition.
self.process = process
self.managed = min(self.process, managed)
self.spilled = spilled
# Subtractions between unsigned ints guaranteed by construction to be >= 0
self.unmanaged_old = min(unmanaged_old, process - self.managed)
@staticmethod
def sum(*infos: MemoryState) -> MemoryState:
process = 0
unmanaged_old = 0
managed = 0
spilled = 0
for ms in infos:
process += ms.process
unmanaged_old += ms.unmanaged_old
spilled += ms.spilled
managed += ms.managed
return MemoryState(
process=process,
unmanaged_old=unmanaged_old,
managed=managed,
spilled=spilled,
)
@property
def managed_total(self) -> int:
return self.managed + self.spilled
@property
def unmanaged(self) -> int:
# This is never negative thanks to __init__
return self.process - self.managed
@property
def unmanaged_recent(self) -> int:
# This is never negative thanks to __init__
return self.process - self.managed - self.unmanaged_old
@property
def optimistic(self) -> int:
return self.managed + self.unmanaged_old
@property
def managed_in_memory(self) -> int:
warnings.warn("managed_in_memory has been renamed to managed", FutureWarning)
return self.managed
@property
def managed_spilled(self) -> int:
warnings.warn("managed_spilled has been renamed to spilled", FutureWarning)
return self.spilled
def __repr__(self) -> str:
return (
f"Process memory (RSS) : {format_bytes(self.process)}\n"
f" - managed by Dask : {format_bytes(self.managed)}\n"
f" - unmanaged (old) : {format_bytes(self.unmanaged_old)}\n"
f" - unmanaged (recent): {format_bytes(self.unmanaged_recent)}\n"
f"Spilled to disk : {format_bytes(self.spilled)}\n"
)
def _to_dict(self, *, exclude: Container[str] = ()) -> dict:
"""Dictionary representation for debugging purposes.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
"""
return {
k: getattr(self, k)
for k in dir(self)
if not k.startswith("_")
and k not in {"sum", "managed_in_memory", "managed_spilled"}
}
class WorkerState:
"""A simple object holding information about a worker.
Not to be confused with :class:`distributed.worker_state_machine.WorkerState`.
"""
#: This worker's unique key. This can be its connected address
#: (such as ``"tcp://127.0.0.1:8891"``) or an alias (such as ``"alice"``).
address: str
pid: int
name: Hashable
#: The number of CPU threads made available on this worker
nthreads: int
#: Memory available to the worker, in bytes
memory_limit: int
local_directory: str
services: dict[str, int]
#: Output of :meth:`distributed.versions.get_versions` on the worker
versions: dict[str, Any]
#: Address of the associated :class:`~distributed.nanny.Nanny`, if present
nanny: str | None
#: Read-only worker status, synced one way from the remote Worker object
status: Status
#: Cached hash of :attr:`~WorkerState.server_id`
_hash: int
#: The total memory size, in bytes, used by the tasks this worker holds in memory
#: (i.e. the tasks in this worker's :attr:`~WorkerState.has_what`).
nbytes: int
#: Worker memory unknown to the worker, in bytes, which has been there for more than
#: 30 seconds. See :class:`MemoryState`.
_memory_unmanaged_old: int
#: History of the last 30 seconds' worth of unmanaged memory. Used to differentiate
#: between "old" and "new" unmanaged memory.
#: Format: ``[(timestamp, bytes), (timestamp, bytes), ...]``
_memory_unmanaged_history: deque[tuple[float, int]]
metrics: dict[str, Any]
#: The last time we received a heartbeat from this worker, in local scheduler time.
last_seen: float
time_delay: float
bandwidth: float
#: A set of all TaskStates on this worker that are actors. This only includes those
#: actors whose state actually lives on this worker, not actors to which this worker
#: has a reference.
actors: set[TaskState]
#: Underlying data of :meth:`WorkerState.has_what`
_has_what: dict[TaskState, None]
#: A set of tasks that have been submitted to this worker. Multiple tasks may be
# submitted to a worker in advance and the worker will run them eventually,
# depending on its execution resources (but see :doc:`work-stealing`).
#:
#: All the tasks here are in the "processing" state.
#: This attribute is kept in sync with :attr:`TaskState.processing_on`.
processing: set[TaskState]
#: Running tasks that invoked :func:`distributed.secede`
long_running: set[TaskState]
#: A dictionary of tasks that are currently being run on this worker.
#: Each task state is associated with the duration in seconds which the task has
#: been running.
executing: dict[TaskState, float]
#: The available resources on this worker, e.g. ``{"GPU": 2}``.
#: These are abstract quantities that constrain certain tasks from running at the
#: same time on this worker.
resources: dict[str, float]
#: The sum of each resource used by all tasks allocated to this worker.
#: The numbers in this dictionary can only be less or equal than those in this
#: worker's :attr:`~WorkerState.resources`.
used_resources: dict[str, float]
#: Arbitrary additional metadata to be added to :meth:`~WorkerState.identity`
extra: dict[str, Any]
# The unique server ID this WorkerState is referencing
server_id: str
# Reference to scheduler task_groups
scheduler_ref: weakref.ref[SchedulerState] | None
task_prefix_count: defaultdict[str, int]
_network_occ: int
_occupancy_cache: float | None
#: Keys that may need to be fetched to this worker, and the number of tasks that need them.
#: All tasks are currently in `memory` on a worker other than this one.
#: Much like `processing`, this does not exactly reflect worker state:
#: keys here may be queued to fetch, in flight, or already in memory
#: on the worker.
needs_what: dict[TaskState, int]
__slots__ = tuple(__annotations__)
def __init__(
self,
*,
address: str,
status: Status,
pid: int,
name: object,
nthreads: int = 0,
memory_limit: int,
local_directory: str,
nanny: str | None,
server_id: str,
services: dict[str, int] | None = None,
versions: dict[str, Any] | None = None,
extra: dict[str, Any] | None = None,
scheduler: SchedulerState | None = None,
):
self.server_id = server_id
self.address = address
self.pid = pid
self.name = name
self.nthreads = nthreads
self.memory_limit = memory_limit
self.local_directory = local_directory
self.services = services or {}
self.versions = versions or {}
self.nanny = nanny
self.status = status
self._hash = hash(self.server_id)
self.nbytes = 0
self._memory_unmanaged_old = 0
self._memory_unmanaged_history = deque()
self.metrics = {}
self.last_seen = time()
self.time_delay = 0
self.bandwidth = parse_bytes(dask.config.get("distributed.scheduler.bandwidth"))
self.actors = set()
self._has_what = {}
self.processing = set()
self.long_running = set()
self.executing = {}
self.resources = {}
self.used_resources = {}
self.extra = extra or {}
self.scheduler_ref = weakref.ref(scheduler) if scheduler else None
self.task_prefix_count = defaultdict(int)
self.needs_what = {}
self._network_occ = 0
self._occupancy_cache = None
def __hash__(self) -> int:
return self._hash
def __eq__(self, other: object) -> bool:
return self is other or (
isinstance(other, WorkerState) and other.server_id == self.server_id
)
@property
def has_what(self) -> Set[TaskState]:
"""An insertion-sorted set-like of tasks which currently reside on this worker.
All the tasks here are in the "memory" state.
This is the reverse mapping of :attr:`TaskState.who_has`.
This is a read-only public accessor. The data is implemented as a dict without
values, because rebalance() relies on dicts being insertion-sorted.
"""
return self._has_what.keys()
@property
def host(self) -> str:
return get_address_host(self.address)
@property
def memory(self) -> MemoryState:
"""Polished memory metrics for the worker.
**Design note on managed memory**
There are two measures available for managed memory:
- ``self.nbytes``
- ``self.metrics["managed_bytes"]``
At rest, the two numbers must be identical. However, ``self.nbytes`` is
immediately updated through the batched comms as soon as each task lands in
memory on the worker; ``self.metrics["managed_bytes"]`` instead is updated by
the heartbeat, which can lag several seconds behind.
Below we are mixing likely newer managed memory info from ``self.nbytes`` with
process and spilled memory from the heartbeat. This is deliberate, so that
managed memory total is updated more frequently.
Managed memory directly and immediately contributes to optimistic memory, which
is in turn used in Active Memory Manager heuristics (at the moment of writing;
more uses will likely be added in the future). So it's important to have it
up to date; much more than it is for process memory.
Having up-to-date managed memory info as soon as the scheduler learns about
task completion also substantially simplifies unit tests.
The flip side of this design is that it may cause some noise in the
unmanaged_recent measure. e.g.:
1. Delete 100MB of managed data
2. The updated managed memory reaches the scheduler faster than the
updated process memory
3. There's a blip where the scheduler thinks that there's a sudden 100MB
increase in unmanaged_recent, since process memory hasn't changed but managed
memory has decreased by 100MB
4. When the heartbeat arrives, process memory goes down and so does the
unmanaged_recent.
This is OK - one of the main reasons for the unmanaged_recent / unmanaged_old
split is exactly to concentrate all the noise in unmanaged_recent and exclude it
from optimistic memory, which is used for heuristics.
Something that is less OK, but also less frequent, is that the sudden deletion
of spilled keys will cause a negative blip in managed memory:
1. Delete 100MB of spilled data
2. The updated managed memory *total* reaches the scheduler faster than the
updated spilled portion
3. This causes the managed memory to temporarily plummet and be replaced by
unmanaged_recent, while spilled memory remains unaltered
4. When the heartbeat arrives, managed goes back up, unmanaged_recent
goes back down, and spilled goes down by 100MB as it should have to
begin with.
:issue:`6002` will let us solve this.
"""
return MemoryState(
process=self.metrics["memory"],
managed=max(0, self.nbytes - self.metrics["spilled_bytes"]["memory"]),
spilled=self.metrics["spilled_bytes"]["disk"],
unmanaged_old=self._memory_unmanaged_old,
)
def clean(self) -> WorkerState:
"""Return a version of this object that is appropriate for serialization"""
ws = WorkerState(
address=self.address,
status=self.status,
pid=self.pid,
name=self.name,
nthreads=self.nthreads,
memory_limit=self.memory_limit,
local_directory=self.local_directory,
services=self.services,
nanny=self.nanny,
extra=self.extra,
server_id=self.server_id,
)
ws._occupancy_cache = self.occupancy
ws.executing = {ts.key: duration for ts, duration in self.executing.items()} # type: ignore
return ws
def __repr__(self) -> str:
name = f", name: {self.name}" if self.name != self.address else ""
return (
f"<WorkerState {self.address!r}{name}, "
f"status: {self.status.name}, "
f"memory: {len(self.has_what)}, "
f"processing: {len(self.processing)}>"
)
def _repr_html_(self) -> str:
return get_template("worker_state.html.j2").render(
address=self.address,
name=self.name,
status=self.status.name,
has_what=self.has_what,
processing=self.processing,
)
def identity(self) -> dict[str, Any]:
return {
"type": "Worker",
"id": self.name,
"host": self.host,
"resources": self.resources,
"local_directory": self.local_directory,
"name": self.name,
"nthreads": self.nthreads,
"memory_limit": self.memory_limit,
"last_seen": self.last_seen,
"services": self.services,
"metrics": self.metrics,
"status": self.status.name,
"nanny": self.nanny,
**self.extra,
}
def _to_dict_no_nest(self, *, exclude: Container[str] = ()) -> dict[str, Any]:
"""Dictionary representation for debugging purposes.
Not type stable and not intended for roundtrips.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
TaskState._to_dict
"""
return recursive_to_dict(
self,
exclude=set(exclude) | {"versions"}, # type: ignore
members=True,
)
@property
def scheduler(self) -> SchedulerState:
assert self.scheduler_ref
s = self.scheduler_ref()
assert s
return s
def add_to_processing(self, ts: TaskState) -> None:
"""Assign a task to this worker for compute."""
if self.scheduler.validate:
assert ts not in self.processing
tp = ts.prefix
self.task_prefix_count[tp.name] += 1
self.scheduler._task_prefix_count_global[tp.name] += 1
self.processing.add(ts)
for dts in ts.dependencies:
assert dts.who_has
if self not in dts.who_has:
self._inc_needs_replica(dts)
def add_to_long_running(self, ts: TaskState) -> None:
if self.scheduler.validate:
assert ts in self.processing
assert ts not in self.long_running
self._remove_from_task_prefix_count(ts)
# Cannot remove from processing since we're using this for things like
# idleness detection. Idle workers are typically targeted for
# downscaling but we should not downscale workers with long running
# tasks
self.long_running.add(ts)
def remove_from_processing(self, ts: TaskState) -> None:
"""Remove a task from a workers processing"""
if self.scheduler.validate:
assert ts in self.processing
if ts in self.long_running:
self.long_running.discard(ts)
else:
self._remove_from_task_prefix_count(ts)
self.processing.remove(ts)
for dts in ts.dependencies:
if dts in self.needs_what:
self._dec_needs_replica(dts)
def _remove_from_task_prefix_count(self, ts: TaskState) -> None:
prefix_name = ts.prefix.name
count = self.task_prefix_count[prefix_name] - 1
tp_count = self.task_prefix_count
tp_count_global = self.scheduler._task_prefix_count_global
if count:
tp_count[prefix_name] = count
else:
del tp_count[prefix_name]
count = tp_count_global[prefix_name] - 1
if count:
tp_count_global[prefix_name] = count
else:
del tp_count_global[prefix_name]
def remove_replica(self, ts: TaskState) -> None:
"""The worker no longer has a task in memory"""
if self.scheduler.validate:
assert ts.who_has
assert self in ts.who_has
assert ts in self.has_what
assert ts not in self.needs_what
self.nbytes -= ts.get_nbytes()
del self._has_what[ts]
ts.who_has.remove(self) # type: ignore
if not ts.who_has:
ts.who_has = None
def _inc_needs_replica(self, ts: TaskState) -> None:
"""Assign a task fetch to this worker and up…cheduler, title="Scheduler Profile (administrative)"
)
task_stream = TabPanel(child=task_stream, title="Task Stream")
bandwidth_workers = TabPanel(
child=bandwidth_workers.root, title="Bandwidth (Workers)"
)
bandwidth_types = TabPanel(
child=bandwidth_types.root, title="Bandwidth (Types)"
)
system = TabPanel(child=sysmon.root, title="System")
logs = TabPanel(child=logs.root, title="Scheduler Logs")
tabs = Tabs(
tabs=[
html,
task_stream,
system,
logs,
compute,
workers,
scheduler,
bandwidth_workers,
bandwidth_types,
],
sizing_mode="stretch_both",
)
from bokeh.core.templates import get_env
from bokeh.plotting import output_file, save
with tmpfile(extension=".html") as fn:
output_file(filename=fn, title="Dask Performance Report", mode=mode)
template_directory = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "dashboard", "templates"
)
template_environment = get_env()
template_environment.loader.searchpath.append(template_directory)
template = template_environment.get_template("performance_report.html")
save(tabs, filename=fn, template=template)
with open(fn) as f:
data = f.read()
return data
async def get_worker_logs(self, n=None, workers=None, nanny=False):
results = await self.broadcast(
msg={"op": "get_logs", "n": n}, workers=workers, nanny=nanny
)
return results
def log_event(self, topic: str | Collection[str], msg: Any) -> None:
"""Log an event under a given topic
Parameters
----------
topic : str, list[str]
Name of the topic under which to log an event. To log the same
event under multiple topics, pass a list of topic names.
msg
Event message to log. Note this must be msgpack serializable.
See also
--------
Client.log_event
"""
self._broker.publish(topic, msg)
def subscribe_topic(self, topic: str, client: str) -> None:
self._broker.subscribe(topic, client)
def unsubscribe_topic(self, topic: str, client: str) -> None:
self._broker.unsubscribe(topic, client)
@overload
def get_events(self, topic: str) -> tuple[tuple[float, Any], ...]: ...
@overload
def get_events(self) -> dict[str, tuple[tuple[float, Any], ...]]: ...
def get_events(
self, topic: str | None = None
) -> tuple[tuple[float, Any], ...] | dict[str, tuple[tuple[float, Any], ...]]:
return self._broker.get_events(topic)
async def get_worker_monitor_info(self, recent=False, starts=None):
if starts is None:
starts = {}
results = await asyncio.gather(
*(
self.rpc(w).get_monitor_info(recent=recent, start=starts.get(w, 0))
for w in self.workers
)
)
return dict(zip(self.workers, results))
###########
# Cleanup #
###########
@log_errors
async def check_worker_ttl(self) -> None:
now = time()
stimulus_id = f"check-worker-ttl-{now}"
assert self.worker_ttl
ttl = max(self.worker_ttl, 10 * heartbeat_interval(len(self.workers)))
to_restart = []
for ws in self.workers.values():
last_seen = now - ws.last_seen
if last_seen > ttl:
to_restart.append(ws.address)
logger.warning(
f"Worker failed to heartbeat for {last_seen:.0f}s; "
f"{'attempting restart' if ws.nanny else 'removing'}: {ws}"
)
if to_restart:
self.log_event(
"scheduler",
{
"action": "worker-ttl-timed-out",
"workers": to_restart.copy(),
"ttl": ttl,
},
)
await self.restart_workers(
to_restart,
wait_for_workers=False,
stimulus_id=stimulus_id,
)
def check_idle(self) -> float | None:
if self.status in (Status.closing, Status.closed):
return None # pragma: nocover
if self.transition_counter != self._idle_transition_counter:
self._idle_transition_counter = self.transition_counter
self.idle_since = None
return None
if self._active_graph_updates > 0:
self.idle_since = None
return None
if (
self.queued
or self.unrunnable
or any(ws.processing for ws in self.workers.values())
):
self.idle_since = None
return None
if not self.idle_since:
self.idle_since = time()
return self.idle_since
if self.jupyter:
last_activity = (
self._jupyter_server_application.web_app.last_activity().timestamp()
)
if last_activity > self.idle_since:
self.idle_since = last_activity
return self.idle_since
if self.idle_timeout:
if time() > self.idle_since + self.idle_timeout:
assert self.idle_since
logger.info(
"Scheduler closing after being idle for %s",
format_time(self.idle_timeout),
)
self._ongoing_background_tasks.call_soon(
self.close, reason="idle-timeout-exceeded"
)
return self.idle_since
def _check_no_workers(self) -> None:
if (
self.status in (Status.closing, Status.closed)
or self.no_workers_timeout is None
):
return
now = monotonic()
stimulus_id = f"check-no-workers-timeout-{time()}"
recommendations: Recs = {}
self._refresh_no_workers_since(now)
affected = self._check_unrunnable_task_timeouts(
now, recommendations=recommendations, stimulus_id=stimulus_id
)
affected.update(
self._check_queued_task_timeouts(
now, recommendations=recommendations, stimulus_id=stimulus_id
)
)
self.transitions(recommendations, stimulus_id=stimulus_id)
if affected:
self.log_event(
"scheduler",
{"action": "no-workers-timeout-exceeded", "keys": affected},
)
def _check_unrunnable_task_timeouts(
self, timestamp: float, recommendations: Recs, stimulus_id: str
) -> set[Key]:
assert self.no_workers_timeout
unsatisfied = []
no_workers = []
for ts, unrunnable_since in self.unrunnable.items():
if timestamp <= unrunnable_since + self.no_workers_timeout:
# unrunnable is insertion-ordered, which means that unrunnable_since will
# be monotonically increasing in this loop.
break
if (
self._no_workers_since is None
or self._no_workers_since >= unrunnable_since
):
unsatisfied.append(ts)
else:
no_workers.append(ts)
if not unsatisfied and not no_workers:
return set()
for ts in unsatisfied:
e = pickle.dumps(
NoValidWorkerError(
task=ts.key,
host_restrictions=(ts.host_restrictions or set()).copy(),
worker_restrictions=(ts.worker_restrictions or set()).copy(),
resource_restrictions=(ts.resource_restrictions or {}).copy(),
timeout=self.no_workers_timeout,
),
)
r = self.transition(
ts.key,
"erred",
exception=e,
cause=ts.key,
stimulus_id=stimulus_id,
)
recommendations.update(r)
logger.error(
"Task %s marked as failed because it timed out waiting "
"for its restrictions to become satisfied.",
ts.key,
)
self._fail_tasks_after_no_workers_timeout(
no_workers, recommendations, stimulus_id
)
return {ts.key for ts in concat([unsatisfied, no_workers])}
def _check_queued_task_timeouts(
self, timestamp: float, recommendations: Recs, stimulus_id: str
) -> set[Key]:
assert self.no_workers_timeout
if self._no_workers_since is None:
return set()
if timestamp <= self._no_workers_since + self.no_workers_timeout:
return set()
affected = list(self.queued)
self._fail_tasks_after_no_workers_timeout(
affected, recommendations, stimulus_id
)
return {ts.key for ts in affected}
def _fail_tasks_after_no_workers_timeout(
self, timed_out: Iterable[TaskState], recommendations: Recs, stimulus_id: str
) -> None:
assert self.no_workers_timeout
for ts in timed_out:
e = pickle.dumps(
NoWorkerError(
task=ts.key,
timeout=self.no_workers_timeout,
),
)
r = self.transition(
ts.key,
"erred",
exception=e,
cause=ts.key,
stimulus_id=stimulus_id,
)
recommendations.update(r)
logger.error(
"Task %s marked as failed because it timed out waiting "
"without any running workers.",
ts.key,
)
def _refresh_no_workers_since(self, timestamp: float | None = None) -> None:
if self.running or not (self.queued or self.unrunnable):
self._no_workers_since = None
return
if not self._no_workers_since:
self._no_workers_since = timestamp or monotonic()
return
def adaptive_target(self, target_duration=None):
"""Desired number of workers based on the current workload
This looks at the current running tasks and memory use, and returns a
number of desired workers. This is often used by adaptive scheduling.
Parameters
----------
target_duration : str
A desired duration of time for computations to take. This affects
how rapidly the scheduler will ask to scale.
See Also
--------
distributed.deploy.Adaptive
"""
if target_duration is None:
target_duration = dask.config.get("distributed.adaptive.target-duration")
target_duration = parse_timedelta(target_duration)
# CPU
queued = take(100, concat([self.queued, self.unrunnable.keys()]))
queued_occupancy = 0
for ts in queued:
queued_occupancy += self._get_prefix_duration(ts.prefix)
tasks_ready = len(self.queued) + len(self.unrunnable)
if tasks_ready > 100:
queued_occupancy *= tasks_ready / 100
cpu = math.ceil((self.total_occupancy + queued_occupancy) / target_duration)
# Avoid a few long tasks from asking for many cores
for ws in self.workers.values():
if tasks_ready > cpu:
break
tasks_ready += len(ws.processing)
else:
cpu = min(tasks_ready, cpu)
# Divide by average nthreads per worker
if self.workers:
nthreads = sum(ws.nthreads for ws in self.workers.values())
cpu = math.ceil(cpu / nthreads * len(self.workers))
if (self.unrunnable or self.queued) and not self.workers:
cpu = max(1, cpu)
# add more workers if more than 60% of memory is used
limit = sum(ws.memory_limit for ws in self.workers.values())
used = sum(ws.nbytes for ws in self.workers.values())
memory = 0
if used > 0.6 * limit and limit > 0:
memory = 2 * len(self.workers)
target = max(memory, cpu)
if target >= len(self.workers):
return target
else: # Scale down?
to_close = self.workers_to_close()
return len(self.workers) - len(to_close)
def request_acquire_replicas(
self, addr: str, keys: Iterable[Key], *, stimulus_id: str
) -> None:
"""Asynchronously ask a worker to acquire a replica of the listed keys from
other workers. This is a fire-and-forget operation which offers no feedback for
success or failure, and is intended for housekeeping and not for computation.
"""
who_has = {}
nbytes = {}
for key in keys:
ts = self.tasks[key]
assert ts.who_has
who_has[key] = [ws.address for ws in ts.who_has or ()]
nbytes[key] = ts.nbytes
self.stream_comms[addr].send(
{
"op": "acquire-replicas",
"who_has": who_has,
"nbytes": nbytes,
"stimulus_id": stimulus_id,
},
)
def request_remove_replicas(
self, addr: str, keys: list[Key], *, stimulus_id: str
) -> None:
"""Asynchronously ask a worker to discard its replica of the listed keys.
This must never be used to destroy the last replica of a key. This is a
fire-and-forget operation, intended for housekeeping and not for computation.
The replica disappears immediately from TaskState.who_has on the Scheduler side;
if the worker refuses to delete, e.g. because the task is a dependency of
another task running on it, it will (also asynchronously) inform the scheduler
to re-add itself to who_has. If the worker agrees to discard the task, there is
no feedback.
"""
ws = self.workers[addr]
# The scheduler immediately forgets about the replica and suggests the worker to
# drop it. The worker may refuse, at which point it will send back an add-keys
# message to reinstate it.
for key in keys:
ts = self.tasks[key]
if self.validate:
# Do not destroy the last copy
assert ts.who_has
assert len(ts.who_has) > 1
self.remove_replica(ts, ws)
self.stream_comms[addr].send(
{
"op": "remove-replicas",
"keys": keys,
"stimulus_id": stimulus_id,
}
)
def _task_to_report_msg(ts: TaskState) -> dict[str, Any] | None:
if ts.state == "forgotten":
return {"op": "cancelled-keys", "keys": [ts.key]}
elif ts.state == "memory":
return {"op": "key-in-memory", "key": ts.key}
elif ts.state == "erred":
failing_ts = ts.exception_blame
assert failing_ts
return {
"op": "task-erred",
"key": ts.key,
"exception": failing_ts.exception,
"traceback": failing_ts.traceback,
}
else:
return None
def _task_to_client_msgs(ts: TaskState) -> Msgs:
if ts.who_wants:
report_msg = _task_to_report_msg(ts)
if report_msg is not None:
return {cs.client_key: [report_msg] for cs in ts.who_wants}
return {}
def decide_worker(
ts: TaskState,
all_workers: set[WorkerState],
valid_workers: set[WorkerState] | None,
objective: Callable[[WorkerState], Any],
) -> WorkerState | None:
"""
Decide which worker should take task *ts*.
We choose the worker that has the data on which *ts* depends.
If several workers have dependencies then we choose the less-busy worker.
Optionally provide *valid_workers* of where jobs are allowed to occur
(if all workers are allowed to take the task, pass None instead).
If the task requires data communication because no eligible worker has
all the dependencies already, then we choose to minimize the number
of bytes sent between workers. This is determined by calling the
*objective* function.
"""
assert all(dts.who_has for dts in ts.dependencies)
if ts.actor:
candidates = all_workers.copy()
else:
candidates = {wws for dts in ts.dependencies for wws in dts.who_has or ()}
candidates &= all_workers
if valid_workers is None:
if not candidates:
candidates = all_workers.copy()
else:
candidates &= valid_workers
if not candidates:
candidates = valid_workers
if not candidates:
if ts.loose_restrictions:
return decide_worker(ts, all_workers, None, objective)
if not candidates:
return None
elif len(candidates) == 1:
return next(iter(candidates))
else:
return min(candidates, key=objective)
def validate_task_state(ts: TaskState) -> None:
"""Validate the given TaskState"""
assert ts.state in ALL_TASK_STATES, ts
if ts.waiting_on:
assert ts.waiting_on.issubset(ts.dependencies), (
"waiting not subset of dependencies",
str(ts.waiting_on),
str(ts.dependencies),
)
if ts.waiters:
assert ts.waiters.issubset(ts.dependents), (
"waiters not subset of dependents",
str(ts.waiters),
str(ts.dependents),
)
for dts in ts.waiting_on or ():
assert not dts.who_has, ("waiting on in-memory dep", str(ts), str(dts))
assert dts.state != "released", ("waiting on released dep", str(ts), str(dts))
for dts in ts.dependencies:
assert ts in dts.dependents, (
"not in dependency's dependents",
str(ts),
str(dts),
str(dts.dependents),
)
if ts.state in ("waiting", "queued", "processing", "no-worker"):
assert ts.waiting_on and dts in ts.waiting_on or dts.who_has, (
"dep missing",
str(ts),
str(dts),
)
assert dts.state != "forgotten"
for dts in ts.waiters or ():
assert dts.state in ("waiting", "queued", "processing", "no-worker"), (
"waiter not in play",
str(ts),
str(dts),
)
for dts in ts.dependents:
assert ts in dts.dependencies, (
"not in dependent's dependencies",
str(ts),
str(dts),
str(dts.dependencies),
)
assert dts.state != "forgotten"
assert (ts.processing_on is not None) == (ts.state == "processing")
assert bool(ts.who_has) == (ts.state == "memory"), (ts, ts.who_has, ts.state)
if ts.state == "queued":
assert not ts.processing_on
assert not ts.who_has
assert all(dts.who_has for dts in ts.dependencies), (
"task queued without all deps",
str(ts),
str(ts.dependencies),
)
if ts.state == "processing":
assert all(dts.who_has for dts in ts.dependencies), (
"task processing without all deps",
str(ts),
str(ts.dependencies),
)
assert not ts.waiting_on
if ts.who_has:
assert ts.waiters or ts.who_wants, (
"unneeded task in memory",
str(ts),
str(ts.who_has),
)
if ts.run_spec: # was computed
assert ts.type
assert isinstance(ts.type, str)
assert not any(
[
ts in dts.waiting_on
for dts in ts.dependents
if dts.waiting_on is not None
]
)
for ws in ts.who_has:
assert ts in ws.has_what, (
"not in who_has' has_what",
str(ts),
str(ws),
str(ws.has_what),
)
for cs in ts.who_wants or ():
assert ts in cs.wants_what, (
"not in who_wants' wants_what",
str(ts),
str(cs),
str(cs.wants_what),
)
if ts.actor:
if ts.state == "memory":
assert ts.who_has
assert sum(ts in ws.actors for ws in ts.who_has) == 1
if ts.state == "processing":
assert ts.processing_on
assert ts in ts.processing_on.actors
assert ts.state != "queued"
def validate_unrunnable(unrunnable: dict[TaskState, float]) -> None:
prev_unrunnable_since: float | None = None
prev_ts: TaskState | None = None
for ts, unrunnable_since in unrunnable.items():
assert ts.state == "no-worker"
if prev_ts is not None:
assert prev_unrunnable_since is not None
# Ensure that unrunnable_since is monotonically increasing when iterating over unrunnable.
# _check_no_workers relies on this.
assert prev_unrunnable_since <= unrunnable_since, (
prev_ts,
ts,
prev_unrunnable_since,
unrunnable_since,
)
prev_ts = ts
prev_unrunnable_since = unrunnable_since
def validate_worker_state(ws: WorkerState) -> None:
for ts in ws.has_what or ():
assert ts.who_has
assert ws in ts.who_has, (
"not in has_what' who_has",
str(ws),
str(ts),
str(ts.who_has),
)
for ts in ws.actors:
assert ts.state in ("memory", "processing")
def validate_state(
tasks: dict[Key, TaskState],
workers: dict[str, WorkerState],
clients: dict[str, ClientState],
) -> None:
"""Validate a current runtime state.
This performs a sequence of checks on the entire graph, running in about linear
time. This raises assert errors if anything doesn't check out.
"""
for ts in tasks.values():
validate_task_state(ts)
for ws in workers.values():
validate_worker_state(ws)
for cs in clients.values():
for ts in cs.wants_what or ():
assert ts.who_wants
assert cs in ts.who_wants, (
"not in wants_what' who_wants",
str(cs),
str(ts),
str(ts.who_wants),
)
def heartbeat_interval(n: int) -> float:
"""Interval in seconds that we desire heartbeats based on number of workers"""
if n <= 10:
return 0.5
elif n < 50:
return 1
elif n < 200:
return 2
else:
# No more than 200 heartbeats a second scaled by workers
return n / 200 + 1
def _task_slots_available(ws: WorkerState, saturation_factor: float) -> int:
"""Number of tasks that can be sent to this worker without oversaturating it"""
assert not math.isinf(saturation_factor)
return max(math.ceil(saturation_factor * ws.nthreads), 1) - (
len(ws.processing) - len(ws.long_running)
)
def _worker_full(ws: WorkerState, saturation_factor: float) -> bool:
if math.isinf(saturation_factor):
return False
return _task_slots_available(ws, saturation_factor) <= 0
class KilledWorker(Exception):
def __init__(self, task: Key, last_worker: WorkerState, allowed_failures: int):
super().__init__(task, last_worker, allowed_failures)
@property
def task(self) -> Key:
return self.args[0]
@property
def last_worker(self) -> WorkerState:
return self.args[1]
@property
def allowed_failures(self) -> int:
return self.args[2]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} on {self.allowed_failures + 1} "
"different workers, but all those workers died while running it. "
f"The last worker that attempt to run the task was {self.last_worker.address}. "
"Inspecting worker logs is often a good next step to diagnose what went wrong. "
"For more information see https://distributed.dask.org/en/stable/killed.html."
)
class NoValidWorkerError(Exception):
def __init__(
self,
task: Key,
host_restrictions: set[str],
worker_restrictions: set[str],
resource_restrictions: dict[str, float],
timeout: float,
):
super().__init__(
task, host_restrictions, worker_restrictions, resource_restrictions, timeout
)
@property
def task(self) -> Key:
return self.args[0]
@property
def host_restrictions(self) -> Any:
return self.args[1]
@property
def worker_restrictions(self) -> Any:
return self.args[2]
@property
def resource_restrictions(self) -> Any:
return self.args[3]
@property
def timeout(self) -> float:
return self.args[4]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} but timed out after {format_time(self.timeout)} "
"waiting for a valid worker matching all restrictions.\n\nRestrictions:\n"
f"host_restrictions={self.host_restrictions!s}\n"
f"worker_restrictions={self.worker_restrictions!s}\n"
f"resource_restrictions={self.resource_restrictions!s}\n"
)
class NoWorkerError(Exception):
def __init__(self, task: Key, timeout: float):
super().__init__(task, timeout)
@property
def task(self) -> Key:
return self.args[0]
@property
def timeout(self) -> float:
return self.args[1]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} but timed out after {format_time(self.timeout)} "
"waiting without any running workers."
)
class WorkerStatusPlugin(SchedulerPlugin):
"""A plugin to share worker status with a remote observer
This is used in cluster managers to keep updated about the status of the scheduler.
"""
name: ClassVar[str] = "worker-status"
bcomm: BatchedSend
def __init__(self, scheduler: Scheduler, comm: Comm):
self.bcomm = BatchedSend(interval="5ms")
self.bcomm.start(comm)
scheduler.add_plugin(self)
def add_worker(self, scheduler: Scheduler, worker: str) -> None:
ident = scheduler.workers[worker].identity()
del ident["metrics"]
del ident["last_seen"]
try:
self.bcomm.send(["add", {"workers": {worker: ident}}])
except CommClosedError:
scheduler.remove_plugin(name=self.name)
def remove_worker(self, scheduler: Scheduler, worker: str, **kwargs: Any) -> None:
try:
self.bcomm.send(["remove", worker])
except CommClosedError:
scheduler.remove_plugin(name=self.name)
def teardown(self) -> None:
self.bcomm.close()
class CollectTaskMetaDataPlugin(SchedulerPlugin):
scheduler: Scheduler
name: str
keys: set[Key]
metadata: dict[Key, Any]
state: dict[Key, TaskStateState]
def __init__(self, scheduler: Scheduler, name: str):
self.scheduler = scheduler
self.name = name
self.keys = set()
self.metadata = {}
self.state = {}
def update_graph(
self,
scheduler: Scheduler,
*,
keys: set[Key],
**kwargs: Any,
) -> None:
self.keys.update(keys)
def transition(
self,
key: Key,
start: TaskStateState,
finish: TaskStateState,
*args: Any,
**kwargs: Any,
) -> None:
if finish in ("memory", "erred"):
ts = self.scheduler.tasks.get(key)
if ts is not None and ts.key in self.keys:
self.metadata[key] = ts.metadata
self.state[key] = finish
self.keys.discard(key)
def _materialize_graph(
expr: Expr,
global_annotations: dict[str, Any],
validate: bool,
) -> tuple[dict[Key, T_runspec], dict[Key, set[Key]], dict[str, dict[Key, Any]]]:
> dsk: dict = expr.__dask_graph__()
E AttributeError: 'dict' object has no attribute '__dask_graph__'
distributed/scheduler.py:9383: AttributeError
Check warning on line 0 in distributed.cli.tests.test_dask_worker
github-actions / Unit Test Results
1 out of 5 runs failed: test_respect_host_listen_address[0.0.0.0---no-nanny] (distributed.cli.tests.test_dask_worker)
artifacts/ubuntu-latest-mindeps-default-notci1/pytest.xml [took 0s]
Raw output
AttributeError: 'dict' object has no attribute '__dask_graph__'
c = <Client: No scheduler connected>
s = <Scheduler 'tcp://127.0.0.1:42257', workers: 0, cores: 0, tasks: 0>
nanny = '--no-nanny', host = '0.0.0.0'
@pytest.mark.slow
@pytest.mark.skipif(not LINUX, reason="Need 127.0.0.2 to mean localhost")
@pytest.mark.parametrize("nanny", ["--nanny", "--no-nanny"])
@pytest.mark.parametrize("host", ["127.0.0.2", "0.0.0.0"])
@gen_cluster(client=True, nthreads=[])
async def test_respect_host_listen_address(c, s, nanny, host):
with popen(
[
sys.executable,
"-m",
"dask",
"worker",
s.address,
nanny,
"--no-dashboard",
"--host",
host,
]
):
await c.wait_for_workers(1)
# roundtrip works
> assert await c.submit(lambda x: x + 1, 10) == 11
distributed/cli/tests/test_dask_worker.py:569:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
distributed/client.py:408: in _result
raise exc.with_traceback(tb)
distributed/utils.py:1507: in run_in_executor_with_context
return await loop.run_in_executor(
../../../miniconda3/envs/dask-distributed/lib/python3.10/concurrent/futures/thread.py:58: in run
result = self.fn(*self.args, **self.kwargs)
distributed/utils.py:1508: in <lambda>
executor, lambda: context.run(func, *args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
from __future__ import annotations
import asyncio
import contextlib
import dataclasses
import heapq
import inspect
import itertools
import json
import logging
import math
import operator
import os
import pickle
import random
import textwrap
import uuid
import warnings
import weakref
from abc import abstractmethod
from collections import defaultdict, deque
from collections.abc import (
Callable,
Collection,
Container,
Hashable,
Iterable,
Iterator,
Mapping,
Sequence,
Set,
)
from contextlib import suppress
from functools import partial
from typing import TYPE_CHECKING, Any, ClassVar, Literal, NamedTuple, cast, overload
import psutil
import tornado.web
from sortedcontainers import SortedDict, SortedSet
from tlz import (
concat,
first,
groupby,
merge,
merge_sorted,
merge_with,
partition,
pluck,
second,
take,
valmap,
)
from tornado.ioloop import IOLoop
import dask
import dask.utils
from dask._task_spec import DependenciesMapping, GraphNode, convert_legacy_graph
from dask.core import istask, validate_key
from dask.typing import Key, no_default
from dask.utils import (
_deprecated,
_deprecated_kwarg,
format_bytes,
format_time,
key_split,
parse_bytes,
parse_timedelta,
tmpfile,
)
from dask.widgets import get_template
from distributed import cluster_dump, preloading, profile
from distributed import versions as version_module
from distributed._asyncio import RLock
from distributed._stories import scheduler_story
from distributed.active_memory_manager import ActiveMemoryManagerExtension, RetireWorker
from distributed.batched import BatchedSend
from distributed.broker import Broker
from distributed.client import SourceCode
from distributed.collections import HeapSet
from distributed.comm import (
Comm,
CommClosedError,
get_address_host,
normalize_address,
resolve_address,
unparse_host_port,
)
from distributed.comm.addressing import addresses_from_user_args
from distributed.compatibility import PeriodicCallback
from distributed.core import (
ErrorMessage,
OKMessage,
Status,
clean_exception,
error_message,
rpc,
send_recv,
)
from distributed.diagnostics.memory_sampler import MemorySamplerExtension
from distributed.diagnostics.plugin import SchedulerPlugin, _get_plugin_name
from distributed.event import EventExtension
from distributed.gc import disable_gc_diagnosis, enable_gc_diagnosis
from distributed.http import get_handlers
from distributed.metrics import monotonic, time
from distributed.multi_lock import MultiLockExtension
from distributed.node import ServerNode
from distributed.proctitle import setproctitle
from distributed.protocol import deserialize
from distributed.protocol.pickle import dumps, loads
from distributed.protocol.serialize import Serialized, ToPickle, serialize
from distributed.publish import PublishExtension
from distributed.pubsub import PubSubSchedulerExtension
from distributed.queues import QueueExtension
from distributed.recreate_tasks import ReplayTaskScheduler
from distributed.security import Security
from distributed.semaphore import SemaphoreExtension
from distributed.shuffle import ShuffleSchedulerPlugin
from distributed.spans import SpanMetadata, SpansSchedulerExtension
from distributed.stealing import WorkStealing
from distributed.utils import (
All,
Deadline,
TimeoutError,
format_dashboard_link,
get_fileno_limit,
key_split_group,
log_errors,
offload,
recursive_to_dict,
wait_for,
)
from distributed.utils_comm import (
gather_from_workers,
retry_operation,
scatter_to_workers,
)
from distributed.variable import VariableExtension
if TYPE_CHECKING:
# TODO import from typing (requires Python >=3.10)
# TODO import from typing (requires Python >=3.11)
from typing_extensions import Self, TypeAlias
from dask._expr import Expr
# Not to be confused with distributed.worker_state_machine.TaskStateState
TaskStateState: TypeAlias = Literal[
"released",
"waiting",
"no-worker",
"queued",
"processing",
"memory",
"erred",
"forgotten",
]
ALL_TASK_STATES: Set[TaskStateState] = set(TaskStateState.__args__) # type: ignore
# {task key -> finish state}
# Not to be confused with distributed.worker_state_machine.Recs
Recs: TypeAlias = dict[Key, TaskStateState]
# {client or worker address: [{op: <key>, ...}, ...]}
Msgs: TypeAlias = dict[str, list[dict[str, Any]]]
# (recommendations, client messages, worker messages)
RecsMsgs: TypeAlias = tuple[Recs, Msgs, Msgs]
T_runspec: TypeAlias = GraphNode
logger = logging.getLogger(__name__)
LOG_PDB = dask.config.get("distributed.admin.pdb-on-err")
DEFAULT_DATA_SIZE = parse_bytes(
dask.config.get("distributed.scheduler.default-data-size")
)
STIMULUS_ID_UNSET = "<stimulus_id unset>"
DEFAULT_EXTENSIONS = {
"multi_locks": MultiLockExtension,
"publish": PublishExtension,
"replay-tasks": ReplayTaskScheduler,
"queues": QueueExtension,
"variables": VariableExtension,
"pubsub": PubSubSchedulerExtension,
"semaphores": SemaphoreExtension,
"events": EventExtension,
"amm": ActiveMemoryManagerExtension,
"memory_sampler": MemorySamplerExtension,
"shuffle": ShuffleSchedulerPlugin,
"spans": SpansSchedulerExtension,
"stealing": WorkStealing,
}
class ClientState:
"""A simple object holding information about a client."""
#: A unique identifier for this client. This is generally an opaque
#: string generated by the client itself.
client_key: str
#: Cached hash of :attr:`~ClientState.client_key`
_hash: int
#: A set of tasks this client wants to be kept in memory, so that it can download
#: its result when desired. This is the reverse mapping of
#: :class:`TaskState.who_wants`. Tasks are typically removed from this set when the
#: corresponding object in the client's space (for example a ``Future`` or a Dask
#: collection) gets garbage-collected.
wants_what: set[TaskState]
#: The last time we received a heartbeat from this client, in local scheduler time.
last_seen: float
#: Output of :func:`distributed.versions.get_versions` on the client
versions: dict[str, Any]
__slots__ = tuple(__annotations__)
def __init__(self, client: str, *, versions: dict[str, Any] | None = None):
self.client_key = client
self._hash = hash(client)
self.wants_what = set()
self.last_seen = time()
self.versions = versions or {}
def __hash__(self) -> int:
return self._hash
def __eq__(self, other: object) -> bool:
if not isinstance(other, ClientState):
return False
return self.client_key == other.client_key
def __repr__(self) -> str:
return f"<Client {self.client_key!r}>"
def __str__(self) -> str:
return self.client_key
def _to_dict_no_nest(self, *, exclude: Container[str] = ()) -> dict:
"""Dictionary representation for debugging purposes.
Not type stable and not intended for roundtrips.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
TaskState._to_dict
"""
return recursive_to_dict(
self,
exclude=set(exclude) | {"versions"}, # type: ignore
members=True,
)
class MemoryState:
"""Memory readings on a worker or on the whole cluster.
See :doc:`worker-memory`.
Attributes / properties:
managed_total
Sum of the output of sizeof() for all dask keys held by the worker in memory,
plus number of bytes spilled to disk
managed
Sum of the output of sizeof() for the dask keys held in RAM. Note that this may
be inaccurate, which may cause inaccurate unmanaged memory (see below).
spilled
Number of bytes for the dask keys spilled to the hard drive.
Note that this is the size on disk; size in memory may be different due to
compression and inaccuracies in sizeof(). In other words, given the same keys,
'managed' will change depending on the keys being in memory or spilled.
process
Total RSS memory measured by the OS on the worker process.
This is always exactly equal to managed + unmanaged.
unmanaged
process - managed. This is the sum of
- Python interpreter and modules
- global variables
- memory temporarily allocated by the dask tasks that are currently running
- memory fragmentation
- memory leaks
- memory not yet garbage collected
- memory not yet free()'d by the Python memory manager to the OS
unmanaged_old
Minimum of the 'unmanaged' measures over the last
``distributed.memory.recent-to-old-time`` seconds
unmanaged_recent
unmanaged - unmanaged_old; in other words process memory that has been recently
allocated but is not accounted for by dask; hopefully it's mostly a temporary
spike.
optimistic
managed + unmanaged_old; in other words the memory held long-term by
the process under the hopeful assumption that all unmanaged_recent memory is a
temporary spike
"""
process: int
unmanaged_old: int
managed: int
spilled: int
__slots__ = tuple(__annotations__)
def __init__(
self,
*,
process: int,
unmanaged_old: int,
managed: int,
spilled: int,
):
# Some data arrives with the heartbeat, some other arrives in realtime as the
# tasks progress. Also, sizeof() is not guaranteed to return correct results.
# This can cause glitches where a partial measure is larger than the whole, so
# we need to force all numbers to add up exactly by definition.
self.process = process
self.managed = min(self.process, managed)
self.spilled = spilled
# Subtractions between unsigned ints guaranteed by construction to be >= 0
self.unmanaged_old = min(unmanaged_old, process - self.managed)
@staticmethod
def sum(*infos: MemoryState) -> MemoryState:
process = 0
unmanaged_old = 0
managed = 0
spilled = 0
for ms in infos:
process += ms.process
unmanaged_old += ms.unmanaged_old
spilled += ms.spilled
managed += ms.managed
return MemoryState(
process=process,
unmanaged_old=unmanaged_old,
managed=managed,
spilled=spilled,
)
@property
def managed_total(self) -> int:
return self.managed + self.spilled
@property
def unmanaged(self) -> int:
# This is never negative thanks to __init__
return self.process - self.managed
@property
def unmanaged_recent(self) -> int:
# This is never negative thanks to __init__
return self.process - self.managed - self.unmanaged_old
@property
def optimistic(self) -> int:
return self.managed + self.unmanaged_old
@property
def managed_in_memory(self) -> int:
warnings.warn("managed_in_memory has been renamed to managed", FutureWarning)
return self.managed
@property
def managed_spilled(self) -> int:
warnings.warn("managed_spilled has been renamed to spilled", FutureWarning)
return self.spilled
def __repr__(self) -> str:
return (
f"Process memory (RSS) : {format_bytes(self.process)}\n"
f" - managed by Dask : {format_bytes(self.managed)}\n"
f" - unmanaged (old) : {format_bytes(self.unmanaged_old)}\n"
f" - unmanaged (recent): {format_bytes(self.unmanaged_recent)}\n"
f"Spilled to disk : {format_bytes(self.spilled)}\n"
)
def _to_dict(self, *, exclude: Container[str] = ()) -> dict:
"""Dictionary representation for debugging purposes.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
"""
return {
k: getattr(self, k)
for k in dir(self)
if not k.startswith("_")
and k not in {"sum", "managed_in_memory", "managed_spilled"}
}
class WorkerState:
"""A simple object holding information about a worker.
Not to be confused with :class:`distributed.worker_state_machine.WorkerState`.
"""
#: This worker's unique key. This can be its connected address
#: (such as ``"tcp://127.0.0.1:8891"``) or an alias (such as ``"alice"``).
address: str
pid: int
name: Hashable
#: The number of CPU threads made available on this worker
nthreads: int
#: Memory available to the worker, in bytes
memory_limit: int
local_directory: str
services: dict[str, int]
#: Output of :meth:`distributed.versions.get_versions` on the worker
versions: dict[str, Any]
#: Address of the associated :class:`~distributed.nanny.Nanny`, if present
nanny: str | None
#: Read-only worker status, synced one way from the remote Worker object
status: Status
#: Cached hash of :attr:`~WorkerState.server_id`
_hash: int
#: The total memory size, in bytes, used by the tasks this worker holds in memory
#: (i.e. the tasks in this worker's :attr:`~WorkerState.has_what`).
nbytes: int
#: Worker memory unknown to the worker, in bytes, which has been there for more than
#: 30 seconds. See :class:`MemoryState`.
_memory_unmanaged_old: int
#: History of the last 30 seconds' worth of unmanaged memory. Used to differentiate
#: between "old" and "new" unmanaged memory.
#: Format: ``[(timestamp, bytes), (timestamp, bytes), ...]``
_memory_unmanaged_history: deque[tuple[float, int]]
metrics: dict[str, Any]
#: The last time we received a heartbeat from this worker, in local scheduler time.
last_seen: float
time_delay: float
bandwidth: float
#: A set of all TaskStates on this worker that are actors. This only includes those
#: actors whose state actually lives on this worker, not actors to which this worker
#: has a reference.
actors: set[TaskState]
#: Underlying data of :meth:`WorkerState.has_what`
_has_what: dict[TaskState, None]
#: A set of tasks that have been submitted to this worker. Multiple tasks may be
# submitted to a worker in advance and the worker will run them eventually,
# depending on its execution resources (but see :doc:`work-stealing`).
#:
#: All the tasks here are in the "processing" state.
#: This attribute is kept in sync with :attr:`TaskState.processing_on`.
processing: set[TaskState]
#: Running tasks that invoked :func:`distributed.secede`
long_running: set[TaskState]
#: A dictionary of tasks that are currently being run on this worker.
#: Each task state is associated with the duration in seconds which the task has
#: been running.
executing: dict[TaskState, float]
#: The available resources on this worker, e.g. ``{"GPU": 2}``.
#: These are abstract quantities that constrain certain tasks from running at the
#: same time on this worker.
resources: dict[str, float]
#: The sum of each resource used by all tasks allocated to this worker.
#: The numbers in this dictionary can only be less or equal than those in this
#: worker's :attr:`~WorkerState.resources`.
used_resources: dict[str, float]
#: Arbitrary additional metadata to be added to :meth:`~WorkerState.identity`
extra: dict[str, Any]
# The unique server ID this WorkerState is referencing
server_id: str
# Reference to scheduler task_groups
scheduler_ref: weakref.ref[SchedulerState] | None
task_prefix_count: defaultdict[str, int]
_network_occ: int
_occupancy_cache: float | None
#: Keys that may need to be fetched to this worker, and the number of tasks that need them.
#: All tasks are currently in `memory` on a worker other than this one.
#: Much like `processing`, this does not exactly reflect worker state:
#: keys here may be queued to fetch, in flight, or already in memory
#: on the worker.
needs_what: dict[TaskState, int]
__slots__ = tuple(__annotations__)
def __init__(
self,
*,
address: str,
status: Status,
pid: int,
name: object,
nthreads: int = 0,
memory_limit: int,
local_directory: str,
nanny: str | None,
server_id: str,
services: dict[str, int] | None = None,
versions: dict[str, Any] | None = None,
extra: dict[str, Any] | None = None,
scheduler: SchedulerState | None = None,
):
self.server_id = server_id
self.address = address
self.pid = pid
self.name = name
self.nthreads = nthreads
self.memory_limit = memory_limit
self.local_directory = local_directory
self.services = services or {}
self.versions = versions or {}
self.nanny = nanny
self.status = status
self._hash = hash(self.server_id)
self.nbytes = 0
self._memory_unmanaged_old = 0
self._memory_unmanaged_history = deque()
self.metrics = {}
self.last_seen = time()
self.time_delay = 0
self.bandwidth = parse_bytes(dask.config.get("distributed.scheduler.bandwidth"))
self.actors = set()
self._has_what = {}
self.processing = set()
self.long_running = set()
self.executing = {}
self.resources = {}
self.used_resources = {}
self.extra = extra or {}
self.scheduler_ref = weakref.ref(scheduler) if scheduler else None
self.task_prefix_count = defaultdict(int)
self.needs_what = {}
self._network_occ = 0
self._occupancy_cache = None
def __hash__(self) -> int:
return self._hash
def __eq__(self, other: object) -> bool:
return self is other or (
isinstance(other, WorkerState) and other.server_id == self.server_id
)
@property
def has_what(self) -> Set[TaskState]:
"""An insertion-sorted set-like of tasks which currently reside on this worker.
All the tasks here are in the "memory" state.
This is the reverse mapping of :attr:`TaskState.who_has`.
This is a read-only public accessor. The data is implemented as a dict without
values, because rebalance() relies on dicts being insertion-sorted.
"""
return self._has_what.keys()
@property
def host(self) -> str:
return get_address_host(self.address)
@property
def memory(self) -> MemoryState:
"""Polished memory metrics for the worker.
**Design note on managed memory**
There are two measures available for managed memory:
- ``self.nbytes``
- ``self.metrics["managed_bytes"]``
At rest, the two numbers must be identical. However, ``self.nbytes`` is
immediately updated through the batched comms as soon as each task lands in
memory on the worker; ``self.metrics["managed_bytes"]`` instead is updated by
the heartbeat, which can lag several seconds behind.
Below we are mixing likely newer managed memory info from ``self.nbytes`` with
process and spilled memory from the heartbeat. This is deliberate, so that
managed memory total is updated more frequently.
Managed memory directly and immediately contributes to optimistic memory, which
is in turn used in Active Memory Manager heuristics (at the moment of writing;
more uses will likely be added in the future). So it's important to have it
up to date; much more than it is for process memory.
Having up-to-date managed memory info as soon as the scheduler learns about
task completion also substantially simplifies unit tests.
The flip side of this design is that it may cause some noise in the
unmanaged_recent measure. e.g.:
1. Delete 100MB of managed data
2. The updated managed memory reaches the scheduler faster than the
updated process memory
3. There's a blip where the scheduler thinks that there's a sudden 100MB
increase in unmanaged_recent, since process memory hasn't changed but managed
memory has decreased by 100MB
4. When the heartbeat arrives, process memory goes down and so does the
unmanaged_recent.
This is OK - one of the main reasons for the unmanaged_recent / unmanaged_old
split is exactly to concentrate all the noise in unmanaged_recent and exclude it
from optimistic memory, which is used for heuristics.
Something that is less OK, but also less frequent, is that the sudden deletion
of spilled keys will cause a negative blip in managed memory:
1. Delete 100MB of spilled data
2. The updated managed memory *total* reaches the scheduler faster than the
updated spilled portion
3. This causes the managed memory to temporarily plummet and be replaced by
unmanaged_recent, while spilled memory remains unaltered
4. When the heartbeat arrives, managed goes back up, unmanaged_recent
goes back down, and spilled goes down by 100MB as it should have to
begin with.
:issue:`6002` will let us solve this.
"""
return MemoryState(
process=self.metrics["memory"],
managed=max(0, self.nbytes - self.metrics["spilled_bytes"]["memory"]),
spilled=self.metrics["spilled_bytes"]["disk"],
unmanaged_old=self._memory_unmanaged_old,
)
def clean(self) -> WorkerState:
"""Return a version of this object that is appropriate for serialization"""
ws = WorkerState(
address=self.address,
status=self.status,
pid=self.pid,
name=self.name,
nthreads=self.nthreads,
memory_limit=self.memory_limit,
local_directory=self.local_directory,
services=self.services,
nanny=self.nanny,
extra=self.extra,
server_id=self.server_id,
)
ws._occupancy_cache = self.occupancy
ws.executing = {ts.key: duration for ts, duration in self.executing.items()} # type: ignore
return ws
def __repr__(self) -> str:
name = f", name: {self.name}" if self.name != self.address else ""
return (
f"<WorkerState {self.address!r}{name}, "
f"status: {self.status.name}, "
f"memory: {len(self.has_what)}, "
f"processing: {len(self.processing)}>"
)
def _repr_html_(self) -> str:
return get_template("worker_state.html.j2").render(
address=self.address,
name=self.name,
status=self.status.name,
has_what=self.has_what,
processing=self.processing,
)
def identity(self) -> dict[str, Any]:
return {
"type": "Worker",
"id": self.name,
"host": self.host,
"resources": self.resources,
"local_directory": self.local_directory,
"name": self.name,
"nthreads": self.nthreads,
"memory_limit": self.memory_limit,
"last_seen": self.last_seen,
"services": self.services,
"metrics": self.metrics,
"status": self.status.name,
"nanny": self.nanny,
**self.extra,
}
def _to_dict_no_nest(self, *, exclude: Container[str] = ()) -> dict[str, Any]:
"""Dictionary representation for debugging purposes.
Not type stable and not intended for roundtrips.
See also
--------
Client.dump_cluster_state
distributed.utils.recursive_to_dict
TaskState._to_dict
"""
return recursive_to_dict(
self,
exclude=set(exclude) | {"versions"}, # type: ignore
members=True,
)
@property
def scheduler(self) -> SchedulerState:
assert self.scheduler_ref
s = self.scheduler_ref()
assert s
return s
def add_to_processing(self, ts: TaskState) -> None:
"""Assign a task to this worker for compute."""
if self.scheduler.validate:
assert ts not in self.processing
tp = ts.prefix
self.task_prefix_count[tp.name] += 1
self.scheduler._task_prefix_count_global[tp.name] += 1
self.processing.add(ts)
for dts in ts.dependencies:
assert dts.who_has
if self not in dts.who_has:
self._inc_needs_replica(dts)
def add_to_long_running(self, ts: TaskState) -> None:
if self.scheduler.validate:
assert ts in self.processing
assert ts not in self.long_running
self._remove_from_task_prefix_count(ts)
# Cannot remove from processing since we're using this for things like
# idleness detection. Idle workers are typically targeted for
# downscaling but we should not downscale workers with long running
# tasks
self.long_running.add(ts)
def remove_from_processing(self, ts: TaskState) -> None:
"""Remove a task from a workers processing"""
if self.scheduler.validate:
assert ts in self.processing
if ts in self.long_running:
self.long_running.discard(ts)
else:
self._remove_from_task_prefix_count(ts)
self.processing.remove(ts)
for dts in ts.dependencies:
if dts in self.needs_what:
self._dec_needs_replica(dts)
def _remove_from_task_prefix_count(self, ts: TaskState) -> None:
prefix_name = ts.prefix.name
count = self.task_prefix_count[prefix_name] - 1
tp_count = self.task_prefix_count
tp_count_global = self.scheduler._task_prefix_count_global
if count:
tp_count[prefix_name] = count
else:
del tp_count[prefix_name]
count = tp_count_global[prefix_name] - 1
if count:
tp_count_global[prefix_name] = count
else:
del tp_count_global[prefix_name]
def remove_replica(self, ts: TaskState) -> None:
"""The worker no longer has a task in memory"""
if self.scheduler.validate:
assert ts.who_has
assert self in ts.who_has
assert ts in self.has_what
assert ts not in self.needs_what
self.nbytes -= ts.get_nbytes()
del self._has_what[ts]
ts.who_has.remove(self) # type: ignore
if not ts.who_has:
ts.who_has = None
def _inc_needs_replica(self, ts: TaskState) -> None:
"""Assign a task fetch to this worker and…cheduler, title="Scheduler Profile (administrative)"
)
task_stream = TabPanel(child=task_stream, title="Task Stream")
bandwidth_workers = TabPanel(
child=bandwidth_workers.root, title="Bandwidth (Workers)"
)
bandwidth_types = TabPanel(
child=bandwidth_types.root, title="Bandwidth (Types)"
)
system = TabPanel(child=sysmon.root, title="System")
logs = TabPanel(child=logs.root, title="Scheduler Logs")
tabs = Tabs(
tabs=[
html,
task_stream,
system,
logs,
compute,
workers,
scheduler,
bandwidth_workers,
bandwidth_types,
],
sizing_mode="stretch_both",
)
from bokeh.core.templates import get_env
from bokeh.plotting import output_file, save
with tmpfile(extension=".html") as fn:
output_file(filename=fn, title="Dask Performance Report", mode=mode)
template_directory = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "dashboard", "templates"
)
template_environment = get_env()
template_environment.loader.searchpath.append(template_directory)
template = template_environment.get_template("performance_report.html")
save(tabs, filename=fn, template=template)
with open(fn) as f:
data = f.read()
return data
async def get_worker_logs(self, n=None, workers=None, nanny=False):
results = await self.broadcast(
msg={"op": "get_logs", "n": n}, workers=workers, nanny=nanny
)
return results
def log_event(self, topic: str | Collection[str], msg: Any) -> None:
"""Log an event under a given topic
Parameters
----------
topic : str, list[str]
Name of the topic under which to log an event. To log the same
event under multiple topics, pass a list of topic names.
msg
Event message to log. Note this must be msgpack serializable.
See also
--------
Client.log_event
"""
self._broker.publish(topic, msg)
def subscribe_topic(self, topic: str, client: str) -> None:
self._broker.subscribe(topic, client)
def unsubscribe_topic(self, topic: str, client: str) -> None:
self._broker.unsubscribe(topic, client)
@overload
def get_events(self, topic: str) -> tuple[tuple[float, Any], ...]: ...
@overload
def get_events(self) -> dict[str, tuple[tuple[float, Any], ...]]: ...
def get_events(
self, topic: str | None = None
) -> tuple[tuple[float, Any], ...] | dict[str, tuple[tuple[float, Any], ...]]:
return self._broker.get_events(topic)
async def get_worker_monitor_info(self, recent=False, starts=None):
if starts is None:
starts = {}
results = await asyncio.gather(
*(
self.rpc(w).get_monitor_info(recent=recent, start=starts.get(w, 0))
for w in self.workers
)
)
return dict(zip(self.workers, results))
###########
# Cleanup #
###########
@log_errors
async def check_worker_ttl(self) -> None:
now = time()
stimulus_id = f"check-worker-ttl-{now}"
assert self.worker_ttl
ttl = max(self.worker_ttl, 10 * heartbeat_interval(len(self.workers)))
to_restart = []
for ws in self.workers.values():
last_seen = now - ws.last_seen
if last_seen > ttl:
to_restart.append(ws.address)
logger.warning(
f"Worker failed to heartbeat for {last_seen:.0f}s; "
f"{'attempting restart' if ws.nanny else 'removing'}: {ws}"
)
if to_restart:
self.log_event(
"scheduler",
{
"action": "worker-ttl-timed-out",
"workers": to_restart.copy(),
"ttl": ttl,
},
)
await self.restart_workers(
to_restart,
wait_for_workers=False,
stimulus_id=stimulus_id,
)
def check_idle(self) -> float | None:
if self.status in (Status.closing, Status.closed):
return None # pragma: nocover
if self.transition_counter != self._idle_transition_counter:
self._idle_transition_counter = self.transition_counter
self.idle_since = None
return None
if self._active_graph_updates > 0:
self.idle_since = None
return None
if (
self.queued
or self.unrunnable
or any(ws.processing for ws in self.workers.values())
):
self.idle_since = None
return None
if not self.idle_since:
self.idle_since = time()
return self.idle_since
if self.jupyter:
last_activity = (
self._jupyter_server_application.web_app.last_activity().timestamp()
)
if last_activity > self.idle_since:
self.idle_since = last_activity
return self.idle_since
if self.idle_timeout:
if time() > self.idle_since + self.idle_timeout:
assert self.idle_since
logger.info(
"Scheduler closing after being idle for %s",
format_time(self.idle_timeout),
)
self._ongoing_background_tasks.call_soon(
self.close, reason="idle-timeout-exceeded"
)
return self.idle_since
def _check_no_workers(self) -> None:
if (
self.status in (Status.closing, Status.closed)
or self.no_workers_timeout is None
):
return
now = monotonic()
stimulus_id = f"check-no-workers-timeout-{time()}"
recommendations: Recs = {}
self._refresh_no_workers_since(now)
affected = self._check_unrunnable_task_timeouts(
now, recommendations=recommendations, stimulus_id=stimulus_id
)
affected.update(
self._check_queued_task_timeouts(
now, recommendations=recommendations, stimulus_id=stimulus_id
)
)
self.transitions(recommendations, stimulus_id=stimulus_id)
if affected:
self.log_event(
"scheduler",
{"action": "no-workers-timeout-exceeded", "keys": affected},
)
def _check_unrunnable_task_timeouts(
self, timestamp: float, recommendations: Recs, stimulus_id: str
) -> set[Key]:
assert self.no_workers_timeout
unsatisfied = []
no_workers = []
for ts, unrunnable_since in self.unrunnable.items():
if timestamp <= unrunnable_since + self.no_workers_timeout:
# unrunnable is insertion-ordered, which means that unrunnable_since will
# be monotonically increasing in this loop.
break
if (
self._no_workers_since is None
or self._no_workers_since >= unrunnable_since
):
unsatisfied.append(ts)
else:
no_workers.append(ts)
if not unsatisfied and not no_workers:
return set()
for ts in unsatisfied:
e = pickle.dumps(
NoValidWorkerError(
task=ts.key,
host_restrictions=(ts.host_restrictions or set()).copy(),
worker_restrictions=(ts.worker_restrictions or set()).copy(),
resource_restrictions=(ts.resource_restrictions or {}).copy(),
timeout=self.no_workers_timeout,
),
)
r = self.transition(
ts.key,
"erred",
exception=e,
cause=ts.key,
stimulus_id=stimulus_id,
)
recommendations.update(r)
logger.error(
"Task %s marked as failed because it timed out waiting "
"for its restrictions to become satisfied.",
ts.key,
)
self._fail_tasks_after_no_workers_timeout(
no_workers, recommendations, stimulus_id
)
return {ts.key for ts in concat([unsatisfied, no_workers])}
def _check_queued_task_timeouts(
self, timestamp: float, recommendations: Recs, stimulus_id: str
) -> set[Key]:
assert self.no_workers_timeout
if self._no_workers_since is None:
return set()
if timestamp <= self._no_workers_since + self.no_workers_timeout:
return set()
affected = list(self.queued)
self._fail_tasks_after_no_workers_timeout(
affected, recommendations, stimulus_id
)
return {ts.key for ts in affected}
def _fail_tasks_after_no_workers_timeout(
self, timed_out: Iterable[TaskState], recommendations: Recs, stimulus_id: str
) -> None:
assert self.no_workers_timeout
for ts in timed_out:
e = pickle.dumps(
NoWorkerError(
task=ts.key,
timeout=self.no_workers_timeout,
),
)
r = self.transition(
ts.key,
"erred",
exception=e,
cause=ts.key,
stimulus_id=stimulus_id,
)
recommendations.update(r)
logger.error(
"Task %s marked as failed because it timed out waiting "
"without any running workers.",
ts.key,
)
def _refresh_no_workers_since(self, timestamp: float | None = None) -> None:
if self.running or not (self.queued or self.unrunnable):
self._no_workers_since = None
return
if not self._no_workers_since:
self._no_workers_since = timestamp or monotonic()
return
def adaptive_target(self, target_duration=None):
"""Desired number of workers based on the current workload
This looks at the current running tasks and memory use, and returns a
number of desired workers. This is often used by adaptive scheduling.
Parameters
----------
target_duration : str
A desired duration of time for computations to take. This affects
how rapidly the scheduler will ask to scale.
See Also
--------
distributed.deploy.Adaptive
"""
if target_duration is None:
target_duration = dask.config.get("distributed.adaptive.target-duration")
target_duration = parse_timedelta(target_duration)
# CPU
queued = take(100, concat([self.queued, self.unrunnable.keys()]))
queued_occupancy = 0
for ts in queued:
queued_occupancy += self._get_prefix_duration(ts.prefix)
tasks_ready = len(self.queued) + len(self.unrunnable)
if tasks_ready > 100:
queued_occupancy *= tasks_ready / 100
cpu = math.ceil((self.total_occupancy + queued_occupancy) / target_duration)
# Avoid a few long tasks from asking for many cores
for ws in self.workers.values():
if tasks_ready > cpu:
break
tasks_ready += len(ws.processing)
else:
cpu = min(tasks_ready, cpu)
# Divide by average nthreads per worker
if self.workers:
nthreads = sum(ws.nthreads for ws in self.workers.values())
cpu = math.ceil(cpu / nthreads * len(self.workers))
if (self.unrunnable or self.queued) and not self.workers:
cpu = max(1, cpu)
# add more workers if more than 60% of memory is used
limit = sum(ws.memory_limit for ws in self.workers.values())
used = sum(ws.nbytes for ws in self.workers.values())
memory = 0
if used > 0.6 * limit and limit > 0:
memory = 2 * len(self.workers)
target = max(memory, cpu)
if target >= len(self.workers):
return target
else: # Scale down?
to_close = self.workers_to_close()
return len(self.workers) - len(to_close)
def request_acquire_replicas(
self, addr: str, keys: Iterable[Key], *, stimulus_id: str
) -> None:
"""Asynchronously ask a worker to acquire a replica of the listed keys from
other workers. This is a fire-and-forget operation which offers no feedback for
success or failure, and is intended for housekeeping and not for computation.
"""
who_has = {}
nbytes = {}
for key in keys:
ts = self.tasks[key]
assert ts.who_has
who_has[key] = [ws.address for ws in ts.who_has or ()]
nbytes[key] = ts.nbytes
self.stream_comms[addr].send(
{
"op": "acquire-replicas",
"who_has": who_has,
"nbytes": nbytes,
"stimulus_id": stimulus_id,
},
)
def request_remove_replicas(
self, addr: str, keys: list[Key], *, stimulus_id: str
) -> None:
"""Asynchronously ask a worker to discard its replica of the listed keys.
This must never be used to destroy the last replica of a key. This is a
fire-and-forget operation, intended for housekeeping and not for computation.
The replica disappears immediately from TaskState.who_has on the Scheduler side;
if the worker refuses to delete, e.g. because the task is a dependency of
another task running on it, it will (also asynchronously) inform the scheduler
to re-add itself to who_has. If the worker agrees to discard the task, there is
no feedback.
"""
ws = self.workers[addr]
# The scheduler immediately forgets about the replica and suggests the worker to
# drop it. The worker may refuse, at which point it will send back an add-keys
# message to reinstate it.
for key in keys:
ts = self.tasks[key]
if self.validate:
# Do not destroy the last copy
assert ts.who_has
assert len(ts.who_has) > 1
self.remove_replica(ts, ws)
self.stream_comms[addr].send(
{
"op": "remove-replicas",
"keys": keys,
"stimulus_id": stimulus_id,
}
)
def _task_to_report_msg(ts: TaskState) -> dict[str, Any] | None:
if ts.state == "forgotten":
return {"op": "cancelled-keys", "keys": [ts.key]}
elif ts.state == "memory":
return {"op": "key-in-memory", "key": ts.key}
elif ts.state == "erred":
failing_ts = ts.exception_blame
assert failing_ts
return {
"op": "task-erred",
"key": ts.key,
"exception": failing_ts.exception,
"traceback": failing_ts.traceback,
}
else:
return None
def _task_to_client_msgs(ts: TaskState) -> Msgs:
if ts.who_wants:
report_msg = _task_to_report_msg(ts)
if report_msg is not None:
return {cs.client_key: [report_msg] for cs in ts.who_wants}
return {}
def decide_worker(
ts: TaskState,
all_workers: set[WorkerState],
valid_workers: set[WorkerState] | None,
objective: Callable[[WorkerState], Any],
) -> WorkerState | None:
"""
Decide which worker should take task *ts*.
We choose the worker that has the data on which *ts* depends.
If several workers have dependencies then we choose the less-busy worker.
Optionally provide *valid_workers* of where jobs are allowed to occur
(if all workers are allowed to take the task, pass None instead).
If the task requires data communication because no eligible worker has
all the dependencies already, then we choose to minimize the number
of bytes sent between workers. This is determined by calling the
*objective* function.
"""
assert all(dts.who_has for dts in ts.dependencies)
if ts.actor:
candidates = all_workers.copy()
else:
candidates = {wws for dts in ts.dependencies for wws in dts.who_has or ()}
candidates &= all_workers
if valid_workers is None:
if not candidates:
candidates = all_workers.copy()
else:
candidates &= valid_workers
if not candidates:
candidates = valid_workers
if not candidates:
if ts.loose_restrictions:
return decide_worker(ts, all_workers, None, objective)
if not candidates:
return None
elif len(candidates) == 1:
return next(iter(candidates))
else:
return min(candidates, key=objective)
def validate_task_state(ts: TaskState) -> None:
"""Validate the given TaskState"""
assert ts.state in ALL_TASK_STATES, ts
if ts.waiting_on:
assert ts.waiting_on.issubset(ts.dependencies), (
"waiting not subset of dependencies",
str(ts.waiting_on),
str(ts.dependencies),
)
if ts.waiters:
assert ts.waiters.issubset(ts.dependents), (
"waiters not subset of dependents",
str(ts.waiters),
str(ts.dependents),
)
for dts in ts.waiting_on or ():
assert not dts.who_has, ("waiting on in-memory dep", str(ts), str(dts))
assert dts.state != "released", ("waiting on released dep", str(ts), str(dts))
for dts in ts.dependencies:
assert ts in dts.dependents, (
"not in dependency's dependents",
str(ts),
str(dts),
str(dts.dependents),
)
if ts.state in ("waiting", "queued", "processing", "no-worker"):
assert ts.waiting_on and dts in ts.waiting_on or dts.who_has, (
"dep missing",
str(ts),
str(dts),
)
assert dts.state != "forgotten"
for dts in ts.waiters or ():
assert dts.state in ("waiting", "queued", "processing", "no-worker"), (
"waiter not in play",
str(ts),
str(dts),
)
for dts in ts.dependents:
assert ts in dts.dependencies, (
"not in dependent's dependencies",
str(ts),
str(dts),
str(dts.dependencies),
)
assert dts.state != "forgotten"
assert (ts.processing_on is not None) == (ts.state == "processing")
assert bool(ts.who_has) == (ts.state == "memory"), (ts, ts.who_has, ts.state)
if ts.state == "queued":
assert not ts.processing_on
assert not ts.who_has
assert all(dts.who_has for dts in ts.dependencies), (
"task queued without all deps",
str(ts),
str(ts.dependencies),
)
if ts.state == "processing":
assert all(dts.who_has for dts in ts.dependencies), (
"task processing without all deps",
str(ts),
str(ts.dependencies),
)
assert not ts.waiting_on
if ts.who_has:
assert ts.waiters or ts.who_wants, (
"unneeded task in memory",
str(ts),
str(ts.who_has),
)
if ts.run_spec: # was computed
assert ts.type
assert isinstance(ts.type, str)
assert not any(
[
ts in dts.waiting_on
for dts in ts.dependents
if dts.waiting_on is not None
]
)
for ws in ts.who_has:
assert ts in ws.has_what, (
"not in who_has' has_what",
str(ts),
str(ws),
str(ws.has_what),
)
for cs in ts.who_wants or ():
assert ts in cs.wants_what, (
"not in who_wants' wants_what",
str(ts),
str(cs),
str(cs.wants_what),
)
if ts.actor:
if ts.state == "memory":
assert ts.who_has
assert sum(ts in ws.actors for ws in ts.who_has) == 1
if ts.state == "processing":
assert ts.processing_on
assert ts in ts.processing_on.actors
assert ts.state != "queued"
def validate_unrunnable(unrunnable: dict[TaskState, float]) -> None:
prev_unrunnable_since: float | None = None
prev_ts: TaskState | None = None
for ts, unrunnable_since in unrunnable.items():
assert ts.state == "no-worker"
if prev_ts is not None:
assert prev_unrunnable_since is not None
# Ensure that unrunnable_since is monotonically increasing when iterating over unrunnable.
# _check_no_workers relies on this.
assert prev_unrunnable_since <= unrunnable_since, (
prev_ts,
ts,
prev_unrunnable_since,
unrunnable_since,
)
prev_ts = ts
prev_unrunnable_since = unrunnable_since
def validate_worker_state(ws: WorkerState) -> None:
for ts in ws.has_what or ():
assert ts.who_has
assert ws in ts.who_has, (
"not in has_what' who_has",
str(ws),
str(ts),
str(ts.who_has),
)
for ts in ws.actors:
assert ts.state in ("memory", "processing")
def validate_state(
tasks: dict[Key, TaskState],
workers: dict[str, WorkerState],
clients: dict[str, ClientState],
) -> None:
"""Validate a current runtime state.
This performs a sequence of checks on the entire graph, running in about linear
time. This raises assert errors if anything doesn't check out.
"""
for ts in tasks.values():
validate_task_state(ts)
for ws in workers.values():
validate_worker_state(ws)
for cs in clients.values():
for ts in cs.wants_what or ():
assert ts.who_wants
assert cs in ts.who_wants, (
"not in wants_what' who_wants",
str(cs),
str(ts),
str(ts.who_wants),
)
def heartbeat_interval(n: int) -> float:
"""Interval in seconds that we desire heartbeats based on number of workers"""
if n <= 10:
return 0.5
elif n < 50:
return 1
elif n < 200:
return 2
else:
# No more than 200 heartbeats a second scaled by workers
return n / 200 + 1
def _task_slots_available(ws: WorkerState, saturation_factor: float) -> int:
"""Number of tasks that can be sent to this worker without oversaturating it"""
assert not math.isinf(saturation_factor)
return max(math.ceil(saturation_factor * ws.nthreads), 1) - (
len(ws.processing) - len(ws.long_running)
)
def _worker_full(ws: WorkerState, saturation_factor: float) -> bool:
if math.isinf(saturation_factor):
return False
return _task_slots_available(ws, saturation_factor) <= 0
class KilledWorker(Exception):
def __init__(self, task: Key, last_worker: WorkerState, allowed_failures: int):
super().__init__(task, last_worker, allowed_failures)
@property
def task(self) -> Key:
return self.args[0]
@property
def last_worker(self) -> WorkerState:
return self.args[1]
@property
def allowed_failures(self) -> int:
return self.args[2]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} on {self.allowed_failures + 1} "
"different workers, but all those workers died while running it. "
f"The last worker that attempt to run the task was {self.last_worker.address}. "
"Inspecting worker logs is often a good next step to diagnose what went wrong. "
"For more information see https://distributed.dask.org/en/stable/killed.html."
)
class NoValidWorkerError(Exception):
def __init__(
self,
task: Key,
host_restrictions: set[str],
worker_restrictions: set[str],
resource_restrictions: dict[str, float],
timeout: float,
):
super().__init__(
task, host_restrictions, worker_restrictions, resource_restrictions, timeout
)
@property
def task(self) -> Key:
return self.args[0]
@property
def host_restrictions(self) -> Any:
return self.args[1]
@property
def worker_restrictions(self) -> Any:
return self.args[2]
@property
def resource_restrictions(self) -> Any:
return self.args[3]
@property
def timeout(self) -> float:
return self.args[4]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} but timed out after {format_time(self.timeout)} "
"waiting for a valid worker matching all restrictions.\n\nRestrictions:\n"
f"host_restrictions={self.host_restrictions!s}\n"
f"worker_restrictions={self.worker_restrictions!s}\n"
f"resource_restrictions={self.resource_restrictions!s}\n"
)
class NoWorkerError(Exception):
def __init__(self, task: Key, timeout: float):
super().__init__(task, timeout)
@property
def task(self) -> Key:
return self.args[0]
@property
def timeout(self) -> float:
return self.args[1]
def __str__(self) -> str:
return (
f"Attempted to run task {self.task!r} but timed out after {format_time(self.timeout)} "
"waiting without any running workers."
)
class WorkerStatusPlugin(SchedulerPlugin):
"""A plugin to share worker status with a remote observer
This is used in cluster managers to keep updated about the status of the scheduler.
"""
name: ClassVar[str] = "worker-status"
bcomm: BatchedSend
def __init__(self, scheduler: Scheduler, comm: Comm):
self.bcomm = BatchedSend(interval="5ms")
self.bcomm.start(comm)
scheduler.add_plugin(self)
def add_worker(self, scheduler: Scheduler, worker: str) -> None:
ident = scheduler.workers[worker].identity()
del ident["metrics"]
del ident["last_seen"]
try:
self.bcomm.send(["add", {"workers": {worker: ident}}])
except CommClosedError:
scheduler.remove_plugin(name=self.name)
def remove_worker(self, scheduler: Scheduler, worker: str, **kwargs: Any) -> None:
try:
self.bcomm.send(["remove", worker])
except CommClosedError:
scheduler.remove_plugin(name=self.name)
def teardown(self) -> None:
self.bcomm.close()
class CollectTaskMetaDataPlugin(SchedulerPlugin):
scheduler: Scheduler
name: str
keys: set[Key]
metadata: dict[Key, Any]
state: dict[Key, TaskStateState]
def __init__(self, scheduler: Scheduler, name: str):
self.scheduler = scheduler
self.name = name
self.keys = set()
self.metadata = {}
self.state = {}
def update_graph(
self,
scheduler: Scheduler,
*,
keys: set[Key],
**kwargs: Any,
) -> None:
self.keys.update(keys)
def transition(
self,
key: Key,
start: TaskStateState,
finish: TaskStateState,
*args: Any,
**kwargs: Any,
) -> None:
if finish in ("memory", "erred"):
ts = self.scheduler.tasks.get(key)
if ts is not None and ts.key in self.keys:
self.metadata[key] = ts.metadata
self.state[key] = finish
self.keys.discard(key)
def _materialize_graph(
expr: Expr,
global_annotations: dict[str, Any],
validate: bool,
) -> tuple[dict[Key, T_runspec], dict[Key, set[Key]], dict[str, dict[Key, Any]]]:
> dsk: dict = expr.__dask_graph__()
E AttributeError: 'dict' object has no attribute '__dask_graph__'
distributed/scheduler.py:9383: AttributeError
Check warning on line 0 in distributed.cli.tests.test_dask_worker
github-actions / Unit Test Results
All 5 runs failed: test_single_executable_works (distributed.cli.tests.test_dask_worker)
artifacts/ubuntu-latest-mindeps-default-notci1/pytest.xml [took 1s]
artifacts/windows-latest-3.10-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.11-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.12-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.13-default-notci1/pytest.xml [took 0s]
Check warning on line 0 in distributed.comm.tests.test_ws
github-actions / Unit Test Results
All 5 runs failed: test_roundtrip (distributed.comm.tests.test_ws)
artifacts/ubuntu-latest-mindeps-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.10-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.11-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.12-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.13-default-notci1/pytest.xml [took 0s]
Check warning on line 0 in distributed.comm.tests.test_ws
github-actions / Unit Test Results
All 5 runs failed: test_http_and_comm_server[True-ws://-None-8787] (distributed.comm.tests.test_ws)
artifacts/ubuntu-latest-mindeps-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.10-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.11-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.12-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.13-default-notci1/pytest.xml [took 0s]
Check warning on line 0 in distributed.comm.tests.test_ws
github-actions / Unit Test Results
All 5 runs failed: test_http_and_comm_server[True-wss://-True-8787] (distributed.comm.tests.test_ws)
artifacts/ubuntu-latest-mindeps-default-notci1/pytest.xml [took 1s]
artifacts/windows-latest-3.10-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.11-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.12-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.13-default-notci1/pytest.xml [took 0s]
Check warning on line 0 in distributed.comm.tests.test_ws
github-actions / Unit Test Results
All 5 runs failed: test_http_and_comm_server[False-ws://-None-8787] (distributed.comm.tests.test_ws)
artifacts/ubuntu-latest-mindeps-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.10-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.11-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.12-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.13-default-notci1/pytest.xml [took 0s]
Check warning on line 0 in distributed.comm.tests.test_ws
github-actions / Unit Test Results
All 5 runs failed: test_http_and_comm_server[False-wss://-True-8787] (distributed.comm.tests.test_ws)
artifacts/ubuntu-latest-mindeps-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.10-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.11-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.12-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.13-default-notci1/pytest.xml [took 0s]
Check warning on line 0 in distributed.comm.tests.test_ws
github-actions / Unit Test Results
All 5 runs failed: test_http_and_comm_server[True-ws://-None-8786] (distributed.comm.tests.test_ws)
artifacts/ubuntu-latest-mindeps-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.10-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.11-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.12-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.13-default-notci1/pytest.xml [took 0s]
Check warning on line 0 in distributed.comm.tests.test_ws
github-actions / Unit Test Results
All 5 runs failed: test_http_and_comm_server[True-wss://-True-8786] (distributed.comm.tests.test_ws)
artifacts/ubuntu-latest-mindeps-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.10-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.11-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.12-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.13-default-notci1/pytest.xml [took 0s]
Check warning on line 0 in distributed.comm.tests.test_ws
github-actions / Unit Test Results
All 5 runs failed: test_http_and_comm_server[False-ws://-None-8786] (distributed.comm.tests.test_ws)
artifacts/ubuntu-latest-mindeps-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.10-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.11-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.12-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.13-default-notci1/pytest.xml [took 0s]
Check warning on line 0 in distributed.comm.tests.test_ws
github-actions / Unit Test Results
All 5 runs failed: test_http_and_comm_server[False-wss://-True-8786] (distributed.comm.tests.test_ws)
artifacts/ubuntu-latest-mindeps-default-notci1/pytest.xml [took 1s]
artifacts/windows-latest-3.10-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.11-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.12-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.13-default-notci1/pytest.xml [took 0s]
Check warning on line 0 in distributed.dashboard.tests.test_components
github-actions / Unit Test Results
3 out of 4 runs failed: test_profile_plot (distributed.dashboard.tests.test_components)
artifacts/windows-latest-3.10-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.12-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.13-default-notci1/pytest.xml [took 0s]
Check warning on line 0 in distributed.dashboard.tests.test_components
github-actions / Unit Test Results
3 out of 4 runs failed: test_profile_time_plot (distributed.dashboard.tests.test_components)
artifacts/windows-latest-3.10-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.12-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.13-default-notci1/pytest.xml [took 0s]
Check warning on line 0 in distributed.dashboard.tests.test_scheduler_bokeh
github-actions / Unit Test Results
All 4 runs failed: test_simple (distributed.dashboard.tests.test_scheduler_bokeh)
artifacts/windows-latest-3.10-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.11-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.12-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.13-default-notci1/pytest.xml [took 0s]
Check warning on line 0 in distributed.dashboard.tests.test_scheduler_bokeh
github-actions / Unit Test Results
All 4 runs failed: test_stealing_events (distributed.dashboard.tests.test_scheduler_bokeh)
artifacts/windows-latest-3.10-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.11-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.12-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.13-default-notci1/pytest.xml [took 0s]
Check warning on line 0 in distributed.dashboard.tests.test_scheduler_bokeh
github-actions / Unit Test Results
All 4 runs failed: test_events (distributed.dashboard.tests.test_scheduler_bokeh)
artifacts/windows-latest-3.10-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.11-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.12-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.13-default-notci1/pytest.xml [took 0s]
Check warning on line 0 in distributed.dashboard.tests.test_scheduler_bokeh
github-actions / Unit Test Results
All 4 runs failed: test_task_stream (distributed.dashboard.tests.test_scheduler_bokeh)
artifacts/windows-latest-3.10-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.11-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.12-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.13-default-notci1/pytest.xml [took 0s]
Check warning on line 0 in distributed.dashboard.tests.test_scheduler_bokeh
github-actions / Unit Test Results
All 4 runs failed: test_task_stream_n_rectangles (distributed.dashboard.tests.test_scheduler_bokeh)
artifacts/windows-latest-3.10-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.11-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.12-default-notci1/pytest.xml [took 0s]
artifacts/windows-latest-3.13-default-notci1/pytest.xml [took 0s]