Commit cbdb6de

Tweaks to update_graph (backport from #8185)

1 parent: 4425516

File tree: 2 files changed (+43 -23 lines)

  distributed/client.py    (+1 -1)
  distributed/scheduler.py (+42 -22)


distributed/client.py (+1 -1)

@@ -1576,7 +1576,7 @@ async def _handle_report(self):
 
                 breakout = False
                 for msg in msgs:
-                    logger.debug("Client receives message %s", msg)
+                    logger.debug("Client %s receives message %s", self.id, msg)
 
                     if "status" in msg and "error" in msg["status"]:
                         typ, exc, tb = clean_exception(**msg)

distributed/scheduler.py (+42 -22)

@@ -52,7 +52,8 @@
 from tornado.ioloop import IOLoop
 
 import dask
-from dask.core import get_deps, validate_key
+import dask.utils
+from dask.core import get_deps, iskey, validate_key
 from dask.typing import Key, no_default
 from dask.utils import (
     ensure_dict,
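For orientation, the newly imported `iskey` is used further down in `_materialize_graph` to tell task keys apart from task payloads. A rough illustration of its semantics, from memory of dask.core rather than from this commit:

    from dask.core import iskey

    iskey("x")          # True: plain string key
    iskey(("x", 0, 1))  # True: tuple key, as used by dask collections
    iskey([1, 2])       # False: a list is a task payload, not a key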
@@ -4721,6 +4722,7 @@ async def update_graph(
                 stimulus_id=stimulus_id or f"update-graph-{start}",
             )
         except RuntimeError as e:
+            logger.error(str(e))
             err = error_message(e)
             for key in keys:
                 self.report(
@@ -4729,7 +4731,10 @@ async def update_graph(
                         "key": key,
                         "exception": err["exception"],
                         "traceback": err["traceback"],
-                    }
+                    },
+                    # This informs all clients in who_wants plus the current client
+                    # (which may not have been added to who_wants yet)
+                    client=client,
                 )
         end = time()
         self.digest_metric("update-graph-duration", end - start)
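For context, `error_message` converts an exception into a serializable dict whose "exception" and "traceback" entries feed the report above. A minimal sketch, assuming the helper keeps its usual field names:

    from distributed.core import error_message

    try:
        raise RuntimeError("could not materialize graph")
    except RuntimeError as e:
        err = error_message(e)
        # err["exception"] and err["traceback"] are serialized forms that the
        # scheduler can forward to clients, as in the report() call above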
@@ -4755,8 +4760,21 @@ def _generate_taskstates(
             if ts is None:
                 ts = self.new_task(k, dsk.get(k), "released", computation=computation)
                 new_tasks.append(ts)
-            elif not ts.run_spec:
+            # It is possible to create the TaskState object before its runspec is known
+            # to the scheduler. For instance, this is possible when using a Variable:
+            # `f = c.submit(foo); await Variable().set(f)` since the Variable uses a
+            # different comm channel, so the `client_desires_key` message could arrive
+            # before `update_graph`.
+            # There are also anti-pattern processes possible;
+            # see for example test_scatter_creates_ts
+            elif ts.run_spec is None:
                 ts.run_spec = dsk.get(k)
+            # run_spec in the submitted graph may be None. This happens
+            # when an already persisted future is part of the graph
+            elif k in dsk:
+                # TODO run a health check to verify that run_spec and dependencies
+                # did not change. See https://github.com/dask/distributed/pull/8185
+                pass
 
             if ts.run_spec:
                 runnable.append(ts)
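The race described in the new comment can be spelled out with the public API. A minimal sketch, assuming an in-process cluster; because the Variable travels over a separate comm channel, `client_desires_key` may create the TaskState before `update_graph` delivers its run_spec:

    import asyncio
    from distributed import Client, Variable

    async def main():
        async with Client(asynchronous=True, processes=False) as c:
            f = c.submit(lambda: 1)  # run_spec arrives via update_graph
            await Variable().set(f)  # client_desires_key may arrive first
            assert await f == 1

    asyncio.run(main())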
@@ -5538,28 +5556,28 @@ def report(
                 tasks: dict = self.tasks
                 ts = tasks.get(msg_key)
 
-        client_comms: dict = self.client_comms
-        if ts is None:
+        if ts is None and client is None:
             # Notify all clients
-            client_keys = list(client_comms)
-        elif client:
-            # Notify clients interested in key
-            client_keys = [cs.client_key for cs in ts.who_wants or ()]
+            client_keys = list(self.client_comms)
+        elif ts is None:
+            client_keys = [client]
         else:
             # Notify clients interested in key (including `client`)
+            # Note that, if report() was called by update_graph(), `client` won't be in
+            # ts.who_wants yet.
             client_keys = [
                 cs.client_key for cs in ts.who_wants or () if cs.client_key != client
             ]
-            client_keys.append(client)
+            if client is not None:
+                client_keys.append(client)
 
-        k: str
         for k in client_keys:
-            c = client_comms.get(k)
+            c = self.client_comms.get(k)
             if c is None:
                 continue
             try:
                 c.send(msg)
-            # logger.debug("Scheduler sends message to client %s", msg)
+            # logger.debug("Scheduler sends message to client %s: %s", k, msg)
             except CommClosedError:
                 if self.status == Status.running:
                     logger.critical(
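Restated outside the scheduler, the new recipient selection reads as follows; this is an illustrative paraphrase of the branch above, not code from the commit:

    def recipients(ts, client, client_comms):
        if ts is None and client is None:
            # General news with no addressee: broadcast to every connected client
            return list(client_comms)
        if ts is None:
            # No known task, but an explicit addressee
            return [client]
        # Everyone interested in the key, plus `client`, which may not have been
        # added to ts.who_wants yet when called from update_graph()
        keys = [cs.client_key for cs in ts.who_wants or () if cs.client_key != client]
        if client is not None:
            keys.append(client)
        return keys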
@@ -8724,26 +8742,28 @@ def _materialize_graph(
     dsk2 = {}
     fut_deps = {}
     for k, v in dsk.items():
-        dsk2[k], futs = unpack_remotedata(v, byte_keys=True)
+        v, futs = unpack_remotedata(v, byte_keys=True)
         if futs:
             fut_deps[k] = futs
+
+        # Remove aliases {x: x}.
+        # FIXME: This is an artifact generated by unpack_remotedata when using persisted
+        # collections. There should be a better way to achieve that tasks are not self
+        # referencing themselves.
+        if not iskey(v) or v != k:
+            dsk2[k] = v
+
     dsk = dsk2
 
     # - Add in deps for any tasks that depend on futures
     for k, futures in fut_deps.items():
-        dependencies[k].update(f.key for f in futures)
+        dependencies[k].update(f.key for f in futures if f.key != k)
 
     # Remove any self-dependencies (happens on test_publish_bag() and others)
     for k, v in dependencies.items():
         deps = set(v)
-        if k in deps:
-            deps.remove(k)
+        deps.discard(k)
         dependencies[k] = deps
 
-    # Remove aliases
-    for k in list(dsk):
-        if dsk[k] is k:
-            del dsk[k]
     dsk = valmap(_normalize_task, dsk)
-
     return dsk, dependencies, annotations_by_type
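To see where the `{x: x}` aliases come from: `unpack_remotedata` replaces an embedded Future with its key, so a graph entry whose value is a future for that same key collapses into a self-reference. A hedged sketch, with `fut` standing in for a hypothetical persisted future:

    from distributed.utils_comm import unpack_remotedata

    # Suppose a persisted collection contributes the graph entry {"x": fut},
    # where fut is a Future with fut.key == "x".
    v, futs = unpack_remotedata(fut, byte_keys=True)
    # Now v == "x" and futs == {fut}: the unpacked entry would be the alias
    # {"x": "x"}, which the filter `not iskey(v) or v != k` above drops from dsk2.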
