Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(adaptive): fixed comparison in recommendations when scaling down #8273

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions distributed/deploy/adaptive.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@
self.cluster = cluster
self.worker_key = worker_key
self._workers_to_close_kwargs = kwargs
self._worker_name_mapping = {}

if interval is None:
interval = dask.config.get("distributed.adaptive.interval")
Expand Down Expand Up @@ -131,6 +132,26 @@
def observed(self):
return self.cluster.observed

@property
def observed_name_mapped(self):
self._assign_hosts_to_names()
return self._worker_name_mapping

def _assign_hosts_to_names(self) -> None:
unassigned_worker_names = self._unassigned_worker_names()
for worker_address in self.cluster.scheduler_info["workers"].keys():
if worker_address not in self._worker_name_mapping.values():
assert unassigned_worker_names
self._worker_name_mapping[
unassigned_worker_names.pop()
] = worker_address
for worker_name, worker_address in self._worker_name_mapping.copy().items():
if worker_address not in self.cluster.scheduler_info["workers"].keys():
del self._worker_name_mapping[worker_name]

Check warning on line 150 in distributed/deploy/adaptive.py

View check run for this annotation

Codecov / codecov/patch

distributed/deploy/adaptive.py#L150

Added line #L150 was not covered by tests

def _unassigned_worker_names(self) -> set:
return self.requested - self._worker_name_mapping.keys()

async def target(self):
"""
Determine target number of workers that should exist.
Expand Down
10 changes: 7 additions & 3 deletions distributed/deploy/adaptive_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class AdaptiveCore:
plan: set[WorkerState]
requested: set[WorkerState]
observed: set[WorkerState]
observed_name_mapped: dict[str, str]
close_counts: defaultdict[WorkerState, int]
_adapting: bool
log: deque[tuple[float, dict]]
Expand Down Expand Up @@ -130,6 +131,7 @@ async def _adapt():
self.plan = set()
self.requested = set()
self.observed = set()
self.observed_name_mapped = {}
except Exception:
pass

Expand Down Expand Up @@ -181,7 +183,7 @@ async def recommendations(self, target: int) -> dict:
"""
plan = self.plan
requested = self.requested
observed = self.observed
observed = self.observed_name_mapped

if target == len(plan):
self.close_counts.clear()
Expand All @@ -192,14 +194,16 @@ async def recommendations(self, target: int) -> dict:
return {"status": "up", "n": target}

# target < len(plan)
not_yet_arrived = requested - observed
not_yet_arrived = requested - observed.keys()
to_close = set()
if not_yet_arrived:
to_close.update(toolz.take(len(plan) - target, not_yet_arrived))

if target < len(plan) - len(to_close):
L = await self.workers_to_close(target=target)
to_close.update(L)
to_close.update(
[key for key, value in observed.items() for name in L if value == name]
)

firmly_close = set()
for w in to_close:
Expand Down
Loading