Skip to content

Commit d16a5a1

Browse files
committed
Hotfix negative occ
1 parent 55bb639 commit d16a5a1

File tree

1 file changed

+13
-5
lines changed

1 file changed

+13
-5
lines changed

distributed/scheduler.py

+13 -5
Original file line number | Diff line number | Diff line change
@@ -823,8 +823,11 @@ def _dec_needs_replica(self, ts: TaskState) -> None:
823823
if self.needs_what[ts] == 0:
824824
del self.needs_what[ts]
825825
nbytes = ts.get_nbytes()
826-
self._network_occ -= nbytes
827-
self.scheduler._network_occ_global -= nbytes
826+
# FIXME: ts.get_nbytes may change if non-deterministic tasks get recomputed, causing drift
827+
self._network_occ -= min(nbytes, self._network_occ)
828+
self.scheduler._network_occ_global -= min(
829+
nbytes, self.scheduler._network_occ_global
830+
)
828831

829832
def add_replica(self, ts: TaskState) -> None:
830833
"""The worker acquired a replica of task"""
@@ -835,8 +838,11 @@ def add_replica(self, ts: TaskState) -> None:
835838
nbytes = ts.get_nbytes()
836839
if ts in self.needs_what:
837840
del self.needs_what[ts]
838-
self._network_occ -= nbytes
839-
self.scheduler._network_occ_global -= nbytes
841+
# FIXME: ts.get_nbytes may change if non-deterministic tasks get recomputed, causing drift
842+
self._network_occ -= min(nbytes, self._network_occ)
843+
self.scheduler._network_occ_global -= min(
844+
nbytes, self.scheduler._network_occ_global
845+
)
840846
ts.who_has.add(self)
841847
self.nbytes += nbytes
842848
self._has_what[ts] = None
@@ -1958,7 +1964,9 @@ def _calc_occupancy(
19581964
duration = self._get_prefix_duration(self.task_prefixes[prefix_name])
19591965
res += duration * count
19601966
occ = res + network_occ / self.bandwidth
1961-
assert occ >= 0, (occ, res, network_occ, self.bandwidth)
1967+
if self.validate:
1968+
assert occ >= 0, (occ, res, network_occ, self.bandwidth)
1969+
occ = max(occ, 0)
19621970
return occ
19631971

19641972
#####################

0 commit comments

Comments
 (0)