Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove dependency on dedicated dask-expr repo #8978

Merged
merged 1 commit into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions .github/workflows/conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,9 @@ jobs:

# conda search for the latest dask-core pre-release
dask_core_arr=($(conda search --override-channels -c dask/label/dev dask-core | tail -n 1))
dask_expr_arr=($(conda search --override-channels -c dask/label/dev dask-expr | tail -n 1))

# extract dask-core & dask-expr pre-release versions
# extract dask-core pre-release version
export DASK_CORE_VERSION=${dask_core_arr[1]}
export DASK_EXPR_VERSION=${dask_expr_arr[1]}

# distributed pre-release build
conda mambabuild continuous_integration/recipes/distributed \
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,12 @@ jobs:
- os: ubuntu-latest
environment: mindeps
label: pandas
extra_packages: [numpy=1.24, pandas=2.0, pyarrow=14.0.1, dask-expr]
extra_packages: [numpy=1.24, pandas=2.0, pyarrow=14.0.1]
partition: "ci1"
- os: ubuntu-latest
environment: mindeps
label: pandas
extra_packages: [numpy=1.24, pandas=2.0, pyarrow=14.0.1, dask-expr]
extra_packages: [numpy=1.24, pandas=2.0, pyarrow=14.0.1]
partition: "not ci1"

- os: ubuntu-latest
Expand Down
1 change: 0 additions & 1 deletion continuous_integration/environment-3.10.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ dependencies:
- httpx<0.28.0
- pip:
- git+https://github.com/dask/dask
- git+https://github.com/dask/dask-expr
- git+https://github.com/dask/zict
- git+https://github.com/dask/crick # Only tested here
# Revert after https://github.com/dask/distributed/issues/8614 is fixed
Expand Down
1 change: 0 additions & 1 deletion continuous_integration/environment-3.11.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ dependencies:
- httpx<0.28.0
- pip:
- git+https://github.com/dask/dask
- git+https://github.com/dask/dask-expr
- git+https://github.com/dask/zict
# Revert after https://github.com/dask/distributed/issues/8614 is fixed
# - git+https://github.com/dask/s3fs
Expand Down
1 change: 0 additions & 1 deletion continuous_integration/environment-3.12.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ dependencies:
- httpx<0.28.0
- pip:
- git+https://github.com/dask/dask
- git+https://github.com/dask/dask-expr
- git+https://github.com/dask/zict
# Revert after https://github.com/dask/distributed/issues/8614 is fixed
# - git+https://github.com/dask/s3fs
Expand Down
1 change: 0 additions & 1 deletion continuous_integration/environment-3.13.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ dependencies:
- httpx<0.28.0
- pip:
- git+https://github.com/dask/dask
- git+https://github.com/dask/dask-expr
- git+https://github.com/dask/zict
# Revert after https://github.com/dask/distributed/issues/8614 is fixed
# - git+https://github.com/dask/s3fs
Expand Down
3 changes: 0 additions & 3 deletions continuous_integration/recipes/dask/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
{% set new_patch = major_minor_patch[2] | int + 1 %}
{% set version = (major_minor_patch[:2] + [new_patch]) | join('.') + environ.get('VERSION_SUFFIX', '') %}
{% set dask_version = environ.get('DASK_CORE_VERSION', '0.0.0.dev') %}
{% set dask_expr_version = environ.get('DASK_EXPR_VERSION', '0.0.0.dev') %}


package:
Expand All @@ -21,12 +20,10 @@ requirements:
host:
- python >=3.10
- dask-core {{ dask_version }}
- dask-expr {{ dask_expr_version }}
- distributed {{ version }}
run:
- python >=3.10
- {{ pin_compatible('dask-core', max_pin='x.x.x.x') }}
- {{ pin_compatible('dask-expr', max_pin='x.x.x.x') }}
- {{ pin_compatible('distributed', exact=True) }}
- cytoolz >=0.11.2
- lz4 >=4.3.2
Expand Down
6 changes: 1 addition & 5 deletions continuous_integration/scripts/test_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def get_jobs(run, session, repo):
# this to the JXML results.

if repo.endswith("/dask"):
# example name: "test (windows-latest, 3.9)" or "test (ubuntu-latest, 3.12, dask-expr)"
# example name: "test (windows-latest, 3.9)" or "test (ubuntu-latest, 3.12)"
df_jobs = df_jobs[~df_jobs.name.str.contains("Event File")]

def format(row):
Expand Down Expand Up @@ -389,10 +389,6 @@ def download_and_parse_artifacts(
if a["name"].startswith("test-results") and repo.endswith("/dask"):
continue

# NOTE: Temporarily ignore reporting dask-expr related test cases
if "dask-expr" in a["name"]:
continue

# Note: we assign a column with the workflow run timestamp rather
# than the artifact timestamp so that artifacts triggered under
# the same workflow run can be aligned according to the same trigger
Expand Down
6 changes: 3 additions & 3 deletions distributed/shuffle/tests/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ async def test_basic_merge(c, s, a, b, how):
joined = a.merge(b, left_on="y", right_on="y", how=how)

# Ensure we're using a hash join
from dask_expr._merge import HashJoinP2P

assert any(isinstance(expr, HashJoinP2P) for expr in joined.optimize()._expr.walk())
assert any(
expr._name.startswith("hashjoinp2p") for expr in joined.optimize()._expr.walk()
)

expected = pd.merge(A, B, how, "y")
await list_eq(joined, expected)
Expand Down
27 changes: 0 additions & 27 deletions distributed/shuffle/tests/test_shuffle.py
Original file line number Diff line number Diff line change
Expand Up @@ -1637,33 +1637,6 @@ async def test_multi(c, s, a, b):
await assert_scheduler_cleanup(s)


# NOTE: ``skipif`` without a condition is an unconditional skip in pytest;
# ``skip`` states that intent directly.
@pytest.mark.skip(reason="worker restrictions are not supported in dask-expr")
@gen_cluster(client=True)
async def test_restrictions(c, s, a, b):
    """Check that P2P shuffle results honor ``persist(workers=...)``.

    A timeseries dataframe is first persisted on worker ``a`` only. Two
    shuffles of it (on ``"x"`` and on ``"y"``) are then built with the
    ``p2p`` shuffle method and persisted on ``b`` and ``a`` respectively.
    The test asserts that every output key of each shuffle ends up in the
    ``data`` of the worker it was restricted to.
    """
    df = dask.datasets.timeseries(
        start="2000-01-01",
        end="2000-01-10",
        dtypes={"x": float, "y": float},
        freq="10 s",
    ).persist(workers=a.address)
    await df
    # The input must live exclusively on ``a`` before shuffling.
    assert a.data
    assert not b.data

    # Only graph construction needs the config override.
    with dask.config.set({"dataframe.shuffle.method": "p2p"}):
        x = df.shuffle("x")
        y = df.shuffle("y")

    # Pin each shuffle's outputs to a different worker.
    x = x.persist(workers=b.address)
    y = y.persist(workers=a.address)

    await x
    assert all(key in b.data for key in x.__dask_keys__())

    await y
    assert all(key in a.data for key in y.__dask_keys__())


@gen_cluster(client=True)
async def test_delete_some_results(c, s, a, b):
df = dask.datasets.timeseries(
Expand Down
4 changes: 1 addition & 3 deletions distributed/tests/test_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2829,9 +2829,7 @@ async def test_default_task_duration_splits(c, s, a, b):
await wait(fut)

split_prefix = [pre for pre in s.task_prefixes.keys() if "split" in pre]
# dask-expr enabled: ['split-taskshuffle', 'split-stage']
# dask-expr disabled: ['split-shuffle']
assert split_prefix
assert split_prefix == ["split-taskshuffle", "split-stage"]
default_times = dask.config.get("distributed.scheduler.default-task-durations")
for p in split_prefix:
default_time = parse_timedelta(default_times[p])
Expand Down
2 changes: 1 addition & 1 deletion distributed/tests/test_spans.py
Original file line number Diff line number Diff line change
Expand Up @@ -803,7 +803,7 @@ def f():

@gen_cluster(client=True)
async def test_span_on_persist(c, s, a, b):
"""As a workaround to lack of annotations support in dask-expr and loss of
"""As a workaround to lack of annotations support in dask.dataframe and loss of
annotations due to low level optimization in dask.array, you can use span() to wrap
calls to persist() and compute()
"""
Expand Down
Loading