Skip to content

Commit e09f4af

Browse files
authored
Working in pyodide! (#753)
- Add feature flags to the python distribution to remove async APIs and tokio code. - minimal pyodide docs - Add CI test that we can build in pyodide - Update pyo3-arrow to 0.2 ![image](https://github.com/user-attachments/assets/b0e68162-0f01-4ec5-be8e-18978834a9f8)
1 parent d00dfde commit e09f4af

26 files changed

+365
-212
lines changed

.github/workflows/python.yml

+27
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,33 @@ jobs:
3535
- name: Test
3636
run: cargo test
3737

38+
emscripten:
39+
name: Build pyodide wheel
40+
runs-on: ubuntu-latest
41+
steps:
42+
- uses: actions/checkout@v4
43+
- run: pip install pyodide-build
44+
- name: Get Emscripten and Python version info
45+
shell: bash
46+
run: |
47+
echo EMSCRIPTEN_VERSION=$(pyodide config get emscripten_version) >> $GITHUB_ENV
48+
echo PYTHON_VERSION=$(pyodide config get python_version | cut -d '.' -f 1-2) >> $GITHUB_ENV
49+
pip uninstall -y pyodide-build
50+
- uses: mymindstorm/setup-emsdk@v14
51+
with:
52+
version: ${{ env.EMSCRIPTEN_VERSION }}
53+
actions-cache-folder: emsdk-cache
54+
- uses: actions/setup-python@v5
55+
with:
56+
python-version: ${{ env.PYTHON_VERSION }}
57+
- run: pip install pyodide-build
58+
- name: Build wheels
59+
uses: PyO3/maturin-action@v1
60+
with:
61+
target: wasm32-unknown-emscripten
62+
args: --no-default-features --out dist -m python/core/Cargo.toml
63+
rust-toolchain: nightly
64+
3865
# lint-python:
3966
# name: Lint Python code
4067
# runs-on: ubuntu-latest

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
emsdk
12
vcpkg
23
vcpkg_installed
34
.pyodide*

python/core/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
emsdk/

python/core/Cargo.lock

+3-9
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

python/core/Cargo.toml

+32-13
Original file line numberDiff line numberDiff line change
@@ -16,45 +16,64 @@ rust-version = "1.80"
1616
name = "_rust"
1717
crate-type = ["cdylib"]
1818

19+
[features]
20+
default = ["async", "libc", "rayon"]
21+
async = [
22+
"dep:futures",
23+
"dep:object_store",
24+
"parquet/object_store",
25+
"dep:pyo3-asyncio-0-21",
26+
"geoarrow/flatgeobuf_async",
27+
"geoarrow/parquet_async",
28+
"geoarrow/postgis",
29+
"dep:sqlx",
30+
"dep:tokio",
31+
]
32+
libc = ["geoarrow/polylabel"]
33+
rayon = ["geoarrow/rayon"]
34+
1935
[dependencies]
2036
arrow = "52"
2137
arrow-array = "52"
2238
arrow-buffer = "52"
2339
bytes = "1"
2440
flatgeobuf = { version = "4.2.0", default-features = false }
25-
futures = "0.3"
26-
object_store = { version = "0.10", features = ["aws", "azure", "gcp", "http"] }
27-
parquet = { version = "52", features = ["object_store"] }
41+
futures = { version = "0.3", optional = true }
42+
object_store = { version = "0.10", features = [
43+
"aws",
44+
"azure",
45+
"gcp",
46+
"http",
47+
], optional = true }
48+
parquet = "52"
2849
pyo3 = { version = "0.21.0", features = [
2950
"abi3-py38",
30-
"multiple-pymethods",
3151
"hashbrown",
3252
"serde",
3353
"anyhow",
3454
] }
35-
pyo3-arrow = { git = "https://github.com/kylebarron/arro3", rev = "d0d737a03c141ff316e3e354d85828edb42338d4" }
36-
pyo3-asyncio-0-21 = { version = "0.21", features = ["tokio-runtime"] }
55+
pyo3-arrow = "0.2"
56+
pyo3-asyncio-0-21 = { version = "0.21", features = [
57+
"tokio-runtime",
58+
], optional = true }
3759
pythonize = "0.21"
3860
geo = "0.28"
3961
geoarrow = { path = "../../", features = [
4062
"csv",
41-
"flatgeobuf_async",
4263
"flatgeobuf",
4364
"geozero",
4465
"ipc_compression",
45-
"parquet_async",
4666
"parquet_compression",
4767
"parquet",
48-
"polylabel",
49-
"postgis",
50-
"rayon",
5168
] }
5269
geozero = { version = "0.13", features = ["with-svg"] }
5370
numpy = "0.21"
5471
serde_json = "1"
55-
sqlx = { version = "0.7", default-features = false, features = ["postgres"] }
72+
sqlx = { version = "0.7", default-features = false, features = [
73+
"postgres",
74+
], optional = true }
5675
thiserror = "1"
57-
tokio = { version = "1.9", features = ["rt"] }
76+
tokio = { version = "1.9", features = ["rt"], optional = true }
5877
url = "2.5"
5978

6079
# reqwest is pulled in by object store, but not used by python binding itself

python/core/DEVELOP.md

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
## Pyodide
2+
3+
4+
Install Rust nightly and add the wasm target.
5+
6+
```
7+
rustup toolchain install nightly
8+
rustup target add --toolchain nightly wasm32-unknown-emscripten
9+
```
10+
11+
Install dependencies. Pin `pyodide-build` to the version that matches the `pyodide` release you are building for.
12+
13+
```
14+
pip install -U maturin
15+
pip install pyodide-build
16+
```
17+
18+
Install emsdk.
19+
20+
```
21+
git clone https://github.com/emscripten-core/emsdk.git
22+
cd emsdk
23+
PYODIDE_EMSCRIPTEN_VERSION=$(pyodide config get emscripten_version)
24+
./emsdk install ${PYODIDE_EMSCRIPTEN_VERSION}
25+
./emsdk activate ${PYODIDE_EMSCRIPTEN_VERSION}
26+
source emsdk_env.sh
27+
cd ..
28+
```
29+
30+
- The `RUSTFLAGS` setting is a temporary workaround for a compiler bug.
31+
- You must use Rust nightly.
32+
- You must use `--no-default-features` to remove any async support. `tokio` does not compile for emscripten.
33+
34+
```bash
35+
RUSTFLAGS='-Zinline-mir=no' \
36+
RUSTUP_TOOLCHAIN=nightly \
37+
maturin build \
38+
--no-default-features \
39+
--release \
40+
-o dist \
41+
--target wasm32-unknown-emscripten
42+
```

python/core/src/algorithm/geo/area.rs

+6-4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::sync::Arc;
2+
13
use crate::error::PyGeoArrowResult;
24
use crate::ffi::from_python::AnyGeometryInput;
35
use geoarrow::algorithm::geo::{Area, ChamberlainDuquetteArea, GeodesicArea};
@@ -38,7 +40,7 @@ pub fn area(py: Python, input: AnyGeometryInput, method: AreaMethod) -> PyGeoArr
3840
AreaMethod::Euclidean => arr.as_ref().unsigned_area()?,
3941
AreaMethod::Geodesic => arr.as_ref().geodesic_area_unsigned()?,
4042
};
41-
Ok(PyArray::from_array(out).to_arro3(py)?)
43+
Ok(PyArray::from_array_ref(Arc::new(out)).to_arro3(py)?)
4244
}
4345
AnyGeometryInput::Chunked(arr) => {
4446
let out = match method {
@@ -48,7 +50,7 @@ pub fn area(py: Python, input: AnyGeometryInput, method: AreaMethod) -> PyGeoArr
4850
AreaMethod::Euclidean => arr.as_ref().unsigned_area()?,
4951
AreaMethod::Geodesic => arr.as_ref().geodesic_area_unsigned()?,
5052
};
51-
Ok(PyChunkedArray::from_arrays(out.chunks())?.to_arro3(py)?)
53+
Ok(PyChunkedArray::from_array_refs(out.chunk_refs())?.to_arro3(py)?)
5254
}
5355
}
5456
}
@@ -72,7 +74,7 @@ pub fn signed_area(
7274
AreaMethod::Euclidean => arr.as_ref().signed_area()?,
7375
AreaMethod::Geodesic => arr.as_ref().geodesic_area_signed()?,
7476
};
75-
Ok(PyArray::from_array(out).to_arro3(py)?)
77+
Ok(PyArray::from_array_ref(Arc::new(out)).to_arro3(py)?)
7678
}
7779
AnyGeometryInput::Chunked(arr) => {
7880
let out = match method {
@@ -82,7 +84,7 @@ pub fn signed_area(
8284
AreaMethod::Euclidean => arr.as_ref().signed_area()?,
8385
AreaMethod::Geodesic => arr.as_ref().geodesic_area_signed()?,
8486
};
85-
Ok(PyChunkedArray::from_arrays(out.chunks())?.to_arro3(py)?)
87+
Ok(PyChunkedArray::from_array_refs(out.chunk_refs())?.to_arro3(py)?)
8688
}
8789
}
8890
}

python/core/src/algorithm/geo/dimensions.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::sync::Arc;
2+
13
use crate::error::PyGeoArrowResult;
24
use crate::ffi::from_python::AnyGeometryInput;
35
use geoarrow::algorithm::geo::HasDimensions;
@@ -9,11 +11,11 @@ pub fn is_empty(py: Python, input: AnyGeometryInput) -> PyGeoArrowResult<PyObjec
911
match input {
1012
AnyGeometryInput::Array(arr) => {
1113
let out = HasDimensions::is_empty(&arr.as_ref())?;
12-
Ok(PyArray::from_array(out).to_arro3(py)?)
14+
Ok(PyArray::from_array_ref(Arc::new(out)).to_arro3(py)?)
1315
}
1416
AnyGeometryInput::Chunked(arr) => {
1517
let out = HasDimensions::is_empty(&arr.as_ref())?;
16-
Ok(PyChunkedArray::from_arrays(out.chunks())?.to_arro3(py)?)
18+
Ok(PyChunkedArray::from_array_refs(out.chunk_refs())?.to_arro3(py)?)
1719
}
1820
}
1921
}

python/core/src/algorithm/geo/frechet_distance.rs

+6-4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::sync::Arc;
2+
13
use crate::error::PyGeoArrowResult;
24
use crate::ffi::from_python::input::AnyGeometryBroadcastInput;
35
use crate::ffi::from_python::AnyGeometryInput;
@@ -15,21 +17,21 @@ pub fn frechet_distance(
1517
match (input, other) {
1618
(AnyGeometryInput::Array(left), AnyGeometryBroadcastInput::Array(right)) => {
1719
let result = FrechetDistance::frechet_distance(&left.as_ref(), &right.as_ref())?;
18-
Ok(PyArray::from_array(result).to_arro3(py)?)
20+
Ok(PyArray::from_array_ref(Arc::new(result)).to_arro3(py)?)
1921
}
2022
(AnyGeometryInput::Chunked(left), AnyGeometryBroadcastInput::Chunked(right)) => {
2123
let result = FrechetDistance::frechet_distance(&left.as_ref(), &right.as_ref())?;
22-
Ok(PyChunkedArray::from_arrays(result.chunks())?.to_arro3(py)?)
24+
Ok(PyChunkedArray::from_array_refs(result.chunk_refs())?.to_arro3(py)?)
2325
}
2426
(AnyGeometryInput::Array(left), AnyGeometryBroadcastInput::Scalar(right)) => {
2527
let scalar = right.to_geo_line_string()?;
2628
let result = FrechetDistanceLineString::frechet_distance(&left.as_ref(), &scalar)?;
27-
Ok(PyArray::from_array(result).to_arro3(py)?)
29+
Ok(PyArray::from_array_ref(Arc::new(result)).to_arro3(py)?)
2830
}
2931
(AnyGeometryInput::Chunked(left), AnyGeometryBroadcastInput::Scalar(right)) => {
3032
let scalar = right.to_geo_line_string()?;
3133
let result = FrechetDistanceLineString::frechet_distance(&left.as_ref(), &scalar)?;
32-
Ok(PyChunkedArray::from_arrays(result.chunks())?.to_arro3(py)?)
34+
Ok(PyChunkedArray::from_array_refs(result.chunk_refs())?.to_arro3(py)?)
3335
}
3436
_ => Err(PyValueError::new_err("Unsupported input types.").into()),
3537
}

python/core/src/algorithm/geo/geodesic_area.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::sync::Arc;
2+
13
use crate::error::PyGeoArrowResult;
24
use crate::ffi::from_python::AnyGeometryInput;
35
use geoarrow::algorithm::geo::GeodesicArea;
@@ -9,11 +11,11 @@ pub fn geodesic_perimeter(py: Python, input: AnyGeometryInput) -> PyGeoArrowResu
911
match input {
1012
AnyGeometryInput::Array(arr) => {
1113
let out = arr.as_ref().geodesic_perimeter()?;
12-
Ok(PyArray::from_array(out).to_arro3(py)?)
14+
Ok(PyArray::from_array_ref(Arc::new(out)).to_arro3(py)?)
1315
}
1416
AnyGeometryInput::Chunked(arr) => {
1517
let out = arr.as_ref().geodesic_perimeter()?;
16-
Ok(PyChunkedArray::from_arrays(out.chunks())?.to_arro3(py)?)
18+
Ok(PyChunkedArray::from_array_refs(out.chunk_refs())?.to_arro3(py)?)
1719
}
1820
}
1921
}

python/core/src/algorithm/geo/length.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::sync::Arc;
2+
13
use crate::error::PyGeoArrowResult;
24
use crate::ffi::from_python::AnyGeometryInput;
35
use geoarrow::algorithm::geo::{EuclideanLength, GeodesicLength, HaversineLength, VincentyLength};
@@ -43,7 +45,7 @@ pub fn length(
4345
LengthMethod::Haversine => arr.as_ref().haversine_length()?,
4446
LengthMethod::Vincenty => arr.as_ref().vincenty_length()?,
4547
};
46-
Ok(PyArray::from_array(out).to_arro3(py)?)
48+
Ok(PyArray::from_array_ref(Arc::new(out)).to_arro3(py)?)
4749
}
4850
AnyGeometryInput::Chunked(arr) => {
4951
let out = match method {
@@ -52,7 +54,7 @@ pub fn length(
5254
LengthMethod::Haversine => arr.as_ref().haversine_length()?,
5355
LengthMethod::Vincenty => arr.as_ref().vincenty_length()?,
5456
};
55-
Ok(PyChunkedArray::from_arrays(out.chunks())?.to_arro3(py)?)
57+
Ok(PyChunkedArray::from_array_refs(out.chunk_refs())?.to_arro3(py)?)
5658
}
5759
}
5860
}

python/core/src/algorithm/geo/line_locate_point.rs

+6-4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::sync::Arc;
2+
13
use crate::error::PyGeoArrowResult;
24
use crate::ffi::from_python::input::AnyGeometryBroadcastInput;
35
use crate::ffi::from_python::AnyGeometryInput;
@@ -15,21 +17,21 @@ pub fn line_locate_point(
1517
match (input, point) {
1618
(AnyGeometryInput::Array(arr), AnyGeometryBroadcastInput::Array(point)) => {
1719
let result = LineLocatePoint::line_locate_point(&arr.as_ref(), point.as_ref())?;
18-
Ok(PyArray::from_array(result).to_arro3(py)?)
20+
Ok(PyArray::from_array_ref(Arc::new(result)).to_arro3(py)?)
1921
}
2022
(AnyGeometryInput::Chunked(arr), AnyGeometryBroadcastInput::Chunked(point)) => {
2123
let result = LineLocatePoint::line_locate_point(&arr.as_ref(), point.as_ref())?;
22-
Ok(PyChunkedArray::from_arrays(result.chunks())?.to_arro3(py)?)
24+
Ok(PyChunkedArray::from_array_refs(result.chunk_refs())?.to_arro3(py)?)
2325
}
2426
(AnyGeometryInput::Array(arr), AnyGeometryBroadcastInput::Scalar(point)) => {
2527
let scalar = point.to_geo_point()?;
2628
let result = LineLocatePointScalar::line_locate_point(&arr.as_ref(), &scalar)?;
27-
Ok(PyArray::from_array(result).to_arro3(py)?)
29+
Ok(PyArray::from_array_ref(Arc::new(result)).to_arro3(py)?)
2830
}
2931
(AnyGeometryInput::Chunked(arr), AnyGeometryBroadcastInput::Scalar(point)) => {
3032
let scalar = point.to_geo_point()?;
3133
let result = LineLocatePointScalar::line_locate_point(&arr.as_ref(), &scalar)?;
32-
Ok(PyChunkedArray::from_arrays(result.chunks())?.to_arro3(py)?)
34+
Ok(PyChunkedArray::from_array_refs(result.chunk_refs())?.to_arro3(py)?)
3335
}
3436
_ => Err(PyValueError::new_err("Unsupported input types.").into()),
3537
}

python/core/src/algorithm/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
11
pub mod geo;
22
pub mod native;
3+
4+
#[cfg(feature = "libc")]
35
pub mod polylabel;

0 commit comments

Comments
 (0)