Skip to content

Commit 725c960

Browse files
authored
perf: Extend functionality on BitmapBuilder and use in Growables (#20754)
1 parent acb20ee commit 725c960

File tree

12 files changed

+194
-64
lines changed

12 files changed

+194
-64
lines changed

crates/polars-arrow/src/array/growable/binary.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,15 @@ use super::utils::extend_offset_values;
44
use super::Growable;
55
use crate::array::growable::utils::{extend_validity, prepare_validity};
66
use crate::array::{Array, BinaryArray};
7-
use crate::bitmap::MutableBitmap;
7+
use crate::bitmap::BitmapBuilder;
88
use crate::datatypes::ArrowDataType;
99
use crate::offset::{Offset, Offsets};
1010

1111
/// Concrete [`Growable`] for the [`BinaryArray`].
1212
pub struct GrowableBinary<'a, O: Offset> {
1313
arrays: Vec<&'a BinaryArray<O>>,
1414
dtype: ArrowDataType,
15-
validity: Option<MutableBitmap>,
15+
validity: Option<BitmapBuilder>,
1616
values: Vec<u8>,
1717
offsets: Offsets<O>,
1818
}
@@ -49,7 +49,7 @@ impl<'a, O: Offset> GrowableBinary<'a, O> {
4949
dtype,
5050
offsets.into(),
5151
values.into(),
52-
validity.map(|v| v.into()),
52+
validity.map(|v| v.freeze()),
5353
)
5454
}
5555
}
@@ -97,7 +97,7 @@ impl<'a, O: Offset> From<GrowableBinary<'a, O>> for BinaryArray<O> {
9797
val.dtype,
9898
val.offsets.into(),
9999
val.values.into(),
100-
val.validity.map(|v| v.into()),
100+
val.validity.map(|v| v.freeze()),
101101
)
102102
}
103103
}

crates/polars-arrow/src/array/growable/binview.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,15 @@ use super::Growable;
88
use crate::array::binview::{BinaryViewArrayGeneric, ViewType};
99
use crate::array::growable::utils::{extend_validity, extend_validity_copies, prepare_validity};
1010
use crate::array::{Array, MutableBinaryViewArray, View};
11-
use crate::bitmap::{Bitmap, MutableBitmap};
11+
use crate::bitmap::BitmapBuilder;
1212
use crate::buffer::Buffer;
1313
use crate::datatypes::ArrowDataType;
1414

1515
/// Concrete [`Growable`] for the [`BinaryViewArrayGeneric`].
1616
pub struct GrowableBinaryViewArray<'a, T: ViewType + ?Sized> {
1717
arrays: Vec<&'a BinaryViewArrayGeneric<T>>,
1818
dtype: ArrowDataType,
19-
validity: Option<MutableBitmap>,
19+
validity: Option<BitmapBuilder>,
2020
inner: MutableBinaryViewArray<T>,
2121
same_buffers: Option<&'a Arc<[Buffer<u8>]>>,
2222
total_same_buffers_len: usize, // Only valid if same_buffers is Some.
@@ -81,14 +81,14 @@ impl<'a, T: ViewType + ?Sized> GrowableBinaryViewArray<'a, T> {
8181
self.dtype.clone(),
8282
arr.views.into(),
8383
buffers.clone(),
84-
self.validity.take().map(Bitmap::from),
84+
self.validity.take().map(BitmapBuilder::freeze),
8585
arr.total_bytes_len,
8686
self.total_same_buffers_len,
8787
)
8888
}
8989
} else {
9090
arr.freeze_with_dtype(self.dtype.clone())
91-
.with_validity(self.validity.take().map(Bitmap::from))
91+
.with_validity(self.validity.take().map(BitmapBuilder::freeze))
9292
}
9393
}
9494
}

crates/polars-arrow/src/array/growable/boolean.rs

+12-12
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,15 @@ use std::sync::Arc;
33
use super::Growable;
44
use crate::array::growable::utils::{extend_validity, prepare_validity};
55
use crate::array::{Array, BooleanArray};
6-
use crate::bitmap::MutableBitmap;
6+
use crate::bitmap::BitmapBuilder;
77
use crate::datatypes::ArrowDataType;
88

99
/// Concrete [`Growable`] for the [`BooleanArray`].
1010
pub struct GrowableBoolean<'a> {
1111
arrays: Vec<&'a BooleanArray>,
1212
dtype: ArrowDataType,
13-
validity: Option<MutableBitmap>,
14-
values: MutableBitmap,
13+
validity: Option<BitmapBuilder>,
14+
values: BitmapBuilder,
1515
}
1616

1717
impl<'a> GrowableBoolean<'a> {
@@ -30,7 +30,7 @@ impl<'a> GrowableBoolean<'a> {
3030
Self {
3131
arrays,
3232
dtype,
33-
values: MutableBitmap::with_capacity(capacity),
33+
values: BitmapBuilder::with_capacity(capacity),
3434
validity: prepare_validity(use_validity, capacity),
3535
}
3636
}
@@ -41,8 +41,8 @@ impl<'a> GrowableBoolean<'a> {
4141

4242
BooleanArray::new(
4343
self.dtype.clone(),
44-
values.into(),
45-
validity.map(|v| v.into()),
44+
values.freeze(),
45+
validity.map(|v| v.freeze()),
4646
)
4747
}
4848
}
@@ -55,11 +55,7 @@ impl<'a> Growable<'a> for GrowableBoolean<'a> {
5555
let values = array.values();
5656

5757
let (slice, offset, _) = values.as_slice();
58-
// SAFETY: invariant offset + length <= slice.len()
59-
unsafe {
60-
self.values
61-
.extend_from_slice_unchecked(slice, start + offset, len);
62-
}
58+
self.values.extend_from_slice(slice, start + offset, len);
6359
}
6460

6561
fn extend_validity(&mut self, additional: usize) {
@@ -85,6 +81,10 @@ impl<'a> Growable<'a> for GrowableBoolean<'a> {
8581

8682
impl<'a> From<GrowableBoolean<'a>> for BooleanArray {
8783
fn from(val: GrowableBoolean<'a>) -> Self {
88-
BooleanArray::new(val.dtype, val.values.into(), val.validity.map(|v| v.into()))
84+
BooleanArray::new(
85+
val.dtype,
86+
val.values.freeze(),
87+
val.validity.map(|v| v.freeze()),
88+
)
8989
}
9090
}

crates/polars-arrow/src/array/growable/dictionary.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use std::sync::Arc;
33
use super::{make_growable, Growable};
44
use crate::array::growable::utils::{extend_validity, prepare_validity};
55
use crate::array::{Array, DictionaryArray, DictionaryKey, PrimitiveArray};
6-
use crate::bitmap::MutableBitmap;
6+
use crate::bitmap::BitmapBuilder;
77
use crate::datatypes::ArrowDataType;
88

99
/// Concrete [`Growable`] for the [`DictionaryArray`].
@@ -14,7 +14,7 @@ pub struct GrowableDictionary<'a, K: DictionaryKey> {
1414
dtype: ArrowDataType,
1515
keys: Vec<&'a PrimitiveArray<K>>,
1616
key_values: Vec<K>,
17-
validity: Option<MutableBitmap>,
17+
validity: Option<BitmapBuilder>,
1818
offsets: Vec<usize>,
1919
values: Box<dyn Array>,
2020
}
@@ -77,7 +77,7 @@ impl<'a, T: DictionaryKey> GrowableDictionary<'a, T> {
7777
let keys = PrimitiveArray::<T>::new(
7878
T::PRIMITIVE.into(),
7979
key_values.into(),
80-
validity.map(|v| v.into()),
80+
validity.map(|v| v.freeze()),
8181
);
8282

8383
// SAFETY: the invariant of this struct ensures that this is up-held

crates/polars-arrow/src/array/growable/fixed_binary.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@ use std::sync::Arc;
33
use super::Growable;
44
use crate::array::growable::utils::{extend_validity, prepare_validity};
55
use crate::array::{Array, FixedSizeBinaryArray};
6-
use crate::bitmap::MutableBitmap;
6+
use crate::bitmap::BitmapBuilder;
77

88
/// Concrete [`Growable`] for the [`FixedSizeBinaryArray`].
99
pub struct GrowableFixedSizeBinary<'a> {
1010
arrays: Vec<&'a FixedSizeBinaryArray>,
11-
validity: Option<MutableBitmap>,
11+
validity: Option<BitmapBuilder>,
1212
values: Vec<u8>,
1313
size: usize, // just a cache
1414
}
@@ -44,7 +44,7 @@ impl<'a> GrowableFixedSizeBinary<'a> {
4444
FixedSizeBinaryArray::new(
4545
self.arrays[0].dtype().clone(),
4646
values.into(),
47-
validity.map(|v| v.into()),
47+
validity.map(|v| v.freeze()),
4848
)
4949
}
5050
}
@@ -88,7 +88,7 @@ impl<'a> From<GrowableFixedSizeBinary<'a>> for FixedSizeBinaryArray {
8888
FixedSizeBinaryArray::new(
8989
val.arrays[0].dtype().clone(),
9090
val.values.into(),
91-
val.validity.map(|v| v.into()),
91+
val.validity.map(|v| v.freeze()),
9292
)
9393
}
9494
}

crates/polars-arrow/src/array/growable/fixed_size_list.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@ use std::sync::Arc;
33
use super::{make_growable, Growable};
44
use crate::array::growable::utils::{extend_validity, extend_validity_copies, prepare_validity};
55
use crate::array::{Array, FixedSizeListArray};
6-
use crate::bitmap::MutableBitmap;
6+
use crate::bitmap::BitmapBuilder;
77

88
/// Concrete [`Growable`] for the [`FixedSizeListArray`].
99
pub struct GrowableFixedSizeList<'a> {
1010
arrays: Vec<&'a FixedSizeListArray>,
11-
validity: Option<MutableBitmap>,
11+
validity: Option<BitmapBuilder>,
1212
values: Box<dyn Growable<'a> + 'a>,
1313
size: usize,
1414
length: usize,
@@ -61,7 +61,7 @@ impl<'a> GrowableFixedSizeList<'a> {
6161
self.arrays[0].dtype().clone(),
6262
self.length,
6363
values,
64-
validity.map(|v| v.into()),
64+
validity.map(|v| v.freeze()),
6565
)
6666
}
6767
}
@@ -122,7 +122,7 @@ impl<'a> From<GrowableFixedSizeList<'a>> for FixedSizeListArray {
122122
val.arrays[0].dtype().clone(),
123123
val.length,
124124
values,
125-
val.validity.map(|v| v.into()),
125+
val.validity.map(|v| v.freeze()),
126126
)
127127
}
128128
}

crates/polars-arrow/src/array/growable/list.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use std::sync::Arc;
33
use super::{make_growable, Growable};
44
use crate::array::growable::utils::{extend_validity, prepare_validity};
55
use crate::array::{Array, ListArray};
6-
use crate::bitmap::MutableBitmap;
6+
use crate::bitmap::BitmapBuilder;
77
use crate::offset::{Offset, Offsets};
88

99
unsafe fn extend_offset_values<O: Offset>(
@@ -29,7 +29,7 @@ unsafe fn extend_offset_values<O: Offset>(
2929
/// Concrete [`Growable`] for the [`ListArray`].
3030
pub struct GrowableList<'a, O: Offset> {
3131
arrays: Vec<&'a ListArray<O>>,
32-
validity: Option<MutableBitmap>,
32+
validity: Option<BitmapBuilder>,
3333
values: Box<dyn Growable<'a> + 'a>,
3434
offsets: Offsets<O>,
3535
}
@@ -68,7 +68,7 @@ impl<'a, O: Offset> GrowableList<'a, O> {
6868
self.arrays[0].dtype().clone(),
6969
offsets.into(),
7070
values,
71-
validity.map(|v| v.into()),
71+
validity.map(|v| v.freeze()),
7272
)
7373
}
7474
}

crates/polars-arrow/src/array/growable/primitive.rs

+8-4
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,15 @@ use std::sync::Arc;
33
use super::Growable;
44
use crate::array::growable::utils::{extend_validity, extend_validity_copies, prepare_validity};
55
use crate::array::{Array, PrimitiveArray};
6-
use crate::bitmap::MutableBitmap;
6+
use crate::bitmap::BitmapBuilder;
77
use crate::datatypes::ArrowDataType;
88
use crate::types::NativeType;
99

1010
/// Concrete [`Growable`] for the [`PrimitiveArray`].
1111
pub struct GrowablePrimitive<'a, T: NativeType> {
1212
dtype: ArrowDataType,
1313
arrays: Vec<&'a PrimitiveArray<T>>,
14-
validity: Option<MutableBitmap>,
14+
validity: Option<BitmapBuilder>,
1515
values: Vec<T>,
1616
}
1717

@@ -48,7 +48,7 @@ impl<'a, T: NativeType> GrowablePrimitive<'a, T> {
4848
PrimitiveArray::<T>::new(
4949
self.dtype.clone(),
5050
values.into(),
51-
validity.map(|v| v.into()),
51+
validity.map(|v| v.freeze()),
5252
)
5353
}
5454
}
@@ -105,6 +105,10 @@ impl<'a, T: NativeType> Growable<'a> for GrowablePrimitive<'a, T> {
105105
impl<'a, T: NativeType> From<GrowablePrimitive<'a, T>> for PrimitiveArray<T> {
106106
#[inline]
107107
fn from(val: GrowablePrimitive<'a, T>) -> Self {
108-
PrimitiveArray::<T>::new(val.dtype, val.values.into(), val.validity.map(|v| v.into()))
108+
PrimitiveArray::<T>::new(
109+
val.dtype,
110+
val.values.into(),
111+
val.validity.map(|v| v.freeze()),
112+
)
109113
}
110114
}

crates/polars-arrow/src/array/growable/structure.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@ use std::sync::Arc;
33
use super::{make_growable, Growable};
44
use crate::array::growable::utils::{extend_validity, prepare_validity};
55
use crate::array::{Array, StructArray};
6-
use crate::bitmap::MutableBitmap;
6+
use crate::bitmap::BitmapBuilder;
77

88
/// Concrete [`Growable`] for the [`StructArray`].
99
pub struct GrowableStruct<'a> {
1010
arrays: Vec<&'a StructArray>,
1111
length: usize,
12-
validity: Option<MutableBitmap>,
12+
validity: Option<BitmapBuilder>,
1313
values: Vec<Box<dyn Growable<'a> + 'a>>,
1414
}
1515

@@ -62,7 +62,7 @@ impl<'a> GrowableStruct<'a> {
6262
self.arrays[0].dtype().clone(),
6363
self.length,
6464
values,
65-
validity.map(|v| v.into()),
65+
validity.map(|v| v.freeze()),
6666
)
6767
}
6868
}
@@ -129,7 +129,7 @@ impl<'a> From<GrowableStruct<'a>> for StructArray {
129129
val.arrays[0].dtype().clone(),
130130
val.length,
131131
values,
132-
val.validity.map(|v| v.into()),
132+
val.validity.map(|v| v.freeze()),
133133
)
134134
}
135135
}

crates/polars-arrow/src/array/growable/utf8.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@ use super::utils::extend_offset_values;
44
use super::Growable;
55
use crate::array::growable::utils::{extend_validity, prepare_validity};
66
use crate::array::{Array, Utf8Array};
7-
use crate::bitmap::MutableBitmap;
7+
use crate::bitmap::BitmapBuilder;
88
use crate::offset::{Offset, Offsets};
99

1010
/// Concrete [`Growable`] for the [`Utf8Array`].
1111
pub struct GrowableUtf8<'a, O: Offset> {
1212
arrays: Vec<&'a Utf8Array<O>>,
13-
validity: Option<MutableBitmap>,
13+
validity: Option<BitmapBuilder>,
1414
values: Vec<u8>,
1515
offsets: Offsets<O>,
1616
}
@@ -49,7 +49,7 @@ impl<'a, O: Offset> GrowableUtf8<'a, O> {
4949
self.arrays[0].dtype().clone(),
5050
offsets.into(),
5151
values.into(),
52-
validity.map(|v| v.into()),
52+
validity.map(|v| v.freeze()),
5353
)
5454
}
5555
}

0 commit comments

Comments
 (0)