Skip to content

Commit 14c673e

Browse files
authored
chore: Fix the verify_dict_indices codegen (#20920)
1 parent 64a01e7 commit 14c673e

File tree

2 files changed

+10
-45
lines changed

2 files changed

+10
-45
lines changed

crates/polars-parquet/src/arrow/read/deserialize/dictionary_encoded/mod.rs

+7 -39
Original file line number | Diff line number | Diff line change
@@ -179,52 +179,20 @@ fn no_more_bitpacked_values() -> ParquetError {
179179
}
180180

181181
#[inline(always)]
182-
#[cfg(feature = "simd")]
183-
fn verify_dict_indices(indices: &[u32; 32], dict_size: usize) -> ParquetResult<()> {
184-
// You would think that the compiler can do this itself, but it does not always do this
185-
// properly. So we help it a bit.
182+
fn verify_dict_indices(indices: &[u32], dict_size: usize) -> ParquetResult<()> {
183+
debug_assert!(dict_size <= u32::MAX as usize);
184+
let dict_size = dict_size as u32;
186185

187-
use std::simd::cmp::SimdPartialOrd;
188-
use std::simd::u32x32;
189-
190-
let dict_size = u32x32::splat(dict_size as u32);
191-
let indices = u32x32::from_slice(indices);
192-
193-
let is_invalid = indices.simd_ge(dict_size);
194-
if is_invalid.any() {
195-
Err(oob_dict_idx())
196-
} else {
197-
Ok(())
198-
}
199-
}
200-
201-
#[inline(always)]
202-
#[cfg(not(feature = "simd"))]
203-
fn verify_dict_indices(indices: &[u32; 32], dict_size: usize) -> ParquetResult<()> {
204186
let mut is_valid = true;
205187
for &idx in indices {
206-
is_valid &= (idx as usize) < dict_size;
188+
is_valid &= idx < dict_size;
207189
}
208190

209191
if is_valid {
210-
return Ok(());
211-
}
212-
213-
Err(oob_dict_idx())
214-
}
215-
216-
#[inline(always)]
217-
fn verify_dict_indices_slice(indices: &[u32], dict_size: usize) -> ParquetResult<()> {
218-
let mut is_valid = true;
219-
for &idx in indices {
220-
is_valid &= (idx as usize) < dict_size;
221-
}
222-
223-
if is_valid {
224-
return Ok(());
192+
Ok(())
193+
} else {
194+
Err(oob_dict_idx())
225195
}
226-
227-
Err(oob_dict_idx())
228196
}
229197

230198
/// Skip over entire chunks in a [`HybridRleDecoder`] as long as all skipped chunks do not include

crates/polars-parquet/src/arrow/read/deserialize/dictionary_encoded/required.rs

+3 -6
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,6 @@
11
use arrow::types::AlignedBytes;
22

3-
use super::{
4-
oob_dict_idx, required_skip_whole_chunks, verify_dict_indices, verify_dict_indices_slice,
5-
IndexMapping,
6-
};
3+
use super::{oob_dict_idx, required_skip_whole_chunks, verify_dict_indices, IndexMapping};
74
use crate::parquet::encoding::hybrid_rle::{HybridRleChunk, HybridRleDecoder};
85
use crate::parquet::error::ParquetResult;
96

@@ -55,7 +52,7 @@ pub fn decode<B: AlignedBytes, D: IndexMapping<Output = B>>(
5552

5653
if let Some((chunk, chunk_size)) = decoder.chunked().next_inexact() {
5754
let chunk = &chunk[num_rows_to_skip..chunk_size];
58-
verify_dict_indices_slice(chunk, dict.len())?;
55+
verify_dict_indices(chunk, dict.len())?;
5956
target.extend(chunk.iter().map(|&idx| {
6057
// SAFETY: The dict indices were verified before.
6158
unsafe { dict.get_unchecked(idx) }
@@ -73,7 +70,7 @@ pub fn decode<B: AlignedBytes, D: IndexMapping<Output = B>>(
7370
}
7471

7572
if let Some((chunk, chunk_size)) = chunked.remainder() {
76-
verify_dict_indices_slice(&chunk[..chunk_size], dict.len())?;
73+
verify_dict_indices(&chunk[..chunk_size], dict.len())?;
7774
target.extend(chunk[..chunk_size].iter().map(|&idx| {
7875
// SAFETY: The dict indices were verified before.
7976
unsafe { dict.get_unchecked(idx) }

0 commit comments

Comments
 (0)