Skip to content

Commit f0879ed

Browse files
apacheGH-35718: [Go][Parquet] Fix for null-only encoding panic (apache#39497)
### Rationale for this change closes: apache#35718 ### What changes are included in this PR? Fix panic when writing with DeltaBinaryPacked or DeltaByteArray when a column only has nulls ### Are these changes tested? Yes - add a test writing nulls to columns with DeltaBinaryPacked / DeltaByteArray / DeltaLengthByteArray encodings ### Are there any user-facing changes? No * Closes: apache#35718 Lead-authored-by: yufanmo <[email protected]> Co-authored-by: Matt Topol <[email protected]> Signed-off-by: Matt Topol <[email protected]>
1 parent eade938 commit f0879ed

File tree

2 files changed

+67
-1
lines changed

2 files changed

+67
-1
lines changed

go/parquet/internal/encoding/delta_byte_array.go

+9-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,15 @@ type DeltaByteArrayEncoder struct {
4040
}
4141

4242
func (enc *DeltaByteArrayEncoder) EstimatedDataEncodedSize() int64 {
43-
return enc.prefixEncoder.EstimatedDataEncodedSize() + enc.suffixEncoder.EstimatedDataEncodedSize()
43+
prefixEstimatedSize := int64(0)
44+
if enc.prefixEncoder != nil {
45+
prefixEstimatedSize = enc.prefixEncoder.EstimatedDataEncodedSize()
46+
}
47+
suffixEstimatedSize := int64(0)
48+
if enc.suffixEncoder != nil {
49+
suffixEstimatedSize = enc.suffixEncoder.EstimatedDataEncodedSize()
50+
}
51+
return prefixEstimatedSize + suffixEstimatedSize
4452
}
4553

4654
func (enc *DeltaByteArrayEncoder) initEncoders() {

go/parquet/pqarrow/encode_arrow_test.go

+58
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,64 @@ func TestWriteEmptyLists(t *testing.T) {
473473
require.NoError(t, err)
474474
}
475475

476+
func TestWriteAllNullsWithDeltaEncoding(t *testing.T) {
477+
sc := arrow.NewSchema([]arrow.Field{
478+
{Name: "f1", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
479+
{Name: "f2", Type: arrow.ListOf(arrow.FixedWidthTypes.Date32)},
480+
{Name: "f3", Type: arrow.BinaryTypes.String, Nullable: true},
481+
{Name: "f4", Type: arrow.ListOf(arrow.BinaryTypes.String)},
482+
{Name: "f5", Type: arrow.BinaryTypes.LargeString, Nullable: true},
483+
{Name: "f6", Type: arrow.ListOf(arrow.BinaryTypes.LargeString)},
484+
{Name: "f7", Type: arrow.PrimitiveTypes.Float64, Nullable: true},
485+
{Name: "f8", Type: arrow.ListOf(arrow.FixedWidthTypes.Date64)},
486+
{Name: "f9", Type: arrow.BinaryTypes.String, Nullable: true},
487+
{Name: "f10", Type: arrow.ListOf(arrow.BinaryTypes.LargeString)},
488+
{Name: "f11", Type: arrow.FixedWidthTypes.Boolean, Nullable: true},
489+
{Name: "f12", Type: arrow.ListOf(arrow.FixedWidthTypes.Boolean)},
490+
{Name: "f13", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
491+
{Name: "f14", Type: arrow.ListOf(arrow.PrimitiveTypes.Float32)},
492+
}, nil)
493+
bldr := array.NewRecordBuilder(memory.DefaultAllocator, sc)
494+
defer bldr.Release()
495+
for _, b := range bldr.Fields() {
496+
b.AppendNull()
497+
}
498+
499+
rec := bldr.NewRecord()
500+
defer rec.Release()
501+
502+
props := parquet.NewWriterProperties(
503+
parquet.WithVersion(parquet.V1_0),
504+
parquet.WithDictionaryDefault(false),
505+
parquet.WithDictionaryFor("f9", true),
506+
parquet.WithDictionaryFor("f10", true),
507+
parquet.WithDictionaryFor("f13", true),
508+
parquet.WithDictionaryFor("f14", true),
509+
parquet.WithEncodingFor("f1", parquet.Encodings.DeltaBinaryPacked),
510+
parquet.WithEncodingFor("f2", parquet.Encodings.DeltaBinaryPacked),
511+
parquet.WithEncodingFor("f3", parquet.Encodings.DeltaByteArray),
512+
parquet.WithEncodingFor("f4", parquet.Encodings.DeltaByteArray),
513+
parquet.WithEncodingFor("f5", parquet.Encodings.DeltaLengthByteArray),
514+
parquet.WithEncodingFor("f6", parquet.Encodings.DeltaLengthByteArray),
515+
parquet.WithEncodingFor("f7", parquet.Encodings.Plain),
516+
parquet.WithEncodingFor("f8", parquet.Encodings.Plain),
517+
parquet.WithEncodingFor("f9", parquet.Encodings.Plain),
518+
parquet.WithEncodingFor("f10", parquet.Encodings.Plain),
519+
parquet.WithEncodingFor("f11", parquet.Encodings.RLE),
520+
parquet.WithEncodingFor("f12", parquet.Encodings.RLE),
521+
parquet.WithEncodingFor("f13", parquet.Encodings.RLE),
522+
parquet.WithEncodingFor("f14", parquet.Encodings.RLE),
523+
)
524+
arrprops := pqarrow.DefaultWriterProps()
525+
var buf bytes.Buffer
526+
fw, err := pqarrow.NewFileWriter(sc, &buf, props, arrprops)
527+
require.NoError(t, err)
528+
err = fw.Write(rec)
529+
require.NoError(t, err)
530+
err = fw.Close()
531+
require.NoError(t, err)
532+
}
533+
476534
func TestArrowReadWriteTableChunkedCols(t *testing.T) {
477535
chunkSizes := []int{2, 4, 10, 2}
478536
const totalLen = int64(18)

0 commit comments

Comments
 (0)