Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 2735683

Browse files
TheNeuralBit authored and wesm committed May 17, 2019
ARROW-2412: [Integration] Add nested dictionary test case, skipped for now
Adds a test case that contains a dictionary-encoded struct and list, both with dictionary-encoded children. As expected, Java, C++, and JS all complain about the generated JSON.

Author: Brian Hulette <hulettbh@gmail.com>
Author: Brian Hulette <brian.hulette@ccri.com>

Closes apache#1848 from TheNeuralBit/nested-dictionary-integration and squashes the following commits:

eac016b <Brian Hulette> Remove checks from IPC readers preventing nested dictionaries
0118362 <Brian Hulette> skip nested dictionary test
5e52763 <Brian Hulette> Add nested dictionary test case to the integration test
1 parent 9517d92 commit 2735683

File tree

3 files changed

+35
-4
lines changed

3 files changed

+35
-4
lines changed
 

‎integration/integration_test.py

+31
Original file line numberDiff line numberDiff line change
@@ -994,6 +994,35 @@ def generate_dictionary_case():
994994
dictionaries=[dict0, dict1, dict2])
995995

996996

997+
def generate_nested_dictionary_case():
998+
str_type = StringType('str')
999+
dict0 = Dictionary(0, str_type, str_type.generate_column(10, name='DICT0'))
1000+
1001+
list_type = ListType(
1002+
'list',
1003+
DictionaryType('str_dict', get_field('', 'int8'), dict0))
1004+
dict1 = Dictionary(1,
1005+
list_type,
1006+
list_type.generate_column(30, name='DICT1'))
1007+
1008+
struct_type = StructType('struct', [
1009+
DictionaryType('str_dict_a', get_field('', 'int8'), dict0),
1010+
DictionaryType('str_dict_b', get_field('', 'int8'), dict0)
1011+
])
1012+
dict2 = Dictionary(2,
1013+
struct_type,
1014+
struct_type.generate_column(30, name='DICT2'))
1015+
1016+
fields = [
1017+
DictionaryType('list_dict', get_field('', 'int8'), dict1),
1018+
DictionaryType('struct_dict', get_field('', 'int8'), dict2)
1019+
]
1020+
1021+
batch_sizes = [10, 13]
1022+
return _generate_file("nested_dictionary", fields, batch_sizes,
1023+
dictionaries=[dict0, dict1, dict2])
1024+
1025+
9971026
def get_generated_json_files(tempdir=None, flight=False):
9981027
tempdir = tempdir or tempfile.mkdtemp()
9991028

@@ -1008,6 +1037,8 @@ def _temp_path():
10081037
generate_interval_case(),
10091038
generate_nested_case(),
10101039
generate_dictionary_case().skip_category(SKIP_FLIGHT),
1040+
generate_nested_dictionary_case().skip_category(SKIP_ARROW)
1041+
.skip_category(SKIP_FLIGHT),
10111042
]
10121043

10131044
if flight:

‎js/src/ipc/metadata/json.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ export function fieldFromJSON(_field: any, dictionaries?: Map<number, DataType>,
103103
let dictType: Dictionary;
104104
let dictField: Field<Dictionary>;
105105

106-
// If no dictionary encoding, or in the process of decoding the children of a dictionary-encoded field
106+
// If no dictionary encoding
107107
if (!dictionaries || !dictionaryFields || !(dictMeta = _field['dictionary'])) {
108108
type = typeFromJSON(_field, fieldChildrenFromJSON(_field, dictionaries, dictionaryFields));
109109
field = new Field(_field['name'], type, _field['nullable'], customMetadataFromJSON(_field['customMetadata']));
@@ -115,7 +115,7 @@ export function fieldFromJSON(_field: any, dictionaries?: Map<number, DataType>,
115115
else if (!dictionaries.has(id = dictMeta['id'])) {
116116
// a dictionary index defaults to signed 32 bit int if unspecified
117117
keys = (keys = dictMeta['indexType']) ? indexTypeFromJSON(keys) as TKeys : new Int32();
118-
dictionaries.set(id, type = typeFromJSON(_field, fieldChildrenFromJSON(_field)));
118+
dictionaries.set(id, type = typeFromJSON(_field, fieldChildrenFromJSON(_field, dictionaries, dictionaryFields)));
119119
dictType = new Dictionary(type, keys, id, dictMeta['isOrdered']);
120120
dictField = new Field(_field['name'], dictType, _field['nullable'], customMetadataFromJSON(_field['customMetadata']));
121121
dictionaryFields.set(id, [field = dictField]);

‎js/src/ipc/metadata/message.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,7 @@ function decodeField(f: _Field, dictionaries?: Map<number, DataType>, dictionary
351351
let dictMeta: _DictionaryEncoding | null;
352352
let dictField: Field<Dictionary>;
353353

354-
// If no dictionary encoding, or in the process of decoding the children of a dictionary-encoded field
354+
// If no dictionary encoding
355355
if (!dictionaries || !dictionaryFields || !(dictMeta = f.dictionary())) {
356356
type = decodeFieldType(f, decodeFieldChildren(f, dictionaries, dictionaryFields));
357357
field = new Field(f.name()!, type, f.nullable(), decodeCustomMetadata(f));
@@ -363,7 +363,7 @@ function decodeField(f: _Field, dictionaries?: Map<number, DataType>, dictionary
363363
else if (!dictionaries.has(id = dictMeta.id().low)) {
364364
// a dictionary index defaults to signed 32 bit int if unspecified
365365
keys = (keys = dictMeta.indexType()) ? decodeIndexType(keys) as TKeys : new Int32();
366-
dictionaries.set(id, type = decodeFieldType(f, decodeFieldChildren(f)));
366+
dictionaries.set(id, type = decodeFieldType(f, decodeFieldChildren(f, dictionaries, dictionaryFields)));
367367
dictType = new Dictionary(type, keys, id, dictMeta.isOrdered());
368368
dictField = new Field(f.name()!, dictType, f.nullable(), decodeCustomMetadata(f));
369369
dictionaryFields.set(id, [field = dictField]);

0 commit comments

Comments
 (0)
Please sign in to comment.