Skip to content

Commit 96c45c4

Browse files
authored
feat: form from type functions (#106)
* form from type functions * add check on iterable type * add test * correct numpy type * fix offsets type * add test * add module * move function out * add more functions * try to fix Symbol issue * add more tests * cleanup and add more tests * add tree form test function * add tests * fix FieldError * use FieldError for nightly only * fix typo * one more * last one
1 parent 3993678 commit 96c45c4

File tree

4 files changed

+346
-3
lines changed

4 files changed

+346
-3
lines changed

src/AwkwardArray.jl

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import Tables
99

1010
include("./all_implementations.jl")
1111
include("./tables.jl")
12+
include("./form_utils.jl")
1213

1314
include("./AwkwardPythonCallExt.jl")
1415
using .AwkwardPythonCallExt: convert

src/form_utils.jl

+232
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
2+
# Define a dictionary mapping Julia types to NumPy types
3+
# Lookup table from Julia element types to the NumPy dtype names used in forms.
# Every key is a concrete `DataType` and every value a `String`, so the container
# type is spelled out explicitly.
const julia_to_numpy = Dict{DataType,String}(
    # Signed and unsigned integers
    Int8 => "int8",
    UInt8 => "uint8",
    Int16 => "int16",
    UInt16 => "uint16",
    Int32 => "int32",
    UInt32 => "uint32",
    Int64 => "int64",
    UInt64 => "uint64",
    # Floating point
    Float16 => "float16",
    Float32 => "float32",
    Float64 => "float64",
    # Booleans
    Bool => "bool",
    # Complex numbers
    Complex{Float32} => "complex64",
    Complex{Float64} => "complex128",
    # Strings
    String => "str",
)
20+
21+
# Function to get the corresponding NumPy type
22+
# Return the NumPy dtype name registered for `julia_type` in `julia_to_numpy`.
# A type without an entry in the table yields the sentinel string "unknown".
function julia_to_numpy_type(julia_type::Type)
    return String(get(julia_to_numpy, julia_type, "unknown"))
end
26+
27+
# Function to generate form key
28+
# Produce the form key "node<N>" for the counter's current value `N`, then advance
# the counter by one so the next call yields a fresh key.
function _generate_form_key!(form_key_id_ref::Base.RefValue{Int64})
    current_id = form_key_id_ref[]
    form_key_id_ref[] = current_id + 1
    return string("node", current_id)
end
33+
34+
# Assemble the JSON text of a NumpyArray form.
#
# `parameters` is spliced in directly after the opening quote of the "primitive"
# value, so the caller must supply the primitive name together with its closing
# quote, any extra members, and the trailing ", " separator.
# `form_key` is written verbatim as the "form_key" value.
function json_numpy_form(parameters::String, form_key::String)
    return string(
        "{\"class\": \"NumpyArray\", \"primitive\": \"",
        parameters,
        "\"form_key\": \"",
        form_key,
        "\"}",
    )
end
38+
39+
# Function for handling primitive types
40+
# Resolve the NumPy primitive name for `T`.
# Types with no entry in the `julia_to_numpy` table (for example `Int128`) are
# rejected with the same message as the generic fallback, instead of letting the
# "unknown" sentinel leak into a generated form.
function _numpy_primitive(::Type{T}) where {T}
    primitive = julia_to_numpy_type(T)
    primitive == "unknown" && error("Type '$T' is not supported yet.")
    return primitive
end

# NumpyArray form for a primitive numeric type (integers, `Bool`, floats).
#
# The form key is "node<form_key_id>". The id is passed by value, so nothing is
# advanced for the caller; use the `Ref` method below to thread a shared counter.
# Throws an `ErrorException` when `T` has no NumPy primitive mapping.
function type_to_form(
    ::Type{T}, form_key_id::Int64=0
) where {T <: Union{Integer, AbstractFloat}}
    primitive = _numpy_primitive(T)
    # `json_numpy_form` expects the primitive name followed by its closing quote
    # and the member separator.
    return json_numpy_form(primitive * "\", ", "node$(form_key_id)")
end

# NumpyArray form for a primitive numeric type, drawing the form key from the
# shared counter `form_key_id_ref` (which is advanced by one).
# Throws an `ErrorException` when `T` has no NumPy primitive mapping.
function type_to_form(
    ::Type{T}, form_key_id_ref::Base.RefValue{Int64}
) where {T <: Union{Integer, AbstractFloat}}
    # Validate before generating the key so a rejected type does not consume one.
    primitive = _numpy_primitive(T)
    form_key = _generate_form_key!(form_key_id_ref)
    return json_numpy_form(primitive * "\", ", form_key)
end
90+
91+
# NumpyArray form for `Char`: a "uint8" primitive tagged with the "char" array
# parameter. The form key is "node<form_key_id>".
function type_to_form(::Type{T}, form_key_id::Int64=0) where {T <: Char}
    # Primitive name plus the extra "parameters" member, in the pre-quoted shape
    # `json_numpy_form` splices in.
    char_primitive = "uint8\", \"parameters\": { \"__array__\": \"char\" }, "
    return json_numpy_form(char_primitive, "node$(form_key_id)")
end
99+
100+
# NumpyArray form for `Char` ("uint8" primitive tagged as "char"), taking its
# form key from the shared counter `form_key_id_ref`, which is advanced by one.
function type_to_form(::Type{T}, form_key_id_ref::Base.RefValue{Int64}) where {T <: Char}
    node_key = _generate_form_key!(form_key_id_ref)
    char_primitive = "uint8\", \"parameters\": { \"__array__\": \"char\" }, "
    return json_numpy_form(char_primitive, node_key)
end
107+
108+
# ListOffsetArray form for `String`: a list tagged with the "string" array
# parameter whose content is the form of the string's element type.
# This node is keyed "node<form_key_id>"; the content is built with the next id.
function type_to_form(::Type{T}, form_key_id::Int64=0) where {T <: String}
    chars_form = type_to_form(eltype(T), form_key_id + 1)
    string_tag = " \"parameters\": { \"__array__\": \"string\" }, "

    return string(
        "{\"class\": \"ListOffsetArray\", \"offsets\": \"",
        type_to_numpy_like(T),
        "\", ",
        "\"content\": ",
        chars_form,
        ", ",
        string_tag,
        "\"form_key\": \"",
        "node$(form_key_id)",
        "\"}",
    )
end
122+
123+
# ListOffsetArray form for `String`, with keys drawn from the shared counter
# `form_key_id_ref`. The list node takes its key first, then the content form is
# generated, so the content receives the following key(s).
function type_to_form(::Type{T}, form_key_id_ref::Base.RefValue{Int64}) where {T <: String}
    node_key = _generate_form_key!(form_key_id_ref)
    string_tag = " \"parameters\": { \"__array__\": \"string\" }, "
    chars_form = type_to_form(eltype(T), form_key_id_ref)

    return string(
        "{\"class\": \"ListOffsetArray\", \"offsets\": \"",
        type_to_numpy_like(T),
        "\", ",
        "\"content\": ",
        chars_form,
        ", ",
        string_tag,
        "\"form_key\": \"",
        node_key,
        "\"}",
    )
end
136+
137+
# Function to handle specific Vector types
138+
# ListOffsetArray form for `Vector{T}` with "int64" offsets.
# This node is keyed "node<form_key_id>"; the element form is built with
# `form_key_id + 1`.
function type_to_form(::Type{Vector{T}}, form_key_id::Int64=0) where {T}
    inner_form = type_to_form(T, form_key_id + 1)
    return string(
        "{\"class\": \"ListOffsetArray\", \"offsets\": \"int64\", ",
        "\"content\": ",
        inner_form,
        ", ",
        "\"form_key\": \"node",
        form_key_id,
        "\"}",
    )
end
145+
146+
# ListOffsetArray form for `Vector{T}` with "int64" offsets, with keys drawn from
# the shared counter `form_key_id_ref`. The list node takes its key before the
# element form is generated.
function type_to_form(::Type{Vector{T}}, form_key_id_ref::Base.RefValue{Int64}) where {T}
    node_key = _generate_form_key!(form_key_id_ref)
    inner_form = type_to_form(T, form_key_id_ref)
    return string(
        "{\"class\": \"ListOffsetArray\", \"offsets\": \"int64\", ",
        "\"content\": ",
        inner_form,
        ", ",
        "\"form_key\": \"",
        node_key,
        "\"}",
    )
end
155+
156+
# Function for handling iterable types
157+
# ListOffsetArray form for any `AbstractVector` subtype.
# The content is the form of `eltype(T)`, built with `form_key_id + 1`; this node
# is keyed "node<form_key_id>". When the element type is `Char` the list is also
# tagged with the "string" array parameter.
function type_to_form(::Type{T}, form_key_id::Int64=0) where {T <: AbstractVector}
    item_type = eltype(T)

    # Only character sequences carry the "string" tag; otherwise nothing is added.
    string_tag = if item_type == Char
        " \"parameters\": { \"__array__\": \"string\" }, "
    else
        ""
    end

    item_form = type_to_form(item_type, form_key_id + 1)

    return string(
        "{\"class\": \"ListOffsetArray\", \"offsets\": \"",
        type_to_numpy_like(T),
        "\", ",
        "\"content\": ",
        item_form,
        ", ",
        string_tag,
        "\"form_key\": \"",
        "node$(form_key_id)",
        "\"}",
    )
end
174+
175+
# ListOffsetArray form for any `AbstractVector` subtype, with keys drawn from the
# shared counter `form_key_id_ref`. The list node takes its key first, then the
# form of `eltype(T)` is generated. A `Char` element type adds the "string" array
# parameter.
function type_to_form(
    ::Type{T}, form_key_id_ref::Base.RefValue{Int64}
) where {T <: AbstractVector}
    item_type = eltype(T)
    node_key = _generate_form_key!(form_key_id_ref)

    # Only character sequences carry the "string" tag; otherwise nothing is added.
    string_tag = if item_type == Char
        " \"parameters\": { \"__array__\": \"string\" }, "
    else
        ""
    end

    item_form = type_to_form(item_type, form_key_id_ref)

    return string(
        "{\"class\": \"ListOffsetArray\", \"offsets\": \"",
        type_to_numpy_like(T),
        "\", ",
        "\"content\": ",
        item_form,
        ", ",
        string_tag,
        "\"form_key\": \"",
        node_key,
        "\"}",
    )
end
191+
192+
# Fallback function for unsupported types
193+
# Catch-all for types without a dedicated `type_to_form` method (integer-id
# variant): always raises an `ErrorException` naming the offending type.
function type_to_form(::Type{T}, ::Int64) where {T}
    throw(ErrorException("Type '$T' is not supported yet."))
end
196+
197+
# Catch-all for types without a dedicated `type_to_form` method (shared-counter
# variant): always raises an `ErrorException` naming the offending type. The
# counter is not touched.
function type_to_form(::Type{T}, ::Base.RefValue{Int64}) where {T}
    throw(ErrorException("Type '$T' is not supported yet."))
end
200+
201+
# Helper function for type_to_numpy_like (placeholder implementation)
202+
# Name of the offsets dtype used for list forms of `T`.
# Placeholder: the argument type is ignored and "int64" is always returned.
type_to_numpy_like(::Type{T}) where {T} = "int64"
205+
206+
# A RecordArray form of all tree branches
207+
# Build a RecordArray form with one field per property of `tree`.
#
# For every name in `propertynames(tree)` the property is fetched and the form of
# its `eltype` is generated via `type_to_form`, all sharing one key counter that
# starts at `form_key_id`. The record node itself takes the key that follows the
# last content key.
#
# NOTE(review): field names are written without surrounding quotes, so the
# "fields" list is not strict JSON — confirm this is what consumers expect.
function tree_branches_type(tree, form_key_id::Int64=0)
    key_counter = Ref(form_key_id)

    field_labels = String[]
    branch_forms = String[]
    for name in propertynames(tree)
        push!(field_labels, string(name))
        branch_eltype = eltype(getproperty(tree, name))
        push!(branch_forms, type_to_form(branch_eltype, key_counter))
    end

    # The record's own key is drawn only after every branch has taken its keys.
    record_key = _generate_form_key!(key_counter)

    return string(
        "{\"class\": \"RecordArray\", \"fields\": [",
        join(field_labels, ", "),
        "], \"contents\": [",
        join(branch_forms, ", "),
        "], \"parameters\": {}, \"form_key\": \"",
        record_key,
        "\"}",
    )
end
232+

test/runtests.jl

+20-3
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,13 @@ end
313313
layout = AwkwardArray.ListOffsetArray([1, 2, 5], content_layout)
314314

315315
@test layout[:a] == [[2], [3, 4, 5]]
316-
@test_throws ErrorException getindex(layout, :invalid)
316+
317+
if VERSION >= v"1.12.0-DEV"
318+
@test_throws FieldError getindex(layout, :invalid)
319+
else
320+
@test_throws ErrorException getindex(layout, :invalid)
321+
end
322+
317323
@test_throws AssertionError getindex(layout[:a], :invalid)
318324
end
319325

@@ -490,7 +496,12 @@ end
490496
layout = AwkwardArray.ListArray([1, 2, 5], [2, 5, 5], content_layout)
491497

492498
@test layout[:a] == [[2], [3, 4, 5], []]
493-
@test_throws ErrorException getindex(layout, :invalid)
499+
if VERSION >= v"1.12.0-DEV"
500+
@test_throws FieldError getindex(layout, :invalid)
501+
else
502+
@test_throws ErrorException getindex(layout, :invalid)
503+
end
504+
494505
@test_throws AssertionError getindex(layout[:a], :invalid)
495506
end
496507
end
@@ -709,7 +720,12 @@ end
709720
layout = AwkwardArray.RegularArray(content_layout, 2)
710721

711722
@test layout[:a] == [[1, 2], [3, 4]]
712-
@test_throws ErrorException getindex(layout, :invalid)
723+
if VERSION >= v"1.12.0-DEV"
724+
@test_throws FieldError getindex(layout, :invalid)
725+
else
726+
@test_throws ErrorException getindex(layout, :invalid)
727+
end
728+
713729
@test_throws AssertionError getindex(layout[:a], :invalid)
714730
end
715731
end
@@ -3721,3 +3737,4 @@ end # @testset "AwkwardArray.jl"
37213737
end # @testset "Tables.jl"
37223738

37233739
include("./runpytests.jl")
3740+
include("./test_106_form_from_type.jl")

test/test_106_form_from_type.jl

+93
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
2+
# Forms generated from a type and a by-value key id.
@testset "Form from type" begin
    to_form = AwkwardArray.type_to_form

    # Expected-text builders for the two plain form shapes checked below.
    numpy_form(primitive, key) =
        """{"class": "NumpyArray", "primitive": "$primitive", "form_key": "node$key"}"""
    list_form(content, key) = string(
        """{"class": "ListOffsetArray", "offsets": "int64", "content": """,
        content,
        """, "form_key": "node$key"}""",
    )

    # Primitive scalars
    for (T, key, primitive) in (
        (Bool, 1, "bool"),
        (Int, 1, "int64"),
        (Int32, 1, "int32"),
        (Int32, 0, "int32"),
        (Int64, 1, "int64"),
    )
        @test to_form(T, key) == numpy_form(primitive, key)
    end

    # Characters and strings
    @test to_form(Char, 1) == """{"class": "NumpyArray", "primitive": "uint8", "parameters": { "__array__": "char" }, "form_key": "node1"}"""
    # NOTE(review): `type_to_form(String, …)` joins ", " with a parameters fragment
    # that itself starts with a space — confirm the spacing expected here.
    @test to_form(String, 1) == """{"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "NumpyArray", "primitive": "uint8", "parameters": { "__array__": "char" }, "form_key": "node2"}, "parameters": { "__array__": "string" }, "form_key": "node1"}"""

    element_cases = (
        (Int, "int64"),
        (Int32, "int32"),
        (Int64, "int64"),
        (Float32, "float32"),
        (Float64, "float64"),
    )

    # Vectors of primitives
    for (T, primitive) in element_cases
        @test to_form(Vector{T}, 1) == list_form(numpy_form(primitive, 2), 1)
    end

    # Vectors of vectors of primitives
    for (T, primitive) in element_cases
        @test to_form(Vector{Vector{T}}, 0) ==
            list_form(list_form(numpy_form(primitive, 2), 1), 0)
    end

    # Generic AbstractVector subtype
    @test to_form(SubArray{Int32, 1, nothing}, 1) == list_form(numpy_form("int32", 2), 1)
end
27+
28+
# Forms generated from a type and a shared key counter. The counter starts at 1
# and is carried across every call, so each case lists the key it expects first.
@testset "Form from type with an id reference" begin
    to_form = AwkwardArray.type_to_form

    # Expected-text builders for the two plain form shapes checked below.
    numpy_form(primitive, key) =
        """{"class": "NumpyArray", "primitive": "$primitive", "form_key": "node$key"}"""
    list_form(content, key) = string(
        """{"class": "ListOffsetArray", "offsets": "int64", "content": """,
        content,
        """, "form_key": "node$key"}""",
    )

    id_ref = Ref(1)

    # Primitive scalars: one key each
    for (T, primitive, key) in (
        (Bool, "bool", 1),
        (Int, "int64", 2),
        (Int32, "int32", 3),
        (Int32, "int32", 4),
        (Int64, "int64", 5),
    )
        @test to_form(T, id_ref) == numpy_form(primitive, key)
    end

    # Characters and strings
    @test to_form(Char, id_ref) == """{"class": "NumpyArray", "primitive": "uint8", "parameters": { "__array__": "char" }, "form_key": "node6"}"""
    # NOTE(review): `type_to_form(String, …)` joins ", " with a parameters fragment
    # that itself starts with a space — confirm the spacing expected here.
    @test to_form(String, id_ref) == """{"class": "ListOffsetArray", "offsets": "int64", "content": {"class": "NumpyArray", "primitive": "uint8", "parameters": { "__array__": "char" }, "form_key": "node8"}, "parameters": { "__array__": "string" }, "form_key": "node7"}"""

    # Vectors of primitives: the list takes `key`, its content `key + 1`
    for (T, primitive, key) in (
        (Int, "int64", 9),
        (Int32, "int32", 11),
        (Int64, "int64", 13),
        (Float32, "float32", 15),
        (Float64, "float64", 17),
    )
        @test to_form(Vector{T}, id_ref) == list_form(numpy_form(primitive, key + 1), key)
    end

    # Vectors of vectors: outer `key`, inner list `key + 1`, leaf `key + 2`
    for (T, primitive, key) in (
        (Int, "int64", 19),
        (Int32, "int32", 22),
        (Int64, "int64", 25),
        (Float32, "float32", 28),
        (Float64, "float64", 31),
    )
        @test to_form(Vector{Vector{T}}, id_ref) ==
            list_form(list_form(numpy_form(primitive, key + 2), key + 1), key)
    end

    # Generic AbstractVector subtype
    @test to_form(SubArray{Int32, 1, nothing}, id_ref) ==
        list_form(numpy_form("int32", 35), 34)
end
55+
56+
# Unsupported types must be rejected with a descriptive `ErrorException`.
@testset "type_to_form error tests" begin
    # `@test_throws` already records a failure when nothing is thrown, so no manual
    # `try`/`catch` with a `@test false` sentinel is needed. Passing an exception
    # instance checks both the exception type and its message.
    @test_throws ErrorException("Type 'Any' is not supported yet.") AwkwardArray.type_to_form(
        Any, 1
    )
    @test_throws ErrorException("Type 'Nothing' is not supported yet.") AwkwardArray.type_to_form(
        Nothing, 1
    )
end
73+
74+
# The key generator returns the key for the current counter value and then
# advances the counter by one.
@testset "Key generate" begin
    counter = Ref(1)
    @test AwkwardArray._generate_form_key!(counter) == "node1"
    @test counter[] == 2
end
83+
84+
# A two-branch tree yields a RecordArray form with one content per branch; the
# branches take keys 0 and 1 and the record itself takes key 2.
@testset "tree_branches_type tests" begin
    mutable struct TestTree
        field1::Vector{Int}
        field2::Vector{Int}
    end

    branches = TestTree([1, 2, 3], [4, 5, 6])

    @test AwkwardArray.tree_branches_type(branches) == string(
        """{"class": "RecordArray", "fields": [field1, field2], "contents": [""",
        """{"class": "NumpyArray", "primitive": "int64", "form_key": "node0"}, """,
        """{"class": "NumpyArray", "primitive": "int64", "form_key": "node1"}], "parameters": {}, "form_key": "node2"}""",
    )
end

0 commit comments

Comments
 (0)