Skip to content

Commit fd1144b

Browse files
JoaoAparicio joao-maven
authored and committed
Add @testsets for misc tests (apache#421)
Tests under "misc" aren't bundled into testsets. As a consequence, the test run terminates as soon as the first misc test fails, making it difficult to get a good picture of how many failing misc tests remain. In this commit I've bundled the misc tests into testsets. For example, in (unrelated) work that I'm doing, I can now see what's still missing: ![with_testsets](https://user-images.githubusercontent.com/5380486/230786895-94854bda-b14d-4744-87a7-6cb315010282.jpeg) With testsets, I can see that I have 5 errors out of 127 total misc tests. Before, it would show just 1 error out of 69 misc tests, because it stopped early. Does anyone else like this? Co-authored-by: Joao Aparicio <[email protected]>
1 parent 3e0fff3 commit fd1144b

File tree

1 file changed

+86
-37
lines changed

1 file changed

+86
-37
lines changed

test/runtests.jl

Lines changed: 86 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,7 @@ end # @testset "abstract path"
110110

111111
@testset "misc" begin
112112

113-
# multiple record batches
113+
@testset "# multiple record batches" begin
114114
t = Tables.partitioner(((col1=Union{Int64, Missing}[1,2,3,4,5,6,7,8,9,missing],), (col1=Union{Int64, Missing}[missing,11],)))
115115
io = Arrow.tobuffer(t)
116116
tt = Arrow.Table(io)
@@ -138,8 +138,9 @@ tt, st = state
138138

139139
@test isequal(collect(str)[1].col1, [1,2,3,4,5,6,7,8,9,missing])
140140
@test isequal(collect(str)[2].col1, [missing,11])
141+
end
141142

142-
# dictionary batch isDelta
143+
@testset "# dictionary batch isDelta" begin
143144
t = (
144145
col1=Int64[1,2,3,4],
145146
col2=Union{String, Missing}["hey", "there", "sailor", missing],
@@ -155,7 +156,9 @@ tt = Arrow.Table(Arrow.tobuffer(tt; dictencode=true, dictencodenested=true))
155156
@test tt.col1 == [1,2,3,4,1,2,5,6]
156157
@test isequal(tt.col2, ["hey", "there", "sailor", missing, "hey", "there", "sailor2", missing])
157158
@test isequal(tt.col3, vcat(NamedTuple{(:a, :b), Tuple{Int64, Union{Missing, NamedTuple{(:c,), Tuple{String}}}}}[(a=Int64(1), b=missing), (a=Int64(1), b=missing), (a=Int64(3), b=(c="sailor",)), (a=Int64(4), b=(c="jo-bob",))], NamedTuple{(:a, :b), Tuple{Int64, Union{Missing, NamedTuple{(:c,), Tuple{String}}}}}[(a=Int64(1), b=missing), (a=Int64(1), b=missing), (a=Int64(5), b=(c="sailor2",)), (a=Int64(4), b=(c="jo-bob",))]))
159+
end
158160

161+
@testset "metadata" begin
159162
t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
160163
meta = Dict("key1" => "value1", "key2" => "value2")
161164
meta2 = Dict("colkey1" => "colvalue1", "colkey2" => "colvalue2")
@@ -176,8 +179,9 @@ tt = Arrow.Table(Arrow.tobuffer(t; colmetadata=Dict(:col2 => meta2, :col3 => met
176179
@test Arrow.getmetadata(tt.col2)["colkey1"] == "colvalue1"
177180
@test Arrow.getmetadata(tt.col2)["colkey2"] == "colvalue2"
178181
@test Arrow.getmetadata(tt.col3)["colkey3"] == "colvalue3"
182+
end
179183

180-
# custom compressors
184+
@testset "# custom compressors" begin
181185
lz4 = Arrow.CodecLz4.LZ4FrameCompressor(; compressionlevel=8)
182186
Arrow.CodecLz4.TranscodingStreams.initialize(lz4)
183187
t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
@@ -191,69 +195,80 @@ t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
191195
tt = Arrow.Table(Arrow.tobuffer(t; compress=zstd))
192196
@test length(tt) == length(t)
193197
@test all(isequal.(values(t), values(tt)))
198+
end
194199

195-
# custom alignment
200+
@testset "# custom alignment" begin
196201
t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
197202
tt = Arrow.Table(Arrow.tobuffer(t; alignment=64))
198203
@test length(tt) == length(t)
199204
@test all(isequal.(values(t), values(tt)))
205+
end
200206

201-
# 53
207+
@testset "# 53" begin
202208
s = "a" ^ 100
203209
t = (a=[SubString(s, 1:10), SubString(s, 11:20)],)
204210
tt = Arrow.Table(Arrow.tobuffer(t))
205211
@test tt.a == ["aaaaaaaaaa", "aaaaaaaaaa"]
212+
end
206213

207-
# 49
214+
@testset "# 49" begin
208215
@test_throws SystemError Arrow.Table("file_that_doesnt_exist")
209216
@test_throws SystemError Arrow.Table(p"file_that_doesnt_exist")
217+
end
210218

211-
# 52
219+
@testset "# 52" begin
212220
t = (a=Arrow.DictEncode(string.(1:129)),)
213221
tt = Arrow.Table(Arrow.tobuffer(t))
222+
end
214223

215-
# 60: unequal column lengths
224+
@testset "# 60: unequal column lengths" begin
216225
io = IOBuffer()
217226
@test_throws ArgumentError Arrow.write(io, (a = Int[], b = ["asd"], c=collect(1:100)))
227+
end
218228

219-
# nullability of custom extension types
229+
@testset "# nullability of custom extension types" begin
220230
t = (a=['a', missing],)
221231
tt = Arrow.Table(Arrow.tobuffer(t))
222232
@test isequal(tt.a, ['a', missing])
233+
end
223234

224-
# automatic custom struct serialization/deserialization
235+
@testset "# automatic custom struct serialization/deserialization" begin
225236
t = (col1=[CustomStruct(1, 2.3, "hey"), CustomStruct(4, 5.6, "there")],)
226237

227238
Arrow.ArrowTypes.arrowname(::Type{CustomStruct}) = Symbol("JuliaLang.CustomStruct")
228239
Arrow.ArrowTypes.JuliaType(::Val{Symbol("JuliaLang.CustomStruct")}, S) = CustomStruct
229240
tt = Arrow.Table(Arrow.tobuffer(t))
230241
@test length(tt) == length(t)
231242
@test all(isequal.(values(t), values(tt)))
243+
end
232244

233-
# 76
245+
@testset "# 76" begin
234246
t = (col1=NamedTuple{(:a,),Tuple{Union{Int,String}}}[(a=1,), (a="x",)],)
235247
tt = Arrow.Table(Arrow.tobuffer(t))
236248
@test length(tt) == length(t)
237249
@test all(isequal.(values(t), values(tt)))
250+
end
238251

239-
# 89 etc. - UUID FixedSizeListKind overloads
252+
@testset "# 89 etc. - UUID FixedSizeListKind overloads" begin
240253
@test Arrow.ArrowTypes.gettype(Arrow.ArrowTypes.ArrowKind(UUID)) == UInt8
241254
@test Arrow.ArrowTypes.getsize(Arrow.ArrowTypes.ArrowKind(UUID)) == 16
255+
end
242256

243-
# 98
257+
@testset "# 98" begin
244258
t = (a = [Nanosecond(0), Nanosecond(1)], b = [uuid4(), uuid4()], c = [missing, Nanosecond(1)])
245259
tt = Arrow.Table(Arrow.tobuffer(t))
246260
@test copy(tt.a) isa Vector{Nanosecond}
247261
@test copy(tt.b) isa Vector{UUID}
248262
@test copy(tt.c) isa Vector{Union{Missing,Nanosecond}}
263+
end
249264

250-
# copy on DictEncoding w/ missing values
265+
@testset "# copy on DictEncoding w/ missing values" begin
251266
x = PooledArray(["hey", missing])
252267
x2 = Arrow.toarrowvector(x)
253268
@test isequal(copy(x2), x)
269+
end
254270

255-
# some dict encoding coverage
256-
271+
@testset "# some dict encoding coverage" begin
257272
# signed indices for DictEncodedKind #112 #113 #114
258273
av = Arrow.toarrowvector(PooledArray(repeat(["a", "b"], inner = 5)))
259274
@test isa(first(av.indices), Signed)
@@ -270,31 +285,36 @@ av = Arrow.toarrowvector(CategoricalArray(["a", "bb", "ccc"]))
270285
@test isa(first(av.indices), Signed)
271286
@test length(av) == 3
272287
@test eltype(av) == String
288+
end
273289

274-
# 120
290+
@testset "# 120" begin
275291
x = PooledArray(["hey", missing])
276292
x2 = Arrow.toarrowvector(x)
277293
@test eltype(DataAPI.refpool(x2)) == Union{Missing, String}
278294
@test eltype(DataAPI.levels(x2)) == String
279295
@test DataAPI.refarray(x2) == [1, 2]
296+
end
280297

281-
# 121
298+
@testset "# 121" begin
282299
a = PooledArray(repeat(string.('S', 1:130), inner=5), compress=true)
283300
@test eltype(a.refs) == UInt8
284301
av = Arrow.toarrowvector(a)
285302
@test eltype(av.indices) == Int16
303+
end
286304

287-
# 123
305+
@testset "# 123" begin
288306
t = (x = collect(zip(rand(10), rand(10))),)
289307
tt = Arrow.Table(Arrow.tobuffer(t))
290308
@test tt.x == t.x
309+
end
291310

292-
# 144
311+
@testset "# 144" begin
293312
t = Tables.partitioner(((a=Arrow.DictEncode([1,2,3]),), (a=Arrow.DictEncode(fill(1, 129)),)))
294313
tt = Arrow.Table(Arrow.tobuffer(t))
295314
@test length(tt.a) == 132
315+
end
296316

297-
# 126
317+
@testset "# 126" begin
298318
t = Tables.partitioner(
299319
(
300320
(a=Arrow.toarrowvector(PooledArray([1,2,3 ])),),
@@ -316,19 +336,24 @@ io = IOBuffer()
316336
@test_logs (:error, "error writing arrow data on partition = 2") begin
317337
@test_throws ErrorException Arrow.write(io, t)
318338
end
339+
end
319340

320-
# 75
341+
@testset "# 75" begin
321342
tbl = Arrow.Table(Arrow.tobuffer((sets = [Set([1,2,3]), Set([1,2,3])],)))
322343
@test eltype(tbl.sets) <: Set
344+
end
323345

324-
# 85
346+
@testset "# 85" begin
325347
tbl = Arrow.Table(Arrow.tobuffer((tups = [(1, 3.14, "hey"), (1, 3.14, "hey")],)))
326348
@test eltype(tbl.tups) <: Tuple
349+
end
327350

328-
# Nothing
351+
@testset "Nothing" begin
329352
tbl = Arrow.Table(Arrow.tobuffer((nothings=[nothing, nothing, nothing],)))
330353
@test tbl.nothings == [nothing, nothing, nothing]
354+
end
331355

356+
@testset "arrowmetadata" begin
332357
# arrowmetadata
333358
t = (col1=[CustomStruct2{:hey}(1), CustomStruct2{:hey}(2)],)
334359
ArrowTypes.arrowname(::Type{<:CustomStruct2}) = Symbol("CustomStruct2")
@@ -340,23 +365,26 @@ ArrowTypes.arrowmetadata(::Type{CustomStruct2{sym}}) where {sym} = sym
340365
ArrowTypes.JuliaType(::Val{:CustomStruct2}, S, meta) = CustomStruct2{Symbol(meta)}
341366
tbl = Arrow.Table(Arrow.tobuffer(t))
342367
@test eltype(tbl.col1) == CustomStruct2{:hey}
368+
end
343369

344-
# 166
370+
@testset "# 166" begin
345371
t = (
346372
col1=[zero(Arrow.Timestamp{Arrow.Meta.TimeUnits.NANOSECOND, nothing})],
347373
)
348374
tbl = Arrow.Table(Arrow.tobuffer(t))
349375
@test_logs (:warn, r"automatically converting Arrow.Timestamp with precision = NANOSECOND") begin
350376
@test tbl.col1[1] == Dates.DateTime(1970)
351377
end
378+
end
352379

353-
# 95; Arrow.ToTimestamp
380+
@testset "# 95; Arrow.ToTimestamp" begin
354381
x = [ZonedDateTime(Dates.DateTime(2020), tz"Europe/Paris")]
355382
c = Arrow.ToTimestamp(x)
356383
@test eltype(c) == Arrow.Timestamp{Arrow.Flatbuf.TimeUnits.MILLISECOND, Symbol("Europe/Paris")}
357384
@test c[1] == Arrow.Timestamp{Arrow.Flatbuf.TimeUnits.MILLISECOND, Symbol("Europe/Paris")}(1577833200000)
385+
end
358386

359-
# 158
387+
@testset "# 158" begin
360388
# arrow ipc stream generated from pyarrow with no record batches
361389
bytes = UInt8[0xff, 0xff, 0xff, 0xff, 0x78, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00,
362390
0x06, 0x00, 0x05, 0x00, 0x08, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00,
@@ -368,8 +396,9 @@ bytes = UInt8[0xff, 0xff, 0xff, 0xff, 0x78, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00,
368396
tbl = Arrow.Table(bytes)
369397
@test length(tbl.a) == 0
370398
@test eltype(tbl.a) == Union{Int64, Missing}
399+
end
371400

372-
# 181
401+
@testset "# 181" begin
373402
d = Dict{Int,Int}()
374403
for i in 1:9
375404
d = Dict(i => d)
@@ -378,16 +407,17 @@ tbl = (x = [d],)
378407
msg = "reached nested serialization level (20) deeper than provided max depth argument (19); to increase allowed nesting level, pass `maxdepth=X`"
379408
@test_throws ErrorException(msg) Arrow.tobuffer(tbl; maxdepth=19)
380409
@test Arrow.Table(Arrow.tobuffer(tbl; maxdepth=20)).x == tbl.x
410+
end
381411

382-
# 167
412+
@testset "# 167" begin
383413
t = (
384414
col1=[["boop", "she"], ["boop", "she"], ["boo"]],
385415
)
386416
tbl = Arrow.Table(Arrow.tobuffer(t))
387417
@test eltype(tbl.col1) == Vector{String}
418+
end
388419

389-
# 200
390-
@testset "VersionNumber" begin
420+
@testset "# 200 VersionNumber" begin
391421
t = (
392422
col1=[v"1"],
393423
)
@@ -396,6 +426,7 @@ tbl = Arrow.Table(Arrow.tobuffer(t))
396426
end
397427

398428
@testset "`show`" begin
429+
str = nothing
399430
table = (; a = 1:5, b = fill(1.0, 5))
400431
arrow_table = Arrow.Table(Arrow.tobuffer(table))
401432
# 2 and 3-arg show with no metadata
@@ -425,11 +456,11 @@ end
425456

426457
end
427458

428-
#194
459+
@testset "# 194" begin
429460
@test isempty(Arrow.Table(Arrow.tobuffer(Dict{Symbol, Vector}())))
461+
end
430462

431-
432-
#229
463+
@testset "# 229" begin
433464
struct Foo229{x}
434465
y::String
435466
z::Int
@@ -443,8 +474,9 @@ cols = (k1=[Foo229{:a}("a", 1), Foo229{:b}("b", 2)], k2=[Foo229{:c}("c", 3), Foo
443474
tbl = Arrow.Table(Arrow.tobuffer(cols))
444475
@test tbl.k1 == cols.k1
445476
@test tbl.k2 == cols.k2
477+
end
446478

447-
# PR 234
479+
@testset "# PR 234" begin
448480
# bugfix parsing primitive arrays
449481
buf = [
450482
0x14,0x00,0x00,0x00,0x00,0x00,0x0e,0x00,0x14,0x00,0x00,0x00,0x10,0x00,0x0c,0x00,0x08,
@@ -471,8 +503,9 @@ end
471503

472504
d = Arrow.FlatBuffers.getrootas(TestData, buf, 0);
473505
@test d.DataInt32 == UInt32[1,2,3]
506+
end
474507

475-
# test multiple inputs treated as one table
508+
@testset "# test multiple inputs treated as one table" begin
476509
t = (
477510
col1=[1, 2, 3, 4, 5],
478511
col2=[1.2, 2.3, 3.4, 4.5, 5.6],
@@ -497,18 +530,24 @@ t2 = (
497530
col1=[1.2, 2.3, 3.4, 4.5, 5.6],
498531
)
499532
@test_throws ArgumentError collect(Arrow.Stream([Arrow.tobuffer(t), Arrow.tobuffer(t2)]))
533+
end
500534

535+
@testset "# 253" begin
501536
# https://github.com/apache/arrow-julia/issues/253
502537
@test Arrow.toidict(Pair{String, String}[]) == Base.ImmutableDict{String, String}()
538+
end
503539

540+
@testset "# 232" begin
504541
# https://github.com/apache/arrow-julia/issues/232
505542
t = (; x=[Dict(true => 1.32, 1.2 => 0.53495216)])
506543
@test_throws ArgumentError("`keytype(d)` must be concrete to serialize map-like `d`, but `keytype(d) == Real`") Arrow.tobuffer(t)
507544
t = (; x=[Dict(32.0 => true, 1.2 => 0.53495216)])
508545
@test_throws ArgumentError("`valtype(d)` must be concrete to serialize map-like `d`, but `valtype(d) == Real`") Arrow.tobuffer(t)
509546
t = (; x=[Dict(true => 1.32, 1.2 => true)])
510547
@test_throws ArgumentError("`keytype(d)` must be concrete to serialize map-like `d`, but `keytype(d) == Real`") Arrow.tobuffer(t)
548+
end
511549

550+
@testset "# 214" begin
512551
# https://github.com/apache/arrow-julia/issues/214
513552
t1 = (; x = [(Nanosecond(42),)])
514553
t2 = Arrow.Table(Arrow.tobuffer(t1))
@@ -519,6 +558,7 @@ t1 = (; x = [(; a=Nanosecond(i), b=Nanosecond(i+1)) for i = 1:5])
519558
t2 = Arrow.Table(Arrow.tobuffer(t1))
520559
t3 = Arrow.Table(Arrow.tobuffer(t2))
521560
@test t3.x == t1.x
561+
end
522562

523563
@testset "Writer" begin
524564
io = IOBuffer()
@@ -538,15 +578,19 @@ t3 = Arrow.Table(Arrow.tobuffer(t2))
538578
@test table.b == collect(b)
539579
end
540580

541-
# Empty input
581+
@testset "# Empty input" begin
542582
@test Arrow.Table(UInt8[]) isa Arrow.Table
543583
@test isempty(Tables.rows(Arrow.Table(UInt8[])))
544584
@test Arrow.Stream(UInt8[]) isa Arrow.Stream
545585
@test isempty(Tables.partitions(Arrow.Stream(UInt8[])))
586+
end
546587

588+
@testset "# 324" begin
547589
# https://github.com/apache/arrow-julia/issues/324
548590
@test_throws ArgumentError filter!(x -> x > 1, Arrow.toarrowvector([1, 2, 3]))
591+
end
549592

593+
@testset "# 327" begin
550594
# https://github.com/apache/arrow-julia/issues/327
551595
zdt = ZonedDateTime(DateTime(2020, 11, 1, 6), tz"America/New_York"; from_utc=true)
552596
arrow_zdt = ArrowTypes.toarrow(zdt)
@@ -557,20 +601,25 @@ zdt_again = ArrowTypes.fromarrow(ZonedDateTime, arrow_zdt)
557601
original_table = (; col = [ ZonedDateTime(DateTime(1, 2, 3, 4, 5, 6), tz"UTC+3") for _ in 1:5])
558602
table = Arrow.Table(joinpath(@__DIR__, "old_zdt.arrow"))
559603
@test original_table.col == table.col
604+
end
560605

606+
@testset "# 243" begin
561607
if pkgversion(ArrowTypes) >= v"2.0.1" # need the ArrowTypes bugfix to pass this test
562608
# https://github.com/apache/arrow-julia/issues/243
563609
table = (; col = [(; v=v"1"), (; v=v"2"), missing])
564610
@test isequal(Arrow.Table(Arrow.tobuffer(table)).col, table.col)
565611
end
612+
end
566613

614+
@testset "# 367" begin
567615
# https://github.com/apache/arrow-julia/issues/367
568616
if pkgversion(ArrowTypes) >= v"2.0.2"
569617
t = (; x=Union{ZonedDateTime,Missing}[missing])
570618
a = Arrow.Table(Arrow.tobuffer(t))
571619
@test Tables.schema(a) == Tables.schema(t)
572620
@test isequal(a.x, t.x)
573621
end
622+
end
574623

575624
# https://github.com/apache/arrow-julia/issues/414
576625
df = DataFrame(("$i" => rand(1000) for i in 1:65536)...)

0 commit comments

Comments
 (0)