From e15c3bcca104dbc97ecf4a145a27174c6afcb294 Mon Sep 17 00:00:00 2001 From: Jacob Quinn Date: Fri, 14 Aug 2020 01:09:19 -0600 Subject: [PATCH 1/3] Fix iteration issue when chains have zero-length Fixes #27. The issue here is that `iterate` for `ChainedVector` was assuming that underlying array chains wouldn't have zero-length. If a user is doing a lot of filtering/deleting, however, it might be the case that certain chunks end up with zero-length (though we do try to prune those out when possible, so I'm still a little unsure how we get in this state). Nevertheless, this assumption is a bit optimistic, and we can do better by just checking if the next chunk is zero-length when iterating and moving on to the next chunk if so. --- src/chainedvector.jl | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/chainedvector.jl b/src/chainedvector.jl index 7b6beb2..ef6b4e4 100644 --- a/src/chainedvector.jl +++ b/src/chainedvector.jl @@ -50,18 +50,29 @@ end # efficient iteration @inline function Base.iterate(A::ChainedVector) length(A) == 0 && return nothing - i = 2 + i = 1 chunk = 1 chunk_i = 1 chunk_len = A.inds[1] - if i > chunk_len + while i > chunk_len chunk += 1 - chunk_i = 1 - @inbounds chunk_len = A.inds[min(length(A.inds), chunk)] + @inbounds chunk_len = A.inds[chunk] + i <= chunk_len && break + end + x = A.arrays[chunk][1] + # find next valid index + i += 1 + if i > chunk_len + while true + chunk += 1 + chunk > length(A.inds) && break + @inbounds chunk_len = A.inds[chunk] + i <= chunk_len && break + end else chunk_i += 1 end - return A.arrays[1][1], (i, chunk, chunk_i, chunk_len, length(A)) + return x, (i, chunk, chunk_i, chunk_len, length(A)) end @inline function Base.iterate(A::ChainedVector, (i, chunk, chunk_i, chunk_len, len)) @@ -69,9 +80,13 @@ end @inbounds x = A.arrays[chunk][chunk_i] i += 1 if i > chunk_len - chunk += 1 chunk_i = 1 - @inbounds chunk_len = A.inds[min(length(A.inds), chunk)] + while true + chunk += 1 + chunk > length(A.inds) && break + @inbounds chunk_len = A.inds[chunk] + i <= chunk_len && break + end else chunk_i += 1 end From 63e252f44e3a3cdf508efb2af81ffd743c5eb7f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 14 Aug 2020 12:39:57 +0200 Subject: [PATCH 2/3] minor code fixes, bump version, and added tests --- Project.toml | 2 +- src/chainedvector.jl | 2 +- test/runtests.jl | 24 ++++++++++++++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index f45f63b..c6dfd55 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "SentinelArrays" uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c" authors = ["Jacob Quinn "] -version = "1.2.10" +version = "1.2.11" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" diff --git a/src/chainedvector.jl b/src/chainedvector.jl index ef6b4e4..20be359 100644 --- a/src/chainedvector.jl +++ b/src/chainedvector.jl @@ -19,6 +19,7 @@ function ChainedVector(arrays::Vector{A}) where {A <: AbstractVector{T}} where { inds = Vector{Int}(undef, n) x = 0 @inbounds for i = 1:n + # note that arrays[i] can have zero length x += length(arrays[i]) inds[i] = x end @@ -57,7 +58,6 @@ end while i > chunk_len chunk += 1 @inbounds chunk_len = A.inds[chunk] - i <= chunk_len && break end x = A.arrays[chunk][1] # find next valid index diff --git a/test/runtests.jl b/test/runtests.jl index 8a3b3e9..0c98517 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -542,4 +542,28 @@ c2 = copy(c) deleteat!(c2, Int[]) @test length(c2) == 15 +@testset "iteration protocol on ChainedVector" begin + @test isnothing(iterate(ChainedVector([[]]))) + for len in 0:6 + for j in 0:len + cv = ChainedVector([1:j, j+1:len]) + for (i, v) in enumerate(cv) + @test i == v + end + for k in j:len + cv = ChainedVector([1:j, j+1:k, k+1:len]) + for (i, v) in enumerate(cv) + @test i == v + end + for l in k:len + cv = ChainedVector([1:j, j+1:k, k+1:l, l+1:len]) + for (i, v) in enumerate(cv) + @test i == v + end + end + end + end + end +end + end From 75cdb4aa756b9e4483b56cc0e7d740477b6e5c05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 14 Aug 2020 12:48:19 +0200 Subject: [PATCH 3/3] improve tests --- test/runtests.jl | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 0c98517..7f00faa 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -543,23 +543,43 @@ deleteat!(c2, Int[]) @test length(c2) == 15 @testset "iteration protocol on ChainedVector" begin - @test isnothing(iterate(ChainedVector([[]]))) for len in 0:6 + cv = ChainedVector([1:len]) + @test length(cv) == len + c = 0 + for (i, v) in enumerate(cv) + c += 1 + @test i == v + end + @test c == len for j in 0:len cv = ChainedVector([1:j, j+1:len]) + @test length(cv) == len + c = 0 for (i, v) in enumerate(cv) + c += 1 @test i == v end + @test c == len for k in j:len cv = ChainedVector([1:j, j+1:k, k+1:len]) + @test length(cv) == len + c = 0 for (i, v) in enumerate(cv) + c += 1 @test i == v end + @test c == len + for l in k:len cv = ChainedVector([1:j, j+1:k, k+1:l, l+1:len]) + @test length(cv) == len + c = 0 for (i, v) in enumerate(cv) + c += 1 @test i == v end + @test c == len end end end