Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug fix in UniqueElementsEncoding #418

Merged
merged 7 commits into from
Jul 21, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -2,7 +2,7 @@ name = "ComplexityMeasures"
uuid = "ab4b797d-85ee-42ba-b621-05d793b346a2"
authors = "Kristian Agasøster Haaga <[email protected]>, George Datseries <[email protected]>"
repo = "https://github.com/juliadynamics/ComplexityMeasures.jl.git"
version = "3.6.4"
version = "3.6.5"

[deps]
Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
27 changes: 14 additions & 13 deletions src/encoding_implementations/unique_elements_encoding.jl
Original file line number Diff line number Diff line change
@@ -25,20 +25,21 @@
struct UniqueElementsEncoding{T, I <: Integer} <: Encoding
encode_dict::Dict{T, I}
decode_dict::Dict{I, T}
function UniqueElementsEncoding(x)

# Encode in order of first appearance, because `sort` doesn't work if we mix types,
# e.g. `String` and `Int`.
x_unique = unique(x)
encode_dict = Dict{eltype(x), Int}()
decode_dict = Dict{Int, eltype(x)}()
for (i, xu) in enumerate(x_unique)
encode_dict[xu] = i
decode_dict[i] = xu
end
new{eltype(x), Int}(encode_dict, decode_dict)
end
# Construct a `UniqueElementsEncoding` from the input data `x`.
#
# `x` is flattened with `vec` and reduced to its unique elements; each unique element is
# paired with an integer code, and both the element => code and code => element lookup
# tables are stored in the returned encoding.
function UniqueElementsEncoding(x)
    # Codes follow the order of first appearance, because `sort` doesn't work if we mix
    # types, e.g. `String` and `Int`.
    symbols = unique(vec(x))
    T = eltype(symbols)
    encode_dict = Dict{T, Int}(s => code for (code, s) in enumerate(symbols))
    decode_dict = Dict{Int, T}(code => s for (code, s) in enumerate(symbols))
    return UniqueElementsEncoding(encode_dict, decode_dict)
end

# The encoding is derived from data, so a zero-argument call always throws an
# `ArgumentError`.
UniqueElementsEncoding() = throw(
    ArgumentError("`UniqueElementsEncoding` can't be initialized without input data."),
)
@@ -47,6 +48,6 @@
return encoding.encode_dict[x]
end

function decode(encoding::UniqueElementsEncoding, ω::Integer)
function decode(encoding::UniqueElementsEncoding, ω::I) where I <: Integer

Check warning on line 51 in src/encoding_implementations/unique_elements_encoding.jl

Codecov / codecov/patch

src/encoding_implementations/unique_elements_encoding.jl#L51

Added line #L51 was not covered by tests
return encoding.decode_dict[ω]
end
5 changes: 3 additions & 2 deletions src/outcome_spaces/unique_elements.jl
Original file line number Diff line number Diff line change
@@ -35,6 +35,7 @@ probabilities(x) = probabilities(UniqueElements(), x)
outcome_space(::UniqueElements, x) = sort!(unique(x))

# Codify `x` under the `UniqueElements` outcome space: build a `UniqueElementsEncoding`
# from the data and return the result of broadcasting `encode` over every element.
function codify(o::UniqueElements, x)
    # Flatten with `vec` before both building the encoding and broadcasting, so the
    # broadcast runs over the flattened data rather than over `x` itself (broadcasting
    # directly over a `StateSpaceSet` is reported to fail).
    xv = vec(x)
    encoding = UniqueElementsEncoding(xv)
    return encode.(Ref(encoding), xv)
end
15 changes: 13 additions & 2 deletions test/encodings/encodings/unique_elements_encoding.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
using Test
using ComplexityMeasures
using StateSpaceSets

@testset "UniqueElementsEncoding" begin
    # Mixed element types: codes are assigned in order of first appearance.
    x = ['a', 2, 5, 2, 5, 'a']
    e = UniqueElementsEncoding(x)
    @test encode.(Ref(e), x) == [1, 2, 3, 2, 3, 1]
    # Decoding the codes must give back the original elements.
    @test decode.(Ref(e), encode.(Ref(e), x)) == x

    # Homogeneous (string) input.
    y = ["a", "b", "c", "b", "a"]
    ey = UniqueElementsEncoding(y)
    @test encode.(Ref(ey), y) == [1, 2, 3, 2, 1]

    # A `StateSpaceSet` flattened with `vec` behaves like a plain vector.
    z = vec(StateSpaceSet(y))
    ez = UniqueElementsEncoding(z)
    @test encode.(Ref(ez), z) == [1, 2, 3, 2, 1]
end

# TODO: this should really work (but broadcasting fails). The error is not in this package,
# but is due to lacking broadcasting implementation in StateSpaceSets.jl
# w = StateSpaceSet(y); ew = UniqueElementsEncoding(w)
# @test encode.(Ref(ew), w) == [1, 2, 3, 2, 1]
3 changes: 3 additions & 0 deletions test/outcome_spaces/implementations/unique_elements.jl
Original file line number Diff line number Diff line change
@@ -38,4 +38,7 @@ end

# Codification of vector inputs (time series)
x = [1, 3, 2, 1, 2, 2, 1, 3, 1]
y = StateSpaceSet(["a", "b", "c", "b", "a"])

@test codify(UniqueElements(), x) isa Vector{Int}
@test codify(UniqueElements(), y) isa Vector{Int}