-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathunique_elements.jl
41 lines (31 loc) · 1.13 KB
/
unique_elements.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
export UniqueElements

"""
    UniqueElements()

An [`OutcomeSpace`](@ref) that simply counts how often each distinct element occurs in
a univariate time series or a multivariate dataset. Using it is equivalent to calling
[`probabilities`](@ref) without specifying an estimator.

## Outcome space

The outcome space consists of the unique values of the input, in sorted order.
Consequently, the input `x` must be supplied for [`outcome_space`](@ref) to be
well-defined.

## Implements

- [`codify`](@ref). Used for encoding inputs where ordering matters (e.g. time series).
"""
struct UniqueElements <: CountBasedOutcomeSpace
end
# Trait method: reports `UniqueElements` as a counting-based outcome space.
function is_counting_based(::UniqueElements)
    return true
end
# Counting with `UniqueElements` forwards to the single-argument `counts` method.
function counts(::UniqueElements, x)
    return counts(x)
end
# Return the counts of the distinct elements of `x` together with the matching outcomes.
# The input itself is left untouched; all in-place work happens on a copy.
function counts_and_outcomes(::UniqueElements, x)
    scratch = copy(x)
    freqs = fasthist!(scratch)
    # `fasthist!` leaves `scratch` sorted, so deduplicating it in place directly
    # yields the outcomes in sorted order without a separate sort.
    distinct = unique!(scratch)
    result = Counts(freqs, (distinct,))
    return result, outcomes(result)
end
# Convenience method: when no outcome space is given, `UniqueElements` is used.
function probabilities(x)
    return probabilities(UniqueElements(), x)
end
# The outcomes are the distinct values of `x`, returned in sorted order.
# `unique` allocates a new collection, so sorting it in place does not modify `x`.
function outcome_space(::UniqueElements, x)
    distinct = unique(x)
    return sort!(distinct)
end
# Encode every element of `x` with a `UniqueElementsEncoding` constructed from the
# flattened input itself.
function codify(::UniqueElements, x)
    flat = vec(x)
    enc = UniqueElementsEncoding(flat)
    # `Ref` shields the encoding from broadcasting, so only `flat` is iterated.
    return encode.(Ref(enc), flat)
end