Skip to content
This repository was archived by the owner on May 4, 2019. It is now read-only.

`reorder` and `reorder!` functions #178

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/DataArrays.jl
Original file line number Diff line number Diff line change
@@ -46,6 +46,7 @@ module DataArrays
PooledDataVector,
reldiff,
reorder,
reorder!,
rep,
replace!,
setlevels!,
63 changes: 62 additions & 1 deletion src/pooleddataarray.jl
Original file line number Diff line number Diff line change
@@ -433,9 +433,70 @@ function setlevels!{T,R}(x::PooledDataArray{T,R}, d::Dict{T,Any}) # this version
setlevels!(x, newpool)
end

##############################################################################
##
## reorder()
##
##############################################################################

"""
    reorder(x::PooledDataArray)

Re-sort the pool of `x`: return `PooledDataArray(x, sort(levels(x)))`, i.e. a
`PooledDataArray` constructed from `x` with its levels in sorted order.

Input:
- `x` reference object used to construct the new one

Output:
The `PooledDataArray` produced by the constructor call above
"""
function reorder(x::PooledDataArray)
    sortedlevels = sort(levels(x))  # just re-sort the pool
    return PooledDataArray(x, sortedlevels)
end

# Forwards to a `reorder` method whose first argument is a function, using `mean`.
# NOTE(review): the only function-first `reorder` definition visible in this part of the
# file is commented out under the FIXME further down — confirm a live method exists,
# otherwise this call presumably cannot dispatch.
reorder(x::PooledDataArray, y::AbstractVector...) = reorder(mean, x, y...)
"""
    reorder(pda::PooledDataArray, newpool::AbstractVector, inclusioncheck=true)

Build a `PooledDataArray` from `pda` that uses `newpool` as its pool, by calling
`PooledDataArray(pda, newpool)`. The new pool should be a subset of the old one
(see the `inclusioncheck` argument). If you want to change pool identifiers, use
`setlevels` first, before using `reorder`.

Input:
- `pda` reference object used to construct the new one
- `newpool` pool that replaces the current one
- `inclusioncheck` (default `true`) when true, checks that `newpool` ⊆ `pda.pool`

Output:
The result of `PooledDataArray(pda, newpool)`

Throws:
- `ArgumentError` when `inclusioncheck` is true and `newpool` is not a subset of `pda.pool`
"""
function reorder(pda::PooledDataArray, newpool::AbstractVector, inclusioncheck=true)
    # Guard: reject pools that introduce levels unknown to `pda`, unless the caller opted out.
    if inclusioncheck && !issubset(newpool, pda.pool)
        throw(ArgumentError("A new pool must be a subset of the current one."))
    end

    return PooledDataArray(pda, newpool)
end

"""
    reorder!(pda::PooledDataArray{T,R,N}, newpool::Vector{T}, inclusioncheck=true)

Replace the pool of `pda` with `newpool` in place and rewrite `pda.refs` so that
they point into the new pool. The new pool should be a subset of the old one
(see the `inclusioncheck` argument). If you want to change pool identifiers, use
`setlevels` first, before using `reorder!`.

Input:
- `pda` the `PooledDataArray` to be changed
- `newpool` pool that replaces the current one
- `inclusioncheck` (default `true`) when true, checks that `newpool` ⊆ `pda.pool`

Output:
The same `pda` object, after modification

Throws:
- `ArgumentError` when `inclusioncheck` is true and `newpool` is not a subset of `pda.pool`
"""
function reorder!{T,R<:Integer,N}(pda::PooledDataArray{T,R,N},
                                  newpool::Vector{T},
                                  inclusioncheck=true)
    if inclusioncheck && !issubset(newpool, pda.pool)
        throw(ArgumentError("A new pool must be a subset of the current one."))
    end

    # Lookup table indexed by old reference.
    # NOTE(review): presumably `findat` yields, for each entry of the old pool, its
    # position in `newpool` (and 0 when absent) — confirm against `findat`'s definition.
    remap::Array{R} = findat(newpool, pda.pool)

    refs = pda.refs  # same array as `pda.refs`; it is updated in place below
    for i in eachindex(refs)
        current = refs[i]
        # A zero reference is left untouched.
        current == 0 && continue
        refs[i] = remap[current]
    end

    pda.pool = newpool
    return pda
end

# commented out because of issue #167
#reorder(x::PooledDataArray, y::AbstractVector...) = reorder(mean, x, y...)

### FIXME: this can't work because we don't know about DataFrames
# reorder(fun::Function, x::PooledDataArray, y::AbstractVector...) =
23 changes: 23 additions & 0 deletions test/pooleddataarray.jl
Original file line number Diff line number Diff line change
@@ -51,6 +51,7 @@ module TestPDA
pim = @pdata [1 + im, 2 + im, 3 + im, 2 + im, 1 + im]
@assert levels(pim) == [1 + im, 2 + im, 3 + im]


# Test explicitly setting refs type
testarray = [1, 1, 2, 2, 0, 0, 3, 3]
testdata = @data [1, 1, 2, 2, 0, 0, 3, 3]
@@ -107,4 +108,26 @@ module TestPDA
pda = @pdata([NA, "A", "B", "C", "A", "B"])
@test isequal(Base.permute!!(copy(pda), [2, 5, 3, 6, 4, 1]), @pdata(["A", "A", "B", "B", "C", NA]))
@test isequal(Base.ipermute!!(copy(pda), [6, 1, 3, 5, 2, 4]), @pdata(["A", "A", "B", "B", "C", NA]))

#1. reordering levels with reorder! (in place) and reorder (returns a separate object)
pda = @pdata(["high" , "medium" , "low" , "high" , NA, "medium"])
#1.1 positive scenarios
@test isequal(pda.pool, Vector{eltype(pda.pool)}(["high", "low", "medium"])) #initial pool is in alphabetical order
@test isequal(pda.refs, Vector{eltype(pda.refs)}([1,3,2,1,0,3])) #high is 1, low is 2, medium is 3 according to alphabetical order; NA is 0

reorder!(pda, ["low","medium","high"]) #reorder according to e.g. visual plot needs
@test isequal(pda.pool, Vector{eltype(pda.pool)}(["low", "medium", "high"])) #semantic order
@test isequal(pda.refs, Vector{eltype(pda.refs)}([3,2,1,3,0,2])) #refs now index into the new pool: low is 1, medium is 2, high is 3

reorder!(pda, ["low","medium"]) #new pool is a strict subset: "high" is dropped
@test isequal(pda.pool, Vector{eltype(pda.pool)}(["low", "medium"])) #semantic order
@test isequal(pda.refs, Vector{eltype(pda.refs)}([0,2,1,0,0,2])) #refs that pointed at "high" become 0

newpda = reorder(pda, ["low"])
@test newpda !== pda #non-mutating variant must return a different object
#1.2 negative scenarios
pda = @pdata(["high" , "medium" , "low" , "high" , NA, "medium"])
@test_throws ArgumentError reorder(pda, ["very low","very high"]) #new levels must be a subset of the original ones
reorder!(pda, ["new low","new medium"], false) #skip the inclusion check and pass level names that are not in the pool
@test isequal(pda.refs, Vector{eltype(pda.refs)}([0,0,0,0,0,0])) #no old level is in the new pool, so every ref becomes 0: a mess, not a reordering
end