JuliaStats · MaciekLeks · Jan 17, 2016 · Jan 17, 2016 · Jan 17, 2016 · Jan 18, 2016
diff --git a/src/DataArrays.jl b/src/DataArrays.jl
@@ -46,6 +46,7 @@ module DataArrays
            PooledDataVector,
            reldiff,
            reorder,
+           reorder!,
            rep,
            replace!,
            setlevels!,

diff --git a/src/pooleddataarray.jl b/src/pooleddataarray.jl
@@ -433,9 +433,70 @@ function setlevels!{T,R}(x::PooledDataArray{T,R}, d::Dict{T,Any}) # this version
     setlevels!(x, newpool)
 end
 
+##############################################################################
+##
+## reorder()
+##
+##############################################################################
+
+"""
+    `reorder(x)` builds a new array from `x` whose pool is `sort(levels(x))`, i.e. the levels of `x` in sorted order.
+
+    Input:
+    - `x` the PooledDataArray whose levels are to be sorted
+      (this method takes no `newpool` argument: the new pool is always `sort(levels(x))`)
+
+    Output:
+    The result of `PooledDataArray(x, sort(levels(x)))`
+"""
 reorder(x::PooledDataArray) = PooledDataArray(x, sort(levels(x)))  # just re-sort the pool
 
-reorder(x::PooledDataArray, y::AbstractVector...) = reorder(mean, x, y...)
+"""
+    `reorder(pda, newpool)` builds a new array from `pda` that uses `newpool` as its pool, so the levels follow the order
+    given by `newpool`. To rename levels rather than reorder them, call `setlevels!` first and `reorder` afterwards.
+
+    Input:
+    - `pda` the PooledDataArray used as the source
+    - `newpool` the levels, in the desired order, for the new array
+    - `inclusioncheck` (default true) when true, throws ArgumentError unless `newpool` ⊆ `pda.pool`
+
+    Output:
+    The result of `PooledDataArray(pda, newpool)`
+"""
+function reorder(pda::PooledDataArray, newpool::AbstractVector, inclusioncheck=true)
+    outside = inclusioncheck && !issubset(newpool, pda.pool)
+    outside && throw(ArgumentError("A new pool must be a subset of the current one."))
+    return PooledDataArray(pda, newpool)
+end
+
+"""
+    `reorder!(pda, newpool)` makes `newpool` the pool of `pda` and rewrites `pda.refs` in place to match. To rename levels
+    rather than reorder them, call `setlevels!` first and `reorder!` afterwards.
+
+    Input:
+    - `pda` PooledDataArray to be changed
+    - `newpool` the levels in the desired order; this very vector (not a copy) becomes `pda.pool`
+    - `inclusioncheck` (default true) when true, throws ArgumentError unless `newpool` ⊆ `pda.pool`
+
+    Output:
+    The same `pda` object, modified
+"""
+reorder!{T,R<:Integer,N}(pda::PooledDataArray{T,R,N}, newpool::Vector{T}, inclusioncheck=true) = begin
+    inclusioncheck && !issubset(newpool, pda.pool) && throw(ArgumentError("A new pool must be a subset of the current one."))
+    # tidx is indexed by an OLD ref, so it needs one entry per level of the old pool: tidx[k] is where old level k
+    # sits in `newpool`. This relies on findat(a, b) giving, for each element of a, its index in b and 0 if absent
+    # (TODO confirm against findat's definition); a 0 turns values whose level was dropped into NA (ref 0).
+    tidx::Array{R} = findat(pda.pool, newpool)
+    refs = pda.refs
+    for i in 1:length(refs)
+        refs[i] != 0 && (refs[i] = tidx[refs[i]])  # ref 0 marks NA and is left untouched
+    end
+    pda.pool = newpool
+    return pda
+end
+
+# Disabled because of issue #167.
+#reorder(x::PooledDataArray, y::AbstractVector...) = reorder(mean, x, y...)
 
 ### FIXME: this can't work because we don't know about DataFrames
 # reorder(fun::Function, x::PooledDataArray, y::AbstractVector...) =

diff --git a/test/pooleddataarray.jl b/test/pooleddataarray.jl
@@ -51,6 +51,7 @@ module TestPDA
     pim = @pdata [1 + im, 2 + im, 3 + im, 2 + im, 1 + im]
     @assert levels(pim) == [1 + im, 2 + im, 3 + im]
 
+
     # Test explicitly setting refs type
     testarray = [1, 1, 2, 2, 0, 0, 3, 3]
     testdata = @data [1, 1, 2, 2, 0, 0, 3, 3]
@@ -107,4 +108,26 @@ module TestPDA
     pda = @pdata([NA, "A", "B", "C", "A", "B"])
     @test isequal(Base.permute!!(copy(pda), [2, 5, 3, 6, 4, 1]), @pdata(["A", "A", "B", "B", "C", NA]))
     @test isequal(Base.ipermute!!(copy(pda), [6, 1, 3, 5, 2, 4]), @pdata(["A", "A", "B", "B", "C", NA]))
+
+    #1. reordering levels with reorder! (in place) and reorder (new object)
+    pda = @pdata(["high" , "medium" , "low" , "high" , NA, "medium"])
+    #1.1 positive scenarios: the new pool is a subset of the current one
+    @test isequal(pda.pool, Vector{eltype(pda.pool)}(["high", "low", "medium"])) #alphabetically
+    @test isequal(pda.refs, Vector{eltype(pda.refs)}([1,3,2,1,0,3])) #high is 1, medium is 3, low is 2 in the alphabetical pool; the NA has ref 0
+
+    reorder!(pda, ["low","medium","high"]) #reorder according to e.g. visual plot needs
+    @test isequal(pda.pool, Vector{eltype(pda.pool)}(["low", "medium", "high"])) #semantic order
+    @test isequal(pda.refs, Vector{eltype(pda.refs)}([3,2,1,3,0,2])) #same values renumbered: high is now 3, medium 2, low 1
+
+    reorder!(pda, ["low","medium"]) #drop "high" from the pool
+    @test isequal(pda.pool, Vector{eltype(pda.pool)}(["low", "medium"])) #semantic order
+    @test isequal(pda.refs, Vector{eltype(pda.refs)}([0,2,1,0,0,2])) #"high" is no longer in the pool, so its values get ref 0
+
+    newpda = reorder(pda, ["low"])
+    @test newpda !== pda #reorder returns a different object than its argument
+    #1.2 negative scenarios: the new pool is not a subset of the current one
+    pda = @pdata(["high" , "medium" , "low" , "high" , NA, "medium"])
+    @test_throws ArgumentError reorder(pda, ["very low","very high"]) #new levels must be a subset of the original one   
+    reorder!(pda, ["new low","new medium"], false) #don't check inclusion and change level names
+    @test isequal(pda.refs, Vector{eltype(pda.refs)}([0,0,0,0,0,0])) #no new name is in the old pool, so every ref becomes 0: this is not a reordering
 end