Skip to content
This repository was archived by the owner on May 4, 2019. It is now read-only.

Implement reductions with optional skipna argument #101

Merged
merged 7 commits into from
Jul 17, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions benchmark/operators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ macro perf(fn, replications, idx...)
quote
println($name)
gc_disable()
df = compare([()->$fn for i=$idx], $replications)
df = compare([let i=i; ()->$fn; end for i=$idx], $replications)
gc_enable()
gc()
df[:Function] = TEST_NAMES[$idx]
Expand Down Expand Up @@ -71,7 +71,6 @@ const Bool2 = make_test_types(make_bools, 1000)
@perf Bool1[i] $ Bool2[i] 100

# Vector operators
@perf sum(Float1[i]) 250 1:div(length(Float1), 2)
@perf diff(Float1[i]) 50 1:div(length(Float1), 2)
@perf cumsum(Float1[i]) 50 1:div(length(Float1), 2)
end
49 changes: 49 additions & 0 deletions benchmark/reduce.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Benchmarks for whole-array reductions: each reduction is timed on a plain
# Vector and on DataVectors built from the same values, with and without the
# `skipna=true` keyword.
module ReduceBenchmark
using DataArrays, Benchmark

# Fix the RNG seed so that every run times the same data.
srand(1776)

# Row labels for the timing table. The order must match the order of the
# closures assembled inside `@perf`.
const TEST_NAMES = [
    "Vector",
    "DataVector No NA skipna=false",
    "DataVector No NA skipna=true",
    "DataVector Half NA skipna=false",
    "DataVector Half NA skipna=true"
]

# Build the three benchmark inputs from `genfunc(sz)`:
#   1. the raw array returned by `genfunc`,
#   2. a DataArray wrapping it,
#   3. a DataArray wrapping it together with a shuffled Bool mask that holds
#      floor(sz/2) `true` entries (the "Half NA" case in TEST_NAMES).
# The values are generated before the mask is shuffled, so the RNG is
# consumed in that order.
function make_test_types(genfunc, sz)
    raw = genfunc(sz)
    ntrue = ifloor(sz/2)
    nfalse = iceil(sz/2)
    namask = shuffle!([trues(ntrue), falses(nfalse)])
    return (raw, DataArray(raw), DataArray(raw, namask))
end

const Data = make_test_types(rand, 100000000)

# Time the reduction `fn` on every entry of `Data`, `replications` times each,
# and print a table that also reports each timing relative to the first row
# (the plain Vector).
macro perf(fn, replications)
    quote
        println($fn)
        candidates = [
            () -> $fn(Data[1]),
            () -> $fn(Data[2]),
            () -> $fn(Data[2]; skipna=true),
            () -> $fn(Data[3]),
            () -> $fn(Data[3]; skipna=true)
        ]
        # Garbage collection is switched off while timing and a collection is
        # forced right after it is switched back on.
        gc_disable()
        timings = compare(candidates, $replications)
        gc_enable()
        gc()
        timings[:Function] = TEST_NAMES
        timings[:Relative] = timings[:Average] ./ timings[1, :Average]
        println(timings)
    end
end

@perf sum 10
@perf maximum 10
@perf mean 10
@perf var 10
end
53 changes: 53 additions & 0 deletions benchmark/reducedim.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Benchmarks for reductions along one dimension: each reduction is timed on a
# plain Matrix and on DataMatrix inputs built from the same values, with and
# without the `skipna=true` keyword.
module ReducedimBenchmark
using DataArrays, Benchmark

# Fix the RNG seed so that every run times the same data.
srand(1776)

# Row labels for the timing table. The order must match the order of the
# closures assembled inside `@perf`.
const TEST_NAMES = [
    "Matrix",
    "DataMatrix No NA skipna=false",
    "DataMatrix No NA skipna=true",
    "DataMatrix Half NA skipna=false",
    "DataMatrix Half NA skipna=true"
]

# Build the three benchmark inputs, each of size `sz` x `sz`, from
# `genfunc(abs2(sz))`:
#   1. the generated values reshaped to a matrix,
#   2. a DataArray wrapping such a reshaped matrix,
#   3. a DataArray wrapping such a reshaped matrix together with a shuffled
#      Bool mask holding floor(abs2(sz)/2) `true` entries (the "Half NA" case
#      in TEST_NAMES).
# The values are generated before the mask is shuffled, so the RNG is
# consumed in that order.
function make_test_types(genfunc, sz)
    nelem = abs2(sz)
    flat = genfunc(nelem)
    namask = shuffle!([trues(ifloor(nelem/2)), falses(iceil(nelem/2))])
    return (
        reshape(flat, sz, sz),
        DataArray(reshape(flat, sz, sz)),
        DataArray(reshape(flat, sz, sz), reshape(namask, sz, sz))
    )
end

const Data = make_test_types(rand, 10000)

# Time the reduction `fn` over region `dim` on every entry of `Data`,
# `replications` times each, and print a table that also reports each timing
# relative to the first row (the plain Matrix).
macro perf(fn, dim, replications)
    quote
        println($fn, " (region = ", $dim, ")")
        candidates = [
            () -> $fn(Data[1], $dim),
            () -> $fn(Data[2], $dim),
            () -> $fn(Data[2], $dim; skipna=true),
            () -> $fn(Data[3], $dim),
            () -> $fn(Data[3], $dim; skipna=true)
        ]
        # Garbage collection is switched off while timing and a collection is
        # forced right after it is switched back on.
        gc_disable()
        timings = compare(candidates, $replications)
        gc_enable()
        gc()
        timings[:Function] = TEST_NAMES
        timings[:Relative] = timings[:Average] ./ timings[1, :Average]
        println(timings)
    end
end

@perf sum 1 10
@perf sum 2 10
@perf maximum 1 10
@perf maximum 2 10
@perf mean 1 10
@perf mean 2 10
@perf var 1 10
@perf var 2 10
end
2 changes: 2 additions & 0 deletions src/DataArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ module DataArrays
include("datamatrix.jl")
include("linalg.jl")
include("operators.jl")
include("reduce.jl")
include("reducedim.jl")
include("broadcast.jl")
include("sort.jl")
include("extras.jl")
Expand Down
12 changes: 9 additions & 3 deletions src/broadcast.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using DataArrays, Base.Cartesian, Base.@get!
using DataArrays, Base.@get!
using Base.Broadcast: bitcache_chunks, bitcache_size, dumpbitcache,
promote_eltype, broadcast_shape, eltype_plus, type_minus, type_div,
type_pow
Expand Down Expand Up @@ -103,9 +103,11 @@ function gen_broadcast_dataarray(nd::Int, arrtype::(DataType...), outtype, f::Fu
# Set up output DataArray/PooledDataArray
$(if outtype == DataArray
quote
Bc = B.na.chunks
fill!(Bc, 0)
Bdata = B.data
# Copy in case aliased
# TODO: check for aliasing?
Bna = falses(size(Bdata))
Bc = Bna.chunks
ind = 1
end
elseif outtype == PooledDataArray
Expand Down Expand Up @@ -158,6 +160,10 @@ function gen_broadcast_dataarray(nd::Int, arrtype::(DataType...), outtype, f::Fu
:(ind += 1)
end)
end)

$(if outtype == DataArray
:(B.na = Bna)
end)
end
_F_
end
Expand Down
3 changes: 3 additions & 0 deletions src/dataarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,9 @@ function Base.copy!(dest::DataArray, src::DataArray) # -> DataArray{T}
dest
end

# Fill `A` with NA: every entry of the `na` mask is set to `true`; `A.data`
# is left untouched. Returns `A`.
function Base.fill!(A::DataArray, ::NAtype)
    fill!(A.na, true)
    return A
end

# Fill `A` with the value `v`: `A.data` is filled with `v` and every entry of
# the `na` mask is cleared. Returns `A`.
function Base.fill!(A::DataArray, v)
    fill!(A.data, v)
    fill!(A.na, false)
    return A
end

#' @description
#'
#' Create a deep copy of a DataArray.
Expand Down
2 changes: 2 additions & 0 deletions src/indexing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ daextract(a) = nothing
# Check for NA
# NOTE(review): `extr` is presumably the value returned by `daextract` for the
# same array - confirm against the callers.

# DataArray: the NA flag for `idx` is read from `extr[2]` through
# `Base.unsafe_bitgetindex`.
function unsafe_isna(da::DataArray, extr, idx::Real)
    return Base.unsafe_bitgetindex(extr[2], idx)
end

# PooledDataArray: position `idx` is NA when `extr[1][idx]` equals 0.
function unsafe_isna(pda::PooledDataArray, extr, idx::Real)
    return extr[1][idx] == 0
end

# Any other argument: never reported as NA.
function unsafe_isna(a, extr, idx::Real)
    return false
end
# NOTE(review): `extr` is presumably the value returned by `daextract` for the
# same array - confirm against the callers.

# DataArray: the value at `idx` is read directly from `extr[1]`.
function unsafe_getindex_notna(da::DataArray, extr, idx::Real)
    return extr[1][idx]
end

# PooledDataArray: `extr[1][idx]` is used as an index into `extr[2]`.
function unsafe_getindex_notna(pda::PooledDataArray, extr, idx::Real)
    return extr[2][extr[1][idx]]
end

# Any other argument: index it through `Base.unsafe_getindex`; `extr` is
# ignored.
function unsafe_getindex_notna(a, extr, idx::Real)
    return Base.unsafe_getindex(a, idx)
end
Expand Down Expand Up @@ -43,6 +44,7 @@ unsafe_dasetindex!(da::PooledDataArray, extr, val::NAtype, idx::Real) = nothing
# NOTE(review): `extr` is presumably the value returned by `daextract` for the
# same array - confirm against the callers.

# DataArray: store `val` at `idx` in `extr[1]`. Nothing else is modified by
# this method.
function unsafe_dasetindex!(da::DataArray, extr, val, idx::Real)
    return setindex!(extr[1], val, idx)
end

# PooledDataArray: look up the pool index of `val` with `getpoolidx` and store
# that index at `idx` in `extr[1]`.
function unsafe_dasetindex!(pda::PooledDataArray, extr, val, idx::Real)
    return setindex!(extr[1], getpoolidx(pda, val), idx)
end

# Any other AbstractArray: store `val` at `idx` in the array itself; `extr` is
# ignored.
function unsafe_dasetindex!(a::AbstractArray, extr, val, idx::Real)
    return setindex!(a, val, idx)
end

## PooledDataArray helper functions

Expand Down
23 changes: 13 additions & 10 deletions src/operators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ const numeric_unary_operators = [:(Base.(:+)),
const logical_unary_operators = [:(Base.(:!))]

const elementary_functions = [:(Base.abs),
:(Base.abs2),
:(Base.sign),
:(Base.acos),
:(Base.acosh),
Expand Down Expand Up @@ -146,14 +147,7 @@ const bit_operators = [:(Base.(:&)),
:(Base.(:|)),
:(Base.(:$))]

const unary_vector_operators = [:(Base.minimum),
:(Base.maximum),
:(Base.prod),
:(Base.sum),
:(Base.mean),
:(Base.median),
:(Base.std),
:(Base.var),
const unary_vector_operators = [:(Base.median),
:(StatsBase.mad),
:(Base.norm),
:(StatsBase.skewness),
Expand Down Expand Up @@ -460,9 +454,17 @@ end
# XXX: The below should be revisited once we have a way to infer what
# the proper return type of an array should be.

# One-argument elementary functions that do something different for
# Complex.
#
# For each quoted function name in the tuple, the name is spliced into a
# `@dataarray_unary` call and the resulting definition is evaluated with
# `@eval`, so `Base.abs` and `Base.abs2` each get their own generated method.
# NOTE(review): the last macro argument, `T.parameters[1]`, presumably names
# the result element type (the type parameter of the `Complex` input type) -
# confirm against the definition of `@dataarray_unary`.
for f in (:(Base.abs), :(Base.abs2))
@eval begin
@dataarray_unary $(f) Complex T.parameters[1]
end
end

# One-argument elementary functions that return the same type as their
# inputs
for f in (:(Base.abs), :(Base.conj), :(Base.sign))
for f in (:(Base.abs), :(Base.abs2), :(Base.conj), :(Base.sign))
@eval begin
$(f)(::NAtype) = NA
@dataarray_unary $(f) Number T
Expand Down Expand Up @@ -672,7 +674,8 @@ Base.(:.^)(::MathConst{:e}, B::AbstractDataArray) = exp(B)

for f in (:(Base.(:+)), :(Base.(:.+)), :(Base.(:-)), :(Base.(:.-)),
:(Base.(:*)), :(Base.(:.*)), :(Base.(:.^)), :(Base.div),
:(Base.mod), :(Base.fld), :(Base.rem))
:(Base.mod), :(Base.fld), :(Base.rem), :(Base.min),
:(Base.max))
@eval begin
# Scalar with NA
($f)(::NAtype, ::NAtype) = NA
Expand Down
Loading