Skip to content

RFC: Preparation for Julia 0.6 #1164

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Mar 11, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@

language: julia
julia:
- 0.4
- 0.5
- nightly
os:
Expand All @@ -17,4 +16,3 @@ script:
after_success:
- julia -e 'cd(Pkg.dir("DataFrames")); Pkg.add("Documenter"); Pkg.add("Query"); include(joinpath("docs", "make.jl"))'
- julia -e 'cd(Pkg.dir("DataFrames")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())'

5 changes: 2 additions & 3 deletions REQUIRE
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
julia 0.4
julia 0.5
DataArrays 0.3.4
StatsBase 0.11.0
GZip
SortingAlgorithms
Reexport
Compat 0.8.4
Compat 0.18.0
FileIO 0.1.2
Juno 0.2.4
2 changes: 0 additions & 2 deletions appveyor.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
environment:
matrix:
- JULIAVERSION: "julialang/bin/winnt/x86/0.4/julia-0.4-latest-win32.exe"
- JULIAVERSION: "julialang/bin/winnt/x64/0.4/julia-0.4-latest-win64.exe"
- JULIAVERSION: "julialang/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe"
- JULIAVERSION: "julialang/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe"
- JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe"
Expand Down
8 changes: 3 additions & 5 deletions docs/src/lib/utilities.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ Pages = ["utilities.md"]
```

...

```@docs
eltypes
head
complete_cases
complete_cases!
completecases
completecases!
describe
dump
names!
Expand All @@ -26,5 +26,3 @@ tail
unique
unique!
```


15 changes: 5 additions & 10 deletions src/DataFrames.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
VERSION >= v"0.4.0-dev+6521" && __precompile__(true)
__precompile__()

module DataFrames

Expand All @@ -9,7 +9,6 @@ module DataFrames
##############################################################################

using Compat
import Compat.String
using Reexport
@reexport using StatsBase
@reexport using DataArrays
Expand Down Expand Up @@ -54,15 +53,16 @@ export @~,
coefnames,
colwise,
combine,
complete_cases,
complete_cases!,
completecases,
completecases!,
setcontrasts!,
deleterows!,
describe,
eachcol,
eachrow,
eltypes,
groupby,
head,
melt,
meltdf,
names!,
Expand All @@ -80,6 +80,7 @@ export @~,
showcols,
stack,
stackdf,
tail,
unique!,
unstack,
writetable,
Expand All @@ -93,12 +94,6 @@ export @~,
##
##############################################################################

if VERSION < v"0.5.0-dev+2023"
_displaysize(x...) = Base.tty_size()
else
const _displaysize = Base.displaysize
end

for (dir, filename) in [
("other", "utils.jl"),
("other", "index.jl"),
Expand Down
58 changes: 30 additions & 28 deletions src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ The following are normally implemented for AbstractDataFrames:
* [`tail`]({ref}) : last `n` rows
* `convert` : convert to an array
* `DataArray` : convert to a DataArray
* [`complete_cases`]({ref}) : indexes of complete cases (rows with no NA's)
* [`complete_cases!`]({ref}) : remove rows with NA's
* [`completecases`]({ref}) : indexes of complete cases (rows with no NA's)
* [`completecases!`]({ref}) : remove rows with NA's
* [`nonunique`]({ref}) : indexes of duplicate rows
* [`unique!`]({ref}) : remove duplicate rows
* `similar` : a DataFrame with similar columns as `d`
Expand Down Expand Up @@ -59,7 +59,7 @@ d[[1:3; 5], :]

`setindex` works similarly.
"""
abstract AbstractDataFrame
@compat abstract type AbstractDataFrame end

##############################################################################
##
Expand Down Expand Up @@ -165,10 +165,10 @@ rename(f::Function, df::AbstractDataFrame)

```julia
df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
rename(x -> @compat(Symbol)(uppercase(string(x))), df)
rename(df, @compat(Dict(:i=>:A, :x=>:X)))
rename(x -> Symbol(uppercase(string(x))), df)
rename(df, Dict(:i=>:A, :x=>:X))
rename(df, :y, :Y)
rename!(df, @compat(Dict(:i=>:A, :x=>:X)))
rename!(df, Dict(:i=>:A, :x=>:X))
```

"""
Expand Down Expand Up @@ -199,7 +199,7 @@ eltypes(df)
"""
function eltypes(df::AbstractDataFrame)
ncols = size(df, 2)
res = Array(Type, ncols)
res = Vector{Type}(ncols)
for j in 1:ncols
res[j] = eltype(df[j])
end
Expand Down Expand Up @@ -231,10 +231,10 @@ Base.ndims(::AbstractDataFrame) = 2
Base.similar(df::AbstractDataFrame, dims::Int) =
DataFrame(Any[similar(x, dims) for x in columns(df)], copy(index(df)))

nas{T}(dv::AbstractArray{T}, dims::@compat(Union{Int, Tuple{Vararg{Int}}})) = # TODO move to datavector.jl?
DataArray(Array(T, dims), trues(dims))
nas{T}(dv::AbstractArray{T}, dims::Union{Int, Tuple{Vararg{Int}}}) = # TODO move to datavector.jl?
DataArray(Array{T}(dims), trues(dims))

nas{T,R}(dv::PooledDataArray{T,R}, dims::@compat(Union{Int, Tuple{Vararg{Int}}})) =
nas{T,R}(dv::PooledDataArray{T,R}, dims::Union{Int, Tuple{Vararg{Int}}}) =
PooledDataArray(DataArrays.RefArray(zeros(R, dims)), dv.pool)

nas(df::AbstractDataFrame, dims::Int) =
Expand Down Expand Up @@ -285,10 +285,10 @@ Base.isempty(df::AbstractDataFrame) = ncol(df) == 0
##
##############################################################################

DataArrays.head(df::AbstractDataFrame, r::Int) = df[1:min(r,nrow(df)), :]
DataArrays.head(df::AbstractDataFrame) = head(df, 6)
DataArrays.tail(df::AbstractDataFrame, r::Int) = df[max(1,nrow(df)-r+1):nrow(df), :]
DataArrays.tail(df::AbstractDataFrame) = tail(df, 6)
head(df::AbstractDataFrame, r::Int) = df[1:min(r,nrow(df)), :]
head(df::AbstractDataFrame) = head(df, 6)
tail(df::AbstractDataFrame, r::Int) = df[max(1,nrow(df)-r+1):nrow(df), :]
tail(df::AbstractDataFrame) = tail(df, 6)

"""
Show the first or last part of an AbstractDataFrame
Expand Down Expand Up @@ -443,7 +443,7 @@ end
Indexes of complete cases (rows without NA's)

```julia
complete_cases(df::AbstractDataFrame)
completecases(df::AbstractDataFrame)
```

**Arguments**
Expand All @@ -454,23 +454,23 @@ complete_cases(df::AbstractDataFrame)

* `::Vector{Bool}` : indexes of complete cases

See also [`complete_cases!`]({ref}).
See also [`completecases!`]({ref}).

**Examples**

```julia
df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
df[[1,4,5], :x] = NA
df[[9,10], :y] = NA
complete_cases(df)
completecases(df)
```

"""
function complete_cases(df::AbstractDataFrame)
function completecases(df::AbstractDataFrame)
## Returns a Vector{Bool} of indexes of complete cases (rows with no NA's).
res = !isna(df[1])
res = (!).(isna(df[1]))
for i in 2:ncol(df)
res &= !isna(df[i])
res .&= (!).(isna(df[i]))
end
res
end
Expand All @@ -479,7 +479,7 @@ end
Delete rows with NA's.

```julia
complete_cases!(df::AbstractDataFrame)
completecases!(df::AbstractDataFrame)
```

**Arguments**
Expand All @@ -490,19 +490,19 @@ complete_cases!(df::AbstractDataFrame)

* `::AbstractDataFrame` : the updated version

See also [`complete_cases`]({ref}).
See also [`completecases`]({ref}).

**Examples**

```julia
df = DataFrame(i = 1:10, x = rand(10), y = rand(["a", "b", "c"], 10))
df[[1,4,5], :x] = NA
df[[9,10], :y] = NA
complete_cases!(df)
completecases!(df)
```

"""
complete_cases!(df::AbstractDataFrame) = deleterows!(df, find(!complete_cases(df)))
completecases!(df::AbstractDataFrame) = deleterows!(df, find(!, completecases(df)))

function Base.convert(::Type{Array}, df::AbstractDataFrame)
convert(Matrix, df)
Expand All @@ -516,7 +516,7 @@ function Base.convert{T}(::Type{Array{T}}, df::AbstractDataFrame)
end
function Base.convert{T}(::Type{Matrix{T}}, df::AbstractDataFrame)
n, p = size(df)
res = Array(T, n, p)
res = Matrix{T}(n, p)
idx = 1
for col in columns(df)
anyna(col) && error("DataFrame contains NAs")
Expand Down Expand Up @@ -598,8 +598,8 @@ unique!(df::AbstractDataFrame) = deleterows!(df, find(nonunique(df)))
unique!(df::AbstractDataFrame, cols::Any) = deleterows!(df, find(nonunique(df, cols)))

# Unique rows of an AbstractDataFrame.
Base.unique(df::AbstractDataFrame) = df[!nonunique(df), :]
Base.unique(df::AbstractDataFrame, cols::Any) = df[!nonunique(df, cols), :]
Base.unique(df::AbstractDataFrame) = df[(!).(nonunique(df)), :]
Base.unique(df::AbstractDataFrame, cols::Any) = df[(!).(nonunique(df, cols)), :]

"""
Delete duplicate rows
Expand Down Expand Up @@ -680,8 +680,10 @@ without(df::AbstractDataFrame, c::Any) = without(df, index(df)[c])

# catch-all to cover cases where indexing returns a DataFrame and copy doesn't
Base.hcat(df::AbstractDataFrame, x) = hcat!(df[:, :], x)
# Two-data-frame method: index the first operand with `[:, :]`, as the generic
# `hcat(df, x)` method does, so `hcat!` receives the result of indexing `df1`
# rather than `df1` itself.
Base.hcat(df1::AbstractDataFrame, df2::AbstractDataFrame) = hcat!(df1[:, :], df2)

Base.hcat(df::AbstractDataFrame, x, y...) = hcat!(hcat(df, x), y...)
Base.hcat(df1::AbstractDataFrame, df2::AbstractDataFrame, dfn::AbstractDataFrame...) = hcat!(hcat(df1, df2), dfn...)

# vcat only accepts DataFrames. Finds union of columns, maintaining order
# of first df. Missing data becomes NAs.
Expand Down Expand Up @@ -770,7 +772,7 @@ function Base.hash(df::AbstractDataFrame)
for i in 1:size(df, 2)
h = hash(df[i], h)
end
return @compat UInt(h)
return UInt(h)
end


Expand Down
16 changes: 8 additions & 8 deletions src/abstractdataframe/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,17 @@ function printtable(io::IO,
for j in 1:p
if ! (isna(df[j],i))
if ! (etypes[j] <: Real)
print(io, quotemark)
escapedprint(io, df[i, j], quotestr)
print(io, quotemark)
print(io, quotemark)
escapedprint(io, df[i, j], quotestr)
print(io, quotemark)
else
print(io, df[i, j])
print(io, df[i, j])
end
else
print(io, nastring)
print(io, nastring)
end
if j < p
print(io, separator)
print(io, separator)
else
print(io, '\n')
end
Expand Down Expand Up @@ -133,7 +133,7 @@ function writetable(filename::AbstractString,
# When 'append'-ing to a nonempty file,
# 'header' triggers a check for matching colnames
if header
if any(i -> @compat(Symbol(file_df[1, i])) != index(df)[i], 1:size(df, 2))
if any(i -> Symbol(file_df[1, i]) != index(df)[i], 1:size(df, 2))
throw(KeyError("Column names don't match names in file"))
end

Expand Down Expand Up @@ -181,7 +181,7 @@ end
write(io, "</tr>")
write(io, "</thead>")
write(io, "<tbody>")
tty_rows, tty_cols = _displaysize(io)
tty_rows, tty_cols = displaysize(io)
mxrow = min(n,tty_rows)
for row in 1:mxrow
write(io, "<tr>")
Expand Down
12 changes: 6 additions & 6 deletions src/abstractdataframe/join.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ function join_idx(left, right, max_groups)
left_pos = 0
right_pos = 0

left_indexer = Array(Int, tcount)
right_indexer = Array(Int, tcount)
leftonly_indexer = Array(Int, lcount)
rightonly_indexer = Array(Int, rcount)
left_indexer = Vector{Int}(tcount)
right_indexer = Vector{Int}(tcount)
leftonly_indexer = Vector{Int}(lcount)
rightonly_indexer = Vector{Int}(rcount)
for i in 1:(max_groups + 1)
lc = left_count[i]
rc = right_count[i]
Expand Down Expand Up @@ -113,7 +113,7 @@ function DataArrays.PooledDataArray{R}(df::AbstractDataFrame, ::Type{R})
# might be faster.
refs = zeros(R, nrow(df))
poolref = Dict{AbstractDataFrame, Int}()
pool = Array(UInt64, 0)
pool = Vector{UInt64}(0)
j = 1
for i = 1:nrow(df)
val = df[i,:]
Expand Down Expand Up @@ -188,7 +188,7 @@ join(name, job, kind = :cross)
"""
function Base.join(df1::AbstractDataFrame,
df2::AbstractDataFrame;
on::@compat(Union{Symbol, Vector{Symbol}}) = Symbol[],
on::Union{Symbol, Vector{Symbol}} = Symbol[],
kind::Symbol = :inner)
if kind == :cross
if on != Symbol[]
Expand Down
10 changes: 5 additions & 5 deletions src/abstractdataframe/reshape.jl
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ end
Stacks a DataFrame; convert from a wide to long format; see
`stack`.
"""
melt(df::AbstractDataFrame, id_vars::@compat(Union{Int,Symbol})) = melt(df, [id_vars])
melt(df::AbstractDataFrame, id_vars::Union{Int,Symbol}) = melt(df, [id_vars])
function melt(df::AbstractDataFrame, id_vars)
id_inds = index(df)[id_vars]
stack(df, _setdiff(1:ncol(df), id_inds), id_inds)
Expand Down Expand Up @@ -173,8 +173,8 @@ function unstack(df::AbstractDataFrame, rowkey::Int, colkey::Int, value::Int)
payload = DataFrame(Any[DataArray(eltype(valuecol), Nrow) for i in 1:Ncol], map(Symbol, keycol.pool))
nowarning = true
for k in 1:nrow(df)
j = @compat Int(keycol.refs[k])
i = @compat Int(refkeycol.refs[k])
j = Int(keycol.refs[k])
i = Int(refkeycol.refs[k])
if i > 0 && j > 0
if nowarning && !isna(payload[j][i])
warn("Duplicate entries in unstack.")
Expand Down Expand Up @@ -206,10 +206,10 @@ function unstack(df::AbstractDataFrame, colkey::Int, value::Int)
keys = unique(keycol)
Nrow = length(g)
Ncol = length(keycol.pool)
df2 = DataFrame(Any[DataArray(fill(valuecol[1], Nrow), fill(true, Nrow)) for i in 1:Ncol], map(@compat(Symbol), keycol.pool))
df2 = DataFrame(Any[DataArray(fill(valuecol[1], Nrow), fill(true, Nrow)) for i in 1:Ncol], map(Symbol, keycol.pool))
nowarning = true
for k in 1:nrow(df)
j = @compat Int(keycol.refs[k])
j = Int(keycol.refs[k])
i = rowkey[k]
if i > 0 && j > 0
if nowarning && !isna(df2[j][i])
Expand Down
Loading