diff --git a/01_constructors.ipynb b/01_constructors.ipynb
index baecd5a..70a5726 100644
--- a/01_constructors.ipynb
+++ b/01_constructors.ipynb
@@ -5,7 +5,7 @@
"metadata": {},
"source": [
"# Introduction to DataFrames\n",
- "**[Bogumił Kamiński](http://bogumilkaminski.pl/about/), May 23, 2018**\n",
+ "**[Bogumił Kamiński](http://bogumilkaminski.pl/about/), July 25, 2018**\n",
"\n",
"Let's get started by loading the `DataFrames` package."
]
@@ -13,9 +13,7 @@
{
"cell_type": "code",
"execution_count": 1,
- "metadata": {
- "collapsed": true
- },
+ "metadata": {},
"outputs": [],
"source": [
"using DataFrames"
@@ -77,15 +75,15 @@
{
"data": {
"text/html": [
- "
| A | B | C |
---|
1 | 1 | 0.865057 | Jds |
---|
2 | 2 | 0.870442 | hDG |
---|
3 | 3 | 0.128666 | oyt |
---|
"
+ " | A | B | C |
---|
1 | 1 | 0.582939 | eDh |
---|
2 | 2 | 0.899657 | Uma |
---|
3 | 3 | 0.873748 | RnO |
---|
"
],
"text/plain": [
"3×3 DataFrames.DataFrame\n",
"│ Row │ A │ B │ C │\n",
"├─────┼───┼──────────┼─────┤\n",
- "│ 1 │ 1 │ 0.865057 │ Jds │\n",
- "│ 2 │ 2 │ 0.870442 │ hDG │\n",
- "│ 3 │ 3 │ 0.128666 │ oyt │"
+ "│ 1 │ 1 │ 0.582939 │ eDh │\n",
+ "│ 2 │ 2 │ 0.899657 │ Uma │\n",
+ "│ 3 │ 3 │ 0.873748 │ RnO │"
]
},
"execution_count": 3,
@@ -183,15 +181,15 @@
{
"data": {
"text/html": [
- " | x1 | x2 | x3 |
---|
1 | 0.738969 | 0.476396 | 0.926968 |
---|
2 | 0.498559 | 0.190063 | 0.839678 |
---|
3 | 0.0957712 | 0.843156 | 0.120698 |
---|
"
+ " | x1 | x2 | x3 |
---|
1 | 0.291477 | 0.589915 | 0.321527 |
---|
2 | 0.230679 | 0.0387716 | 0.539359 |
---|
3 | 0.481537 | 0.494867 | 0.649062 |
---|
"
],
"text/plain": [
"3×3 DataFrames.DataFrame\n",
- "│ Row │ x1 │ x2 │ x3 │\n",
- "├─────┼───────────┼──────────┼──────────┤\n",
- "│ 1 │ 0.738969 │ 0.476396 │ 0.926968 │\n",
- "│ 2 │ 0.498559 │ 0.190063 │ 0.839678 │\n",
- "│ 3 │ 0.0957712 │ 0.843156 │ 0.120698 │"
+ "│ Row │ x1 │ x2 │ x3 │\n",
+ "├─────┼──────────┼───────────┼──────────┤\n",
+ "│ 1 │ 0.291477 │ 0.589915 │ 0.321527 │\n",
+ "│ 2 │ 0.230679 │ 0.0387716 │ 0.539359 │\n",
+ "│ 3 │ 0.481537 │ 0.494867 │ 0.649062 │"
]
},
"execution_count": 6,
@@ -222,11 +220,11 @@
"\u001b[1m\u001b[33mWARNING: \u001b[39m\u001b[22m\u001b[33mpassing columns argument with non-AbstractVector entries is deprecated\u001b[39m\n",
"Stacktrace:\n",
" [1] \u001b[1mdepwarn\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::Symbol\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\deprecated.jl:70\u001b[22m\u001b[22m\n",
- " [2] \u001b[1m#DataFrame#57\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Type{T} where T, ::Array{Float64,1}, ::Array{Symbol,1}\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\dataframe\\dataframe.jl:154\u001b[22m\u001b[22m\n",
+ " [2] \u001b[1m#DataFrame#62\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Type{T} where T, ::Array{Float64,1}, ::Array{Symbol,1}\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\dataframe\\dataframe.jl:154\u001b[22m\u001b[22m\n",
" [3] \u001b[1mDataFrames.DataFrame\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Array{Float64,1}\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\dataframe\\dataframe.jl:152\u001b[22m\u001b[22m\n",
" [4] \u001b[1minclude_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\loading.jl:522\u001b[22m\u001b[22m\n",
" [5] \u001b[1mexecute_request\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket, ::IJulia.Msg\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\IJulia\\src\\execute_request.jl:158\u001b[22m\u001b[22m\n",
- " [6] \u001b[1m(::Compat.#inner#17{Array{Any,1},IJulia.#execute_request,Tuple{ZMQ.Socket,IJulia.Msg}})\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\Compat\\src\\Compat.jl:385\u001b[22m\u001b[22m\n",
+ " [6] \u001b[1m(::Compat.#inner#14{Array{Any,1},IJulia.#execute_request,Tuple{ZMQ.Socket,IJulia.Msg}})\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\Compat\\src\\Compat.jl:332\u001b[22m\u001b[22m\n",
" [7] \u001b[1meventloop\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\IJulia\\src\\eventloop.jl:8\u001b[22m\u001b[22m\n",
" [8] \u001b[1m(::IJulia.##14#17)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\task.jl:335\u001b[22m\u001b[22m\n",
"while loading In[7], in expression starting on line 1\n"
@@ -235,13 +233,13 @@
{
"data": {
"text/html": [
- " | x1 | x2 | x3 |
---|
1 | 0.613673 | 0.9388 | 0.976714 |
---|
"
+ " | x1 | x2 | x3 |
---|
1 | 0.694614 | 0.305183 | 0.375302 |
---|
"
],
"text/plain": [
"1×3 DataFrames.DataFrame\n",
- "│ Row │ x1 │ x2 │ x3 │\n",
- "├─────┼──────────┼────────┼──────────┤\n",
- "│ 1 │ 0.613673 │ 0.9388 │ 0.976714 │"
+ "│ Row │ x1 │ x2 │ x3 │\n",
+ "├─────┼──────────┼──────────┼──────────┤\n",
+ "│ 1 │ 0.694614 │ 0.305183 │ 0.375302 │"
]
},
"execution_count": 7,
@@ -283,7 +281,7 @@
}
],
"source": [
- "DataFrame(transpose([1, 2, 3]))"
+ "DataFrame(transpose([1, 2, 3])) # permutedims in Julia 0.7"
]
},
{
@@ -336,15 +334,15 @@
{
"data": {
"text/html": [
- " | x1 | x2 | x3 | x4 |
---|
1 | 0.874557 | 0.777246 | 0.949467 | 0.697868 |
---|
2 | 0.579164 | 0.816029 | 0.191466 | 0.0563065 |
---|
3 | 0.280777 | 0.795716 | 0.201309 | 0.191633 |
---|
"
+ " | x1 | x2 | x3 | x4 |
---|
1 | 0.957768 | 0.222008 | 0.569215 | 0.553819 |
---|
2 | 0.0885923 | 0.49824 | 0.61231 | 0.985774 |
---|
3 | 0.152064 | 0.804575 | 0.825947 | 0.653275 |
---|
"
],
"text/plain": [
"3×4 DataFrames.DataFrame\n",
- "│ Row │ x1 │ x2 │ x3 │ x4 │\n",
- "├─────┼──────────┼──────────┼──────────┼───────────┤\n",
- "│ 1 │ 0.874557 │ 0.777246 │ 0.949467 │ 0.697868 │\n",
- "│ 2 │ 0.579164 │ 0.816029 │ 0.191466 │ 0.0563065 │\n",
- "│ 3 │ 0.280777 │ 0.795716 │ 0.201309 │ 0.191633 │"
+ "│ Row │ x1 │ x2 │ x3 │ x4 │\n",
+ "├─────┼───────────┼──────────┼──────────┼──────────┤\n",
+ "│ 1 │ 0.957768 │ 0.222008 │ 0.569215 │ 0.553819 │\n",
+ "│ 2 │ 0.0885923 │ 0.49824 │ 0.61231 │ 0.985774 │\n",
+ "│ 3 │ 0.152064 │ 0.804575 │ 0.825947 │ 0.653275 │"
]
},
"execution_count": 10,
@@ -371,15 +369,15 @@
{
"data": {
"text/html": [
- " | a | b | c | d |
---|
1 | 0.627336 | 0.850384 | 0.225164 | 0.617465 |
---|
2 | 0.645045 | 0.709581 | 0.0780468 | 0.0941601 |
---|
3 | 0.48563 | 0.608378 | 0.777213 | 0.630866 |
---|
"
+ " | a | b | c | d |
---|
1 | 0.188787 | 0.903936 | 0.500736 | 0.811463 |
---|
2 | 0.341302 | 0.557099 | 0.41954 | 0.382749 |
---|
3 | 0.0751378 | 0.430165 | 0.293501 | 0.253147 |
---|
"
],
"text/plain": [
"3×4 DataFrames.DataFrame\n",
- "│ Row │ a │ b │ c │ d │\n",
- "├─────┼──────────┼──────────┼───────────┼───────────┤\n",
- "│ 1 │ 0.627336 │ 0.850384 │ 0.225164 │ 0.617465 │\n",
- "│ 2 │ 0.645045 │ 0.709581 │ 0.0780468 │ 0.0941601 │\n",
- "│ 3 │ 0.48563 │ 0.608378 │ 0.777213 │ 0.630866 │"
+ "│ Row │ a │ b │ c │ d │\n",
+ "├─────┼───────────┼──────────┼──────────┼──────────┤\n",
+ "│ 1 │ 0.188787 │ 0.903936 │ 0.500736 │ 0.811463 │\n",
+ "│ 2 │ 0.341302 │ 0.557099 │ 0.41954 │ 0.382749 │\n",
+ "│ 3 │ 0.0751378 │ 0.430165 │ 0.293501 │ 0.253147 │"
]
},
"execution_count": 11,
@@ -408,13 +406,13 @@
{
"data": {
"text/html": [
- " | A | B | C |
---|
1 | 147259888 | 1.08013e-319 | missing |
---|
"
+ ""
],
"text/plain": [
"1×3 DataFrames.DataFrame\n",
- "│ Row │ A │ B │ C │\n",
- "├─────┼───────────┼──────────────┼─────────┤\n",
- "│ 1 │ 147259888 │ 1.08013e-319 │ \u001b[90mmissing\u001b[39m │"
+ "│ Row │ A │ B │ C │\n",
+ "├─────┼───┼─────┼─────────┤\n",
+ "│ 1 │ 0 │ 0.0 │ \u001b[90mmissing\u001b[39m │"
]
},
"execution_count": 12,
@@ -430,9 +428,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "Here we create a `DataFrame`, but column `:C` is #undef and Jupyter has problem with displaying it. (This works OK at the REPL.)\n",
- "\n",
- "This will be fixed in next release of DataFrames!"
+ "Here we create a `DataFrame` where column `:C` is `#undef`."
]
},
{
@@ -441,24 +437,20 @@
"metadata": {},
"outputs": [
{
- "ename": "UndefRefError",
- "evalue": "\u001b[91mUndefRefError: access to undefined reference\u001b[39m",
- "output_type": "error",
- "traceback": [
- "\u001b[91mUndefRefError: access to undefined reference\u001b[39m",
- "",
- "Stacktrace:",
- " [1] \u001b[1mgetindex\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Array{String,1}, ::Int64\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\array.jl:554\u001b[22m\u001b[22m",
- " [2] \u001b[1mgetindex\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::DataFrames.DataFrame, ::Int64, ::Symbol\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\dataframe\\dataframe.jl:275\u001b[22m\u001b[22m",
- " [3] \u001b[1mshow\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::IOContext{Base.AbstractIOBuffer{Array{UInt8,1}}}, ::MIME{Symbol(\"text/html\")}, ::DataFrames.DataFrame\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\abstractdataframe\\io.jl:110\u001b[22m\u001b[22m",
- " [4] \u001b[1mlimitstringmime\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::MIME{Symbol(\"text/html\")}, ::DataFrames.DataFrame\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\IJulia\\src\\inline.jl:24\u001b[22m\u001b[22m",
- " [5] \u001b[1mdisplay_dict\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::DataFrames.DataFrame\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\IJulia\\src\\execute_request.jl:43\u001b[22m\u001b[22m",
- " [6] \u001b[1m(::Compat.#inner#17{Array{Any,1},IJulia.#display_dict,Tuple{DataFrames.DataFrame}})\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\Compat\\src\\Compat.jl:385\u001b[22m\u001b[22m",
- " [7] \u001b[1mexecute_request\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket, ::IJulia.Msg\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\IJulia\\src\\execute_request.jl:186\u001b[22m\u001b[22m",
- " [8] \u001b[1m(::Compat.#inner#17{Array{Any,1},IJulia.#execute_request,Tuple{ZMQ.Socket,IJulia.Msg}})\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\Compat\\src\\Compat.jl:385\u001b[22m\u001b[22m",
- " [9] \u001b[1meventloop\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\IJulia\\src\\eventloop.jl:8\u001b[22m\u001b[22m",
- " [10] \u001b[1m(::IJulia.##14#17)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\task.jl:335\u001b[22m\u001b[22m"
- ]
+ "data": {
+ "text/html": [
+ ""
+ ],
+ "text/plain": [
+ "1×3 DataFrames.DataFrame\n",
+ "│ Row │ A │ B │ C │\n",
+ "├─────┼────┼──────────────┼────────┤\n",
+ "│ 1 │ -1 │ 2.03553e-315 │ #undef │"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
@@ -510,15 +502,15 @@
{
"data": {
"text/html": [
- " | x1 | x2 | x3 | x4 | x5 |
---|
1 | 202568848 | 199866128 | 1649267441664 | 0 | 439701264 |
---|
2 | 147292368 | 147292368 | 0 | 0 | 148642768 |
---|
3 | 147293072 | 147292432 | 0 | 0 | 439701424 |
---|
"
+ " | x1 | x2 | x3 | x4 | x5 |
---|
1 | 166116144 | 165712048 | 166110096 | 169519056 | 165712048 |
---|
2 | 164408368 | 167317968 | 111313296 | 111313680 | 164408368 |
---|
3 | 111313296 | 141233040 | 141233200 | 141238528 | 141233232 |
---|
"
],
"text/plain": [
"3×5 DataFrames.DataFrame\n",
- "│ Row │ x1 │ x2 │ x3 │ x4 │ x5 │\n",
- "├─────┼───────────┼───────────┼───────────────┼────┼───────────┤\n",
- "│ 1 │ 202568848 │ 199866128 │ 1649267441664 │ 0 │ 439701264 │\n",
- "│ 2 │ 147292368 │ 147292368 │ 0 │ 0 │ 148642768 │\n",
- "│ 3 │ 147293072 │ 147292432 │ 0 │ 0 │ 439701424 │"
+ "│ Row │ x1 │ x2 │ x3 │ x4 │ x5 │\n",
+ "├─────┼───────────┼───────────┼───────────┼───────────┼───────────┤\n",
+ "│ 1 │ 166116144 │ 165712048 │ 166110096 │ 169519056 │ 165712048 │\n",
+ "│ 2 │ 164408368 │ 167317968 │ 111313296 │ 111313680 │ 164408368 │\n",
+ "│ 3 │ 111313296 │ 141233040 │ 141233200 │ 141238528 │ 141233232 │"
]
},
"execution_count": 15,
@@ -545,16 +537,16 @@
{
"data": {
"text/html": [
- " | x1 | x2 |
---|
1 | 148644688 | 2.17246e-315 |
---|
2 | 147261328 | 2.17242e-315 |
---|
3 | 147261328 | 2.17246e-315 |
---|
4 | 0 | 2.20078e-315 |
---|
"
+ " | x1 | x2 |
---|
1 | 166692528 | 8.26715e-316 |
---|
2 | 141234768 | 2.11841e-315 |
---|
3 | 111411280 | 6.97793e-316 |
---|
4 | 144117920 | 0.0 |
---|
"
],
"text/plain": [
"4×2 DataFrames.DataFrame\n",
"│ Row │ x1 │ x2 │\n",
"├─────┼───────────┼──────────────┤\n",
- "│ 1 │ 148644688 │ 2.17246e-315 │\n",
- "│ 2 │ 147261328 │ 2.17242e-315 │\n",
- "│ 3 │ 147261328 │ 2.17246e-315 │\n",
- "│ 4 │ 0 │ 2.20078e-315 │"
+ "│ 1 │ 166692528 │ 8.26715e-316 │\n",
+ "│ 2 │ 141234768 │ 2.11841e-315 │\n",
+ "│ 3 │ 111411280 │ 6.97793e-316 │\n",
+ "│ 4 │ 144117920 │ 0.0 │"
]
},
"execution_count": 16,
@@ -850,7 +842,7 @@
"\u001b[91mcannot convert a DataFrame containing missing values to array (found for column y)\u001b[39m",
"",
"Stacktrace:",
- " [1] \u001b[1mconvert\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Type{Array{Int64,2}}, ::DataFrames.DataFrame\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\abstractdataframe\\abstractdataframe.jl:626\u001b[22m\u001b[22m",
+ " [1] \u001b[1mconvert\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Type{Array{Int64,2}}, ::DataFrames.DataFrame\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\abstractdataframe\\abstractdataframe.jl:716\u001b[22m\u001b[22m",
" [2] \u001b[1mArray{Int64,2}\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::DataFrames.DataFrame\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\sysimg.jl:77\u001b[22m\u001b[22m"
]
}
@@ -913,15 +905,15 @@
"\u001b[1m\u001b[33mWARNING: \u001b[39m\u001b[22m\u001b[33mDuplicate variable names are deprecated: pass makeunique=true to add a suffix automatically.\u001b[39m\n",
"Stacktrace:\n",
" [1] \u001b[1mdepwarn\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::Symbol\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\deprecated.jl:70\u001b[22m\u001b[22m\n",
- " [2] \u001b[1m#make_unique#3\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Function, ::Array{Symbol,1}\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\other\\utils.jl:61\u001b[22m\u001b[22m\n",
+ " [2] \u001b[1m#make_unique#3\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Function, ::Array{Symbol,1}\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\other\\utils.jl:64\u001b[22m\u001b[22m\n",
" [3] \u001b[1m(::DataFrames.#kw##make_unique)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Array{Any,1}, ::DataFrames.#make_unique, ::Array{Symbol,1}\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\:0\u001b[22m\u001b[22m\n",
" [4] \u001b[1m#Index#6\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\other\\index.jl:12\u001b[22m\u001b[22m [inlined]\n",
" [5] \u001b[1m(::Core.#kw#Type)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Array{Any,1}, ::Type{DataFrames.Index}, ::Array{Symbol,1}\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\:0\u001b[22m\u001b[22m\n",
- " [6] \u001b[1m#DataFrame#47\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Type{T} where T, ::Pair{Symbol,Int64}, ::Vararg{Pair{Symbol,Int64},N} where N\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\dataframe\\dataframe.jl:126\u001b[22m\u001b[22m\n",
+ " [6] \u001b[1m#DataFrame#52\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Type{T} where T, ::Pair{Symbol,Int64}, ::Vararg{Pair{Symbol,Int64},N} where N\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\dataframe\\dataframe.jl:126\u001b[22m\u001b[22m\n",
" [7] \u001b[1mDataFrames.DataFrame\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Pair{Symbol,Int64}, ::Pair{Symbol,Int64}, ::Pair{Symbol,Int64}\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\dataframe\\dataframe.jl:124\u001b[22m\u001b[22m\n",
" [8] \u001b[1minclude_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\loading.jl:522\u001b[22m\u001b[22m\n",
" [9] \u001b[1mexecute_request\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket, ::IJulia.Msg\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\IJulia\\src\\execute_request.jl:158\u001b[22m\u001b[22m\n",
- " [10] \u001b[1m(::Compat.#inner#17{Array{Any,1},IJulia.#execute_request,Tuple{ZMQ.Socket,IJulia.Msg}})\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\Compat\\src\\Compat.jl:385\u001b[22m\u001b[22m\n",
+ " [10] \u001b[1m(::Compat.#inner#14{Array{Any,1},IJulia.#execute_request,Tuple{ZMQ.Socket,IJulia.Msg}})\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\Compat\\src\\Compat.jl:332\u001b[22m\u001b[22m\n",
" [11] \u001b[1meventloop\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\IJulia\\src\\eventloop.jl:8\u001b[22m\u001b[22m\n",
" [12] \u001b[1m(::IJulia.##14#17)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\task.jl:335\u001b[22m\u001b[22m\n",
"while loading In[28], in expression starting on line 1\n"
@@ -968,17 +960,17 @@
"\u001b[1m\u001b[33mWARNING: \u001b[39m\u001b[22m\u001b[33mDuplicate variable names are deprecated: pass makeunique=true to add a suffix automatically.\u001b[39m\n",
"Stacktrace:\n",
" [1] \u001b[1mdepwarn\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::Symbol\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\deprecated.jl:70\u001b[22m\u001b[22m\n",
- " [2] \u001b[1m#make_unique#3\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Function, ::Array{Symbol,1}\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\other\\utils.jl:61\u001b[22m\u001b[22m\n",
+ " [2] \u001b[1m#make_unique#3\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Function, ::Array{Symbol,1}\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\other\\utils.jl:64\u001b[22m\u001b[22m\n",
" [3] \u001b[1m(::DataFrames.#kw##make_unique)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Array{Any,1}, ::DataFrames.#make_unique, ::Array{Symbol,1}\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\:0\u001b[22m\u001b[22m\n",
" [4] \u001b[1m#Index#6\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\other\\index.jl:12\u001b[22m\u001b[22m [inlined]\n",
" [5] \u001b[1m(::Core.#kw#Type)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Array{Any,1}, ::Type{DataFrames.Index}, ::Array{Symbol,1}\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\:0\u001b[22m\u001b[22m\n",
- " [6] \u001b[1m#DataFrame#47\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Type{T} where T, ::Pair{Symbol,Int64}, ::Vararg{Pair{Symbol,#s8} where #s8,N} where N\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\dataframe\\dataframe.jl:126\u001b[22m\u001b[22m\n",
+ " [6] \u001b[1m#DataFrame#52\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Bool, ::Type{T} where T, ::Pair{Symbol,Int64}, ::Vararg{Pair{Symbol,#s8} where #s8,N} where N\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\dataframe\\dataframe.jl:126\u001b[22m\u001b[22m\n",
" [7] \u001b[1mDataFrames.DataFrame\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Pair{Symbol,Int64}, ::Pair{Symbol,Int64}, ::Pair{Symbol,Bool}\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\dataframe\\dataframe.jl:124\u001b[22m\u001b[22m\n",
- " [8] \u001b[1m#DataFrame#56\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\dataframe\\dataframe.jl:145\u001b[22m\u001b[22m [inlined]\n",
+ " [8] \u001b[1m#DataFrame#61\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\dataframe\\dataframe.jl:145\u001b[22m\u001b[22m [inlined]\n",
" [9] \u001b[1m(::Core.#kw#Type)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Array{Any,1}, ::Type{DataFrames.DataFrame}\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\:0\u001b[22m\u001b[22m\n",
" [10] \u001b[1minclude_string\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::String, ::String\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\loading.jl:522\u001b[22m\u001b[22m\n",
" [11] \u001b[1mexecute_request\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket, ::IJulia.Msg\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\IJulia\\src\\execute_request.jl:158\u001b[22m\u001b[22m\n",
- " [12] \u001b[1m(::Compat.#inner#17{Array{Any,1},IJulia.#execute_request,Tuple{ZMQ.Socket,IJulia.Msg}})\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\Compat\\src\\Compat.jl:385\u001b[22m\u001b[22m\n",
+ " [12] \u001b[1m(::Compat.#inner#14{Array{Any,1},IJulia.#execute_request,Tuple{ZMQ.Socket,IJulia.Msg}})\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\Compat\\src\\Compat.jl:332\u001b[22m\u001b[22m\n",
" [13] \u001b[1meventloop\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::ZMQ.Socket\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\IJulia\\src\\eventloop.jl:8\u001b[22m\u001b[22m\n",
" [14] \u001b[1m(::IJulia.##14#17)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\task.jl:335\u001b[22m\u001b[22m\n",
"while loading In[29], in expression starting on line 1\n"
@@ -1004,11 +996,45 @@
"source": [
"df = DataFrame(a=1, a=2, makeunique=true)"
]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Finally, observe that `nothing` is not printed when displaying a `DataFrame`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ ""
+ ],
+ "text/plain": [
+ "2×2 DataFrames.DataFrame\n",
+ "│ Row │ x │ y │\n",
+ "├─────┼───┼───┤\n",
+ "│ 1 │ 1 │ │\n",
+ "│ 2 │ │ a │"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "DataFrame(x=[1, nothing], y=[nothing, \"a\"])"
+ ]
}
],
"metadata": {
"kernelspec": {
- "display_name": "Julia 0.6.0",
+ "display_name": "Julia 0.6.2",
"language": "julia",
"name": "julia-0.6"
},
@@ -1016,7 +1042,7 @@
"file_extension": ".jl",
"mimetype": "application/julia",
"name": "julia",
- "version": "0.6.0"
+ "version": "0.6.3"
}
},
"nbformat": 4,
diff --git a/02_basicinfo.ipynb b/02_basicinfo.ipynb
index d4107a5..a34cece 100644
--- a/02_basicinfo.ipynb
+++ b/02_basicinfo.ipynb
@@ -5,15 +5,13 @@
"metadata": {},
"source": [
"# Introduction to DataFrames\n",
- "**[Bogumił Kamiński](http://bogumilkaminski.pl/about/), May 23, 2018**"
+ "**[Bogumił Kamiński](http://bogumilkaminski.pl/about/), July 25, 2018**"
]
},
{
"cell_type": "code",
"execution_count": 1,
- "metadata": {
- "collapsed": true
- },
+ "metadata": {},
"outputs": [],
"source": [
"using DataFrames # load package"
@@ -122,75 +120,28 @@
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "A\n",
- "Summary Stats:\n",
- "Mean: 1.500000\n",
- "Minimum: 1.000000\n",
- "1st Quartile: 1.250000\n",
- "Median: 1.500000\n",
- "3rd Quartile: 1.750000\n",
- "Maximum: 2.000000\n",
- "Length: 2\n",
- "Type: Int64\n",
- "\n",
- "B\n",
- "Summary Stats:\n",
- "Mean: 1.000000\n",
- "Minimum: 1.000000\n",
- "1st Quartile: 1.000000\n",
- "Median: 1.000000\n",
- "3rd Quartile: 1.000000\n",
- "Maximum: 1.000000\n",
- "Length: 2\n",
- "Type: Union{Float64, Missings.Missing}\n",
- "Number Missing: 1\n",
- "% Missing: 50.000000\n",
- "\n",
- "C\n",
- "Summary Stats:\n",
- "Length: 2\n",
- "Type: String\n",
- "Number Unique: 2\n",
- "\n"
- ]
+ "data": {
+ "text/html": [
+ " | variable | mean | min | median | max | nunique | nmissing | eltype |
---|
1 | A | 1.5 | 1 | 1.5 | 2 | | | Int64 |
---|
2 | B | 1.0 | 1.0 | 1.0 | 1.0 | | 1 | Float64 |
---|
3 | C | | a | | b | 2 | | String |
---|
"
+ ],
+ "text/plain": [
+ "3×8 DataFrames.DataFrame\n",
+ "│ Row │ variable │ mean │ min │ median │ max │ nunique │ nmissing │ eltype │\n",
+ "├─────┼──────────┼──────┼─────┼────────┼─────┼─────────┼──────────┼─────────┤\n",
+ "│ 1 │ A │ 1.5 │ 1 │ 1.5 │ 2 │ │ │ Int64 │\n",
+ "│ 2 │ B │ 1.0 │ 1.0 │ 1.0 │ 1.0 │ │ 1 │ Float64 │\n",
+ "│ 3 │ C │ │ a │ │ b │ 2 │ │ String │"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
"describe(x)"
]
},
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Use `showcols` to get informaton about columns stored in a DataFrame."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "2×3 DataFrames.DataFrame\n",
- "│ Col # │ Name │ Eltype │ Missing │ Values │\n",
- "├───────┼──────┼──────────────────────────────────┼─────────┼─────────────────┤\n",
- "│ 1 │ A │ Int64 │ 0 │ 1 … 2 │\n",
- "│ 2 │ B │ Union{Float64, Missings.Missing} │ 1 │ 1.0 … missing │\n",
- "│ 3 │ C │ String │ 0 │ a … b │"
- ]
- }
- ],
- "source": [
- "showcols(x)"
- ]
- },
{
"cell_type": "markdown",
"metadata": {},
@@ -200,7 +151,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -212,7 +163,7 @@
" :C"
]
},
- "execution_count": 7,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -221,6 +172,13 @@
"names(x)"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Future tip: In Julia 0.7 `propertynames` is also supported."
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -230,7 +188,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
@@ -242,7 +200,7 @@
" String "
]
},
- "execution_count": 8,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -260,10 +218,8 @@
},
{
"cell_type": "code",
- "execution_count": 9,
- "metadata": {
- "collapsed": true
- },
+ "execution_count": 8,
+ "metadata": {},
"outputs": [],
"source": [
"y = DataFrame(rand(1:10, 1000, 10));"
@@ -278,27 +234,27 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- " | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 |
---|
1 | 8 | 6 | 1 | 2 | 7 | 10 | 5 | 1 | 5 | 10 |
---|
2 | 8 | 9 | 6 | 6 | 10 | 4 | 9 | 3 | 10 | 9 |
---|
3 | 5 | 1 | 4 | 3 | 10 | 5 | 1 | 10 | 5 | 9 |
---|
4 | 2 | 9 | 2 | 2 | 5 | 7 | 7 | 9 | 9 | 5 |
---|
5 | 4 | 8 | 4 | 10 | 8 | 5 | 1 | 2 | 1 | 10 |
---|
6 | 8 | 6 | 6 | 8 | 3 | 3 | 3 | 6 | 8 | 6 |
---|
"
+ " | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 |
---|
1 | 4 | 1 | 2 | 8 | 10 | 7 | 5 | 1 | 8 | 3 |
---|
2 | 5 | 6 | 6 | 3 | 2 | 4 | 9 | 10 | 10 | 4 |
---|
3 | 3 | 5 | 4 | 8 | 4 | 4 | 4 | 6 | 6 | 9 |
---|
4 | 8 | 8 | 2 | 6 | 7 | 3 | 6 | 4 | 10 | 6 |
---|
5 | 1 | 2 | 6 | 10 | 4 | 7 | 7 | 7 | 5 | 4 |
---|
6 | 5 | 7 | 9 | 10 | 5 | 1 | 6 | 2 | 3 | 4 |
---|
"
],
"text/plain": [
"6×10 DataFrames.DataFrame\n",
"│ Row │ x1 │ x2 │ x3 │ x4 │ x5 │ x6 │ x7 │ x8 │ x9 │ x10 │\n",
"├─────┼────┼────┼────┼────┼────┼────┼────┼────┼────┼─────┤\n",
- "│ 1 │ 8 │ 6 │ 1 │ 2 │ 7 │ 10 │ 5 │ 1 │ 5 │ 10 │\n",
- "│ 2 │ 8 │ 9 │ 6 │ 6 │ 10 │ 4 │ 9 │ 3 │ 10 │ 9 │\n",
- "│ 3 │ 5 │ 1 │ 4 │ 3 │ 10 │ 5 │ 1 │ 10 │ 5 │ 9 │\n",
- "│ 4 │ 2 │ 9 │ 2 │ 2 │ 5 │ 7 │ 7 │ 9 │ 9 │ 5 │\n",
- "│ 5 │ 4 │ 8 │ 4 │ 10 │ 8 │ 5 │ 1 │ 2 │ 1 │ 10 │\n",
- "│ 6 │ 8 │ 6 │ 6 │ 8 │ 3 │ 3 │ 3 │ 6 │ 8 │ 6 │"
+ "│ 1 │ 4 │ 1 │ 2 │ 8 │ 10 │ 7 │ 5 │ 1 │ 8 │ 3 │\n",
+ "│ 2 │ 5 │ 6 │ 6 │ 3 │ 2 │ 4 │ 9 │ 10 │ 10 │ 4 │\n",
+ "│ 3 │ 3 │ 5 │ 4 │ 8 │ 4 │ 4 │ 4 │ 6 │ 6 │ 9 │\n",
+ "│ 4 │ 8 │ 8 │ 2 │ 6 │ 7 │ 3 │ 6 │ 4 │ 10 │ 6 │\n",
+ "│ 5 │ 1 │ 2 │ 6 │ 10 │ 4 │ 7 │ 7 │ 7 │ 5 │ 4 │\n",
+ "│ 6 │ 5 │ 7 │ 9 │ 10 │ 5 │ 1 │ 6 │ 2 │ 3 │ 4 │"
]
},
- "execution_count": 10,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -316,24 +272,24 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
- " | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 |
---|
1 | 1 | 10 | 5 | 7 | 8 | 6 | 1 | 2 | 3 | 6 |
---|
2 | 1 | 1 | 2 | 7 | 9 | 7 | 3 | 3 | 3 | 3 |
---|
3 | 4 | 6 | 1 | 2 | 1 | 1 | 4 | 7 | 9 | 4 |
---|
"
+ " | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 |
---|
1 | 2 | 8 | 3 | 7 | 6 | 4 | 3 | 8 | 5 | 4 |
---|
2 | 7 | 1 | 5 | 5 | 3 | 6 | 1 | 8 | 5 | 1 |
---|
3 | 8 | 1 | 10 | 9 | 4 | 2 | 10 | 2 | 6 | 6 |
---|
"
],
"text/plain": [
"3×10 DataFrames.DataFrame\n",
"│ Row │ x1 │ x2 │ x3 │ x4 │ x5 │ x6 │ x7 │ x8 │ x9 │ x10 │\n",
"├─────┼────┼────┼────┼────┼────┼────┼────┼────┼────┼─────┤\n",
- "│ 1 │ 1 │ 10 │ 5 │ 7 │ 8 │ 6 │ 1 │ 2 │ 3 │ 6 │\n",
- "│ 2 │ 1 │ 1 │ 2 │ 7 │ 9 │ 7 │ 3 │ 3 │ 3 │ 3 │\n",
- "│ 3 │ 4 │ 6 │ 1 │ 2 │ 1 │ 1 │ 4 │ 7 │ 9 │ 4 │"
+ "│ 1 │ 2 │ 8 │ 3 │ 7 │ 6 │ 4 │ 3 │ 8 │ 5 │ 4 │\n",
+ "│ 2 │ 7 │ 1 │ 5 │ 5 │ 3 │ 6 │ 1 │ 8 │ 5 │ 1 │\n",
+ "│ 3 │ 8 │ 1 │ 10 │ 9 │ 4 │ 2 │ 10 │ 2 │ 6 │ 6 │"
]
},
- "execution_count": 11,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@@ -353,7 +309,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
@@ -362,7 +318,7 @@
"([1, 2], [1, 2], [1, 2])"
]
},
- "execution_count": 12,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -371,6 +327,13 @@
"x[1], x[:A], x[:, 1]"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Future tip: In Julia 0.7 accessing a column using the `x.A` syntax (`getproperty`/`setproperty!`) is also supported."
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -380,7 +343,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
@@ -395,7 +358,7 @@
"│ 1 │ 1 │ 1.0 │ a │"
]
},
- "execution_count": 13,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -413,7 +376,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
@@ -422,7 +385,7 @@
"1"
]
},
- "execution_count": 14,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -440,7 +403,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -456,7 +419,7 @@
"│ 2 │ 1 │ 1.0 │ b │"
]
},
- "execution_count": 15,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -475,7 +438,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 15,
"metadata": {},
"outputs": [
{
@@ -491,7 +454,7 @@
"│ 2 │ 2 │ 2.0 │ b │"
]
},
- "execution_count": 16,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -510,7 +473,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -526,7 +489,7 @@
"│ 2 │ 7 │ 8.0 │ b │"
]
},
- "execution_count": 17,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -539,7 +502,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Julia 0.6.0",
+ "display_name": "Julia 0.6.2",
"language": "julia",
"name": "julia-0.6"
},
@@ -547,7 +510,7 @@
"file_extension": ".jl",
"mimetype": "application/julia",
"name": "julia",
- "version": "0.6.0"
+ "version": "0.6.3"
}
},
"nbformat": 4,
diff --git a/03_missingvalues.ipynb b/03_missingvalues.ipynb
index 44abc5b..c580a08 100644
--- a/03_missingvalues.ipynb
+++ b/03_missingvalues.ipynb
@@ -5,7 +5,7 @@
"metadata": {},
"source": [
"# Introduction to DataFrames\n",
- "**[Bogumił Kamiński](http://bogumilkaminski.pl/about/), May 23, 2018**"
+ "**[Bogumił Kamiński](http://bogumilkaminski.pl/about/), July 25, 2018**"
]
},
{
@@ -391,7 +391,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "Caution: `nothing` would also be replaced here (for Julia 0.7 a more sophisticated behavior of `coalesce` that allows to avoid this problem is planned)."
+ "Future tip: `nothing` would also be replaced here (for Julia 0.7 `coalesce` will only handle `missing`)."
]
},
{
@@ -641,7 +641,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "When we call `showcols` on a `DataFrame` with dropped missing values, the columns still allow missing values."
+ "When we call `describe` on a `DataFrame` with dropped missing values, the columns still allow missing values."
]
},
{
@@ -650,48 +650,60 @@
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "2×2 DataFrames.DataFrame\n",
- "│ Col # │ Name │ Eltype │ Missing │ Values │\n",
- "├───────┼──────┼─────────────────────────────────┼─────────┼─────────┤\n",
- "│ 1 │ A │ Union{Int64, Missings.Missing} │ 0 │ 1 … 4 │\n",
- "│ 2 │ B │ Union{Missings.Missing, String} │ 0 │ A … C │"
- ]
+ "data": {
+ "text/html": [
+ " | variable | mean | min | median | max | nunique | nmissing | eltype |
---|
1 | A | 2.5 | 1 | 2.5 | 4 | | 0 | Int64 |
---|
2 | B | | A | | C | 2 | 0 | String |
---|
"
+ ],
+ "text/plain": [
+ "2×8 DataFrames.DataFrame\n",
+ "│ Row │ variable │ mean │ min │ median │ max │ nunique │ nmissing │ eltype │\n",
+ "├─────┼──────────┼──────┼─────┼────────┼─────┼─────────┼──────────┼────────┤\n",
+ "│ 1 │ A │ 2.5 │ 1 │ 2.5 │ 4 │ │ 0 │ Int64 │\n",
+ "│ 2 │ B │ │ A │ │ C │ 2 │ 0 │ String │"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
- "showcols(x)"
+ "describe(x)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Since we've excluded missing values, we can safely use `disallowmissing!` so that the columns will no longer accept missing values."
+ "Since we've excluded missing values, we can safely use `disallowmissing!` so that the columns will no longer accept missing values (we can see this because the `nmissing` column is then empty)."
]
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 25,
"metadata": {},
"outputs": [
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "2×2 DataFrames.DataFrame\n",
- "│ Col # │ Name │ Eltype │ Missing │ Values │\n",
- "├───────┼──────┼────────┼─────────┼─────────┤\n",
- "│ 1 │ A │ Int64 │ 0 │ 1 … 4 │\n",
- "│ 2 │ B │ String │ 0 │ A … C │"
- ]
+ "data": {
+ "text/html": [
+ " | variable | mean | min | median | max | nunique | nmissing | eltype |
---|
1 | A | 2.5 | 1 | 2.5 | 4 | | | Int64 |
---|
2 | B | | A | | C | 2 | | String |
---|
"
+ ],
+ "text/plain": [
+ "2×8 DataFrames.DataFrame\n",
+ "│ Row │ variable │ mean │ min │ median │ max │ nunique │ nmissing │ eltype │\n",
+ "├─────┼──────────┼──────┼─────┼────────┼─────┼─────────┼──────────┼────────┤\n",
+ "│ 1 │ A │ 2.5 │ 1 │ 2.5 │ 4 │ │ │ Int64 │\n",
+ "│ 2 │ B │ │ A │ │ C │ 2 │ │ String │"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
"disallowmissing!(x)\n",
- "showcols(x)"
+ "describe(x)"
]
}
],
@@ -705,7 +717,7 @@
"file_extension": ".jl",
"mimetype": "application/julia",
"name": "julia",
- "version": "0.6.2"
+ "version": "0.6.3"
}
},
"nbformat": 4,
diff --git a/04_loadsave.ipynb b/04_loadsave.ipynb
index b6b1a1e..8ca3145 100644
--- a/04_loadsave.ipynb
+++ b/04_loadsave.ipynb
@@ -5,15 +5,13 @@
"metadata": {},
"source": [
"# Introduction to DataFrames\n",
- "**[Bogumił Kamiński](http://bogumilkaminski.pl/about/), May 23, 2018**"
+ "**[Bogumił Kamiński](http://bogumilkaminski.pl/about/), July 25, 2018**"
]
},
{
"cell_type": "code",
"execution_count": 1,
- "metadata": {
- "collapsed": true
- },
+ "metadata": {},
"outputs": [],
"source": [
"using DataFrames # load package"
@@ -32,9 +30,7 @@
{
"cell_type": "code",
"execution_count": 2,
- "metadata": {
- "collapsed": true
- },
+ "metadata": {},
"outputs": [],
"source": [
"using CSV\n",
@@ -131,7 +127,8 @@
" dateformat: nothing\n",
" decimal: '.'\n",
" truestring: 'true'\n",
- " falsestring: 'false', IOBuffer(data=UInt8[...], readable=true, writable=true, seekable=true, append=false, size=0, maxsize=Inf, ptr=1, mark=-1), \"x.csv\", 8, true, String[\"A\", \"B\", \"C\", \"D\"], 4, false, Val{false})"
+ " falsestring: 'false'\n",
+ " internstrings: true, IOBuffer(data=UInt8[...], readable=true, writable=true, seekable=true, append=false, size=0, maxsize=Inf, ptr=1, mark=-1), \"x.csv\", 8, true, String[\"A\", \"B\", \"C\", \"D\"], 4, false, Val{false})"
]
},
"execution_count": 5,
@@ -246,9 +243,7 @@
{
"cell_type": "code",
"execution_count": 9,
- "metadata": {
- "collapsed": true
- },
+ "metadata": {},
"outputs": [],
"source": [
"save(\"x.jld\", \"x\", x)"
@@ -338,15 +333,15 @@
"name": "stdout",
"output_type": "stream",
"text": [
- " 0.782157 seconds (688.90 k allocations: 30.828 MiB, 1.08% gc time)\n",
- " 0.018250 seconds (203.61 k allocations: 3.339 MiB)\n"
+ " 2.529390 seconds (936.79 k allocations: 44.927 MiB, 0.83% gc time)\n",
+ " 0.018948 seconds (203.62 k allocations: 3.339 MiB)\n"
]
},
{
"data": {
"text/plain": [
"2-element Array{Int64,1}:\n",
- " 595307\n",
+ " 595456\n",
" 154487"
]
},
@@ -372,9 +367,7 @@
{
"cell_type": "code",
"execution_count": 13,
- "metadata": {
- "collapsed": true
- },
+ "metadata": {},
"outputs": [],
"source": [
"foreach(rm, [\"x.csv\", \"x.jld\", \"bigdf.csv\", \"bigdf.jld\"])"
@@ -383,7 +376,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Julia 0.6.0",
+ "display_name": "Julia 0.6.2",
"language": "julia",
"name": "julia-0.6"
},
@@ -391,7 +384,7 @@
"file_extension": ".jl",
"mimetype": "application/julia",
"name": "julia",
- "version": "0.6.0"
+ "version": "0.6.3"
}
},
"nbformat": 4,
diff --git a/05_columns.ipynb b/05_columns.ipynb
index e9c4ab6..2790a92 100644
--- a/05_columns.ipynb
+++ b/05_columns.ipynb
@@ -5,15 +5,13 @@
"metadata": {},
"source": [
"# Introduction to DataFrames\n",
- "**[Bogumił Kamiński](http://bogumilkaminski.pl/about/), May 23, 2018**"
+ "**[Bogumił Kamiński](http://bogumilkaminski.pl/about/), July 25, 2018**"
]
},
{
"cell_type": "code",
"execution_count": 1,
- "metadata": {
- "collapsed": true
- },
+ "metadata": {},
"outputs": [],
"source": [
"using DataFrames # load package"
@@ -43,7 +41,7 @@
{
"data": {
"text/html": [
- " | x1 | x2 | x3 | x4 |
---|
1 | false | false | false | false |
---|
2 | false | false | false | false |
---|
3 | false | false | false | false |
---|
"
+ " | x1 | x2 | x3 | x4 |
---|
1 | false | false | false | false |
---|
2 | false | false | false | false |
---|
3 | true | true | true | false |
---|
"
],
"text/plain": [
"3×4 DataFrames.DataFrame\n",
@@ -51,7 +49,7 @@
"├─────┼───────┼───────┼───────┼───────┤\n",
"│ 1 │ false │ false │ false │ false │\n",
"│ 2 │ false │ false │ false │ false │\n",
- "│ 3 │ false │ false │ false │ false │"
+ "│ 3 │ true │ true │ true │ false │"
]
},
"execution_count": 2,
@@ -78,7 +76,7 @@
{
"data": {
"text/html": [
- " | A | x2 | x3 | x4 |
---|
1 | false | false | false | false |
---|
2 | false | false | false | false |
---|
3 | false | false | false | false |
---|
"
+ " | A | x2 | x3 | x4 |
---|
1 | false | false | false | false |
---|
2 | false | false | false | false |
---|
3 | true | true | true | false |
---|
"
],
"text/plain": [
"3×4 DataFrames.DataFrame\n",
@@ -86,7 +84,7 @@
"├─────┼───────┼───────┼───────┼───────┤\n",
"│ 1 │ false │ false │ false │ false │\n",
"│ 2 │ false │ false │ false │ false │\n",
- "│ 3 │ false │ false │ false │ false │"
+ "│ 3 │ true │ true │ true │ false │"
]
},
"execution_count": 3,
@@ -115,7 +113,7 @@
{
"data": {
"text/html": [
- " | x1x1 | x2x2 | x3x3 | x4x4 |
---|
1 | false | false | false | false |
---|
2 | false | false | false | false |
---|
3 | false | false | false | false |
---|
"
+ " | x1x1 | x2x2 | x3x3 | x4x4 |
---|
1 | false | false | false | false |
---|
2 | false | false | false | false |
---|
3 | true | true | true | false |
---|
"
],
"text/plain": [
"3×4 DataFrames.DataFrame\n",
@@ -123,7 +121,7 @@
"├─────┼───────┼───────┼───────┼───────┤\n",
"│ 1 │ false │ false │ false │ false │\n",
"│ 2 │ false │ false │ false │ false │\n",
- "│ 3 │ false │ false │ false │ false │"
+ "│ 3 │ true │ true │ true │ false │"
]
},
"execution_count": 4,
@@ -152,7 +150,7 @@
{
"data": {
"text/html": [
- " | x1x1 | x2x2 | third | x4x4 |
---|
1 | false | false | false | false |
---|
2 | false | false | false | false |
---|
3 | false | false | false | false |
---|
"
+ " | x1x1 | x2x2 | third | x4x4 |
---|
1 | false | false | false | false |
---|
2 | false | false | false | false |
---|
3 | true | true | true | false |
---|
"
],
"text/plain": [
"3×4 DataFrames.DataFrame\n",
@@ -160,7 +158,7 @@
"├─────┼───────┼───────┼───────┼───────┤\n",
"│ 1 │ false │ false │ false │ false │\n",
"│ 2 │ false │ false │ false │ false │\n",
- "│ 3 │ false │ false │ false │ false │"
+ "│ 3 │ true │ true │ true │ false │"
]
},
"execution_count": 5,
@@ -187,7 +185,7 @@
{
"data": {
"text/html": [
- " | a | b | c | d |
---|
1 | false | false | false | false |
---|
2 | false | false | false | false |
---|
3 | false | false | false | false |
---|
"
+ " | a | b | c | d |
---|
1 | false | false | false | false |
---|
2 | false | false | false | false |
---|
3 | true | true | true | false |
---|
"
],
"text/plain": [
"3×4 DataFrames.DataFrame\n",
@@ -195,7 +193,7 @@
"├─────┼───────┼───────┼───────┼───────┤\n",
"│ 1 │ false │ false │ false │ false │\n",
"│ 2 │ false │ false │ false │ false │\n",
- "│ 3 │ false │ false │ false │ false │"
+ "│ 3 │ true │ true │ true │ false │"
]
},
"execution_count": 6,
@@ -253,7 +251,7 @@
{
"data": {
"text/html": [
- " | a | a_1 | a_2 | a_3 |
---|
1 | false | false | false | false |
---|
2 | false | false | false | false |
---|
3 | false | false | false | false |
---|
"
+ " | a | a_1 | a_2 | a_3 |
---|
1 | false | false | false | false |
---|
2 | false | false | false | false |
---|
3 | true | true | true | false |
---|
"
],
"text/plain": [
"3×4 DataFrames.DataFrame\n",
@@ -261,7 +259,7 @@
"├─────┼───────┼───────┼───────┼───────┤\n",
"│ 1 │ false │ false │ false │ false │\n",
"│ 2 │ false │ false │ false │ false │\n",
- "│ 3 │ false │ false │ false │ false │"
+ "│ 3 │ true │ true │ true │ false │"
]
},
"execution_count": 8,
@@ -297,7 +295,7 @@
{
"data": {
"text/html": [
- " | a_1 | a_3 | a_2 | a |
---|
1 | false | false | false | false |
---|
2 | false | false | false | false |
---|
3 | false | false | false | false |
---|
"
+ " | a_1 | a_3 | a_2 | a |
---|
1 | false | false | false | false |
---|
2 | false | false | false | false |
---|
3 | true | false | true | true |
---|
"
],
"text/plain": [
"3×4 DataFrames.DataFrame\n",
@@ -305,7 +303,7 @@
"├─────┼───────┼───────┼───────┼───────┤\n",
"│ 1 │ false │ false │ false │ false │\n",
"│ 2 │ false │ false │ false │ false │\n",
- "│ 3 │ false │ false │ false │ false │"
+ "│ 3 │ true │ false │ true │ true │"
]
},
"execution_count": 9,
@@ -322,7 +320,35 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "also `permutecols!` will be introduced in next release of DataFrames"
+ "Also `permutecols!` can be used to achieve this in place:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | a_3 | a_2 | a_1 | a |
---|
1 | false | false | false | false |
---|
2 | false | false | false | false |
---|
3 | false | true | true | true |
---|
"
+ ],
+ "text/plain": [
+ "3×4 DataFrames.DataFrame\n",
+ "│ Row │ a_3 │ a_2 │ a_1 │ a │\n",
+ "├─────┼───────┼───────┼───────┼───────┤\n",
+ "│ 1 │ false │ false │ false │ false │\n",
+ "│ 2 │ false │ false │ false │ false │\n",
+ "│ 3 │ false │ true │ true │ true │"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "permutecols!(x, 4:-1:1); x"
]
},
{
@@ -334,7 +360,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
@@ -351,7 +377,7 @@
"│ 3 │ (3, 1) │ (3, 2) │ (3, 3) │ (3, 4) │"
]
},
- "execution_count": 10,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -369,7 +395,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
@@ -386,7 +412,7 @@
"│ 3 │ (3, 1) │ (3, 2) │ (3, 3) │ (3, 4) │ (3, 1) │ (3, 2) │ (3, 3) │ (3, 4) │"
]
},
- "execution_count": 11,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -404,7 +430,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
@@ -421,7 +447,7 @@
"│ 3 │ (3, 1) │ (3, 2) │ (3, 3) │ (3, 4) │ 3 │"
]
},
- "execution_count": 12,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -439,7 +465,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -456,7 +482,7 @@
"│ 3 │ 3 │ (3, 1) │ (3, 2) │ (3, 3) │ (3, 4) │"
]
},
- "execution_count": 13,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -474,7 +500,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 15,
"metadata": {},
"outputs": [
{
@@ -491,7 +517,7 @@
"│ 3 │ (3, 1) │ (3, 2) │ (3, 3) │ (3, 4) │ 3 │"
]
},
- "execution_count": 14,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -509,7 +535,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -526,7 +552,7 @@
"│ 3 │ 3 │ (3, 1) │ (3, 2) │ (3, 3) │ (3, 4) │"
]
},
- "execution_count": 15,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -544,14 +570,14 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- " 20.993 μs (133 allocations: 10.20 KiB)\n"
+ " 21.460 μs (133 allocations: 10.20 KiB)\n"
]
},
{
@@ -568,7 +594,7 @@
"│ 3 │ (3, 1) │ (3, 2) │ 3 │ (3, 3) │ (3, 4) │"
]
},
- "execution_count": 16,
+ "execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@@ -587,7 +613,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 18,
"metadata": {},
"outputs": [
{
@@ -604,7 +630,7 @@
"│ 3 │ 3 │ 3 │ (3, 1) │ (3, 2) │ (3, 3) │ (3, 4) │"
]
},
- "execution_count": 17,
+ "execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@@ -622,7 +648,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 19,
"metadata": {},
"outputs": [
{
@@ -639,7 +665,7 @@
"│ 3 │ 3 │ 3 │ 3 │ (3, 1) │ (3, 2) │ (3, 3) │ (3, 4) │"
]
},
- "execution_count": 18,
+ "execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
@@ -657,7 +683,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 20,
"metadata": {},
"outputs": [
{
@@ -681,7 +707,7 @@
"│ 3 │ (3, 1) │ (3, 2) │ 3 │ (3, 3) │ (3, 4) │"
]
},
- "execution_count": 19,
+ "execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
@@ -699,7 +725,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 21,
"metadata": {},
"outputs": [
{
@@ -716,7 +742,7 @@
"│ 3 │ (3, 1) │ (3, 2) │ (3, 3) │ (3, 4) │ 3 │"
]
},
- "execution_count": 20,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
@@ -734,7 +760,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 22,
"metadata": {},
"outputs": [
{
@@ -751,7 +777,7 @@
"│ 3 │ 3 │ (3, 1) │ (3, 2) │ (3, 3) │ (3, 4) │ 3 │"
]
},
- "execution_count": 21,
+ "execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
@@ -769,7 +795,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 23,
"metadata": {},
"outputs": [
{
@@ -793,7 +819,7 @@
"│ 3 │ 'c' │ 6 │ 'f' │ 13 │)"
]
},
- "execution_count": 22,
+ "execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
@@ -813,7 +839,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 24,
"metadata": {},
"outputs": [
{
@@ -830,7 +856,7 @@
"│ 3 │ 3 │ 6 │ 'c' │ 'f' │ 13 │"
]
},
- "execution_count": 23,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -852,7 +878,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 25,
"metadata": {},
"outputs": [
{
@@ -869,7 +895,7 @@
"│ 3 │ (3, 1) │ (3, 2) │ (3, 3) │ (3, 4) │ (3, 5) │"
]
},
- "execution_count": 24,
+ "execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
@@ -887,7 +913,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 26,
"metadata": {},
"outputs": [
{
@@ -904,7 +930,7 @@
"│ 3 │ (3, 1) │ (3, 2) │ (3, 4) │ (3, 5) │"
]
},
- "execution_count": 25,
+ "execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
@@ -922,7 +948,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 27,
"metadata": {},
"outputs": [
{
@@ -939,7 +965,7 @@
"│ 3 │ (3, 1) │ (3, 4) │"
]
},
- "execution_count": 26,
+ "execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
@@ -957,7 +983,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 28,
"metadata": {},
"outputs": [
{
@@ -974,7 +1000,7 @@
"│ 3 │ (3, 1) │ (3, 3) │ (3, 5) │"
]
},
- "execution_count": 27,
+ "execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
@@ -992,7 +1018,7 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 29,
"metadata": {},
"outputs": [
{
@@ -1009,7 +1035,7 @@
"│ 3 │ (3, 1) │"
]
},
- "execution_count": 28,
+ "execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
@@ -1027,7 +1053,7 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 30,
"metadata": {},
"outputs": [
{
@@ -1039,7 +1065,7 @@
" (3, 1)"
]
},
- "execution_count": 29,
+ "execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
@@ -1057,7 +1083,7 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 31,
"metadata": {},
"outputs": [
{
@@ -1069,7 +1095,7 @@
" (3, 1)"
]
},
- "execution_count": 30,
+ "execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
@@ -1087,7 +1113,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 32,
"metadata": {},
"outputs": [
{
@@ -1099,7 +1125,7 @@
"0×0 DataFrames.DataFrame\n"
]
},
- "execution_count": 31,
+ "execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
@@ -1117,7 +1143,7 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 33,
"metadata": {},
"outputs": [
{
@@ -1136,7 +1162,7 @@
"│ 3 │ (3, 1) │ (3, 2) │ (3, 4) │ (3, 5) │)"
]
},
- "execution_count": 32,
+ "execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
@@ -1155,7 +1181,7 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 34,
"metadata": {},
"outputs": [
{
@@ -1172,7 +1198,7 @@
"│ 3 │ (3, 1) │ (3, 2) │ (3, 3) │ (3, 4) │ (3, 5) │"
]
},
- "execution_count": 33,
+ "execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
@@ -1190,7 +1216,7 @@
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 35,
"metadata": {},
"outputs": [
{
@@ -1207,7 +1233,7 @@
"│ 3 │ (3, 2) │ (3, 2) │ (3, 3) │ (3, 4) │ (3, 5) │"
]
},
- "execution_count": 34,
+ "execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
@@ -1226,7 +1252,7 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 36,
"metadata": {},
"outputs": [
{
@@ -1243,7 +1269,7 @@
"│ 3 │ (3, 2) │ (3, 2) │ (3, 3) │ (3, 4) │ (3, 5) │ 3 │"
]
},
- "execution_count": 35,
+ "execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
@@ -1262,7 +1288,7 @@
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 37,
"metadata": {},
"outputs": [
{
@@ -1279,7 +1305,7 @@
"│ 3 │ (3, 2) │ (3, 2) │ (3, 3) │ (3, 4) │ (3, 5) │ 3 │ 13 │"
]
},
- "execution_count": 36,
+ "execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
@@ -1298,7 +1324,7 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": 38,
"metadata": {},
"outputs": [
{
@@ -1315,7 +1341,7 @@
"│ 3 │ (3, 1) │ (3, 2) │ (3, 3) │ (3, 4) │ (3, 5) │"
]
},
- "execution_count": 37,
+ "execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
@@ -1333,7 +1359,7 @@
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 39,
"metadata": {},
"outputs": [
{
@@ -1342,7 +1368,7 @@
"true"
]
},
- "execution_count": 38,
+ "execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
@@ -1360,7 +1386,7 @@
},
{
"cell_type": "code",
- "execution_count": 39,
+ "execution_count": 40,
"metadata": {},
"outputs": [
{
@@ -1369,7 +1395,7 @@
"2"
]
},
- "execution_count": 39,
+ "execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
@@ -1381,7 +1407,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Julia 0.6.0",
+ "display_name": "Julia 0.6.2",
"language": "julia",
"name": "julia-0.6"
},
@@ -1389,7 +1415,7 @@
"file_extension": ".jl",
"mimetype": "application/julia",
"name": "julia",
- "version": "0.6.0"
+ "version": "0.6.3"
}
},
"nbformat": 4,
diff --git a/08_joins.ipynb b/08_joins.ipynb
index 2d273be..60b83f1 100644
--- a/08_joins.ipynb
+++ b/08_joins.ipynb
@@ -5,7 +5,7 @@
"metadata": {},
"source": [
"# Introduction to DataFrames\n",
- "**[Bogumił Kamiński](http://bogumilkaminski.pl/about/), Apr 21, 2017**"
+ "**[Bogumił Kamiński](http://bogumilkaminski.pl/about/), July 25, 2018**"
]
},
{
@@ -417,20 +417,24 @@
{
"data": {
"text/html": [
- " | id1 | id2 | name | id2_1 | age |
---|
1 | 1 | 1 | Alice | 11 | 21 |
---|
2 | 1 | 1 | Alice | 1 | 22 |
---|
3 | 1 | 11 | Bob | 11 | 21 |
---|
4 | 1 | 11 | Bob | 1 | 22 |
---|
5 | missing | missing | Zed | missing | 99 |
---|
6 | missing | missing | Zed | 999 | 100 |
---|
7 | missing | 99 | Zoe | missing | 99 |
---|
8 | missing | 99 | Zoe | 999 | 100 |
---|
"
+ " | id1 | id2 | name | id2_1 | age | source |
---|
1 | 1 | 1 | Alice | 11 | 21 | both |
---|
2 | 1 | 1 | Alice | 1 | 22 | both |
---|
3 | 1 | 11 | Bob | 11 | 21 | both |
---|
4 | 1 | 11 | Bob | 1 | 22 | both |
---|
5 | 2 | 2 | Conor | missing | missing | left_only |
---|
6 | 2 | 21 | Dave | missing | missing | left_only |
---|
7 | missing | missing | Zed | missing | 99 | both |
---|
8 | missing | missing | Zed | 999 | 100 | both |
---|
9 | missing | 99 | Zoe | missing | 99 | both |
---|
10 | missing | 99 | Zoe | 999 | 100 | both |
---|
11 | 3 | missing | missing | 31 | 23 | right_only |
---|
12 | 3 | missing | missing | 3 | 24 | right_only |
---|
"
],
"text/plain": [
- "8×5 DataFrames.DataFrame\n",
- "│ Row │ id1 │ id2 │ name │ id2_1 │ age │\n",
- "├─────┼─────────┼─────────┼───────┼─────────┼─────┤\n",
- "│ 1 │ 1 │ 1 │ Alice │ 11 │ 21 │\n",
- "│ 2 │ 1 │ 1 │ Alice │ 1 │ 22 │\n",
- "│ 3 │ 1 │ 11 │ Bob │ 11 │ 21 │\n",
- "│ 4 │ 1 │ 11 │ Bob │ 1 │ 22 │\n",
- "│ 5 │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │ Zed │ \u001b[90mmissing\u001b[39m │ 99 │\n",
- "│ 6 │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │ Zed │ 999 │ 100 │\n",
- "│ 7 │ \u001b[90mmissing\u001b[39m │ 99 │ Zoe │ \u001b[90mmissing\u001b[39m │ 99 │\n",
- "│ 8 │ \u001b[90mmissing\u001b[39m │ 99 │ Zoe │ 999 │ 100 │"
+ "12×6 DataFrames.DataFrame\n",
+ "│ Row │ id1 │ id2 │ name │ id2_1 │ age │ source │\n",
+ "├─────┼─────────┼─────────┼─────────┼─────────┼─────────┼────────────┤\n",
+ "│ 1 │ 1 │ 1 │ Alice │ 11 │ 21 │ both │\n",
+ "│ 2 │ 1 │ 1 │ Alice │ 1 │ 22 │ both │\n",
+ "│ 3 │ 1 │ 11 │ Bob │ 11 │ 21 │ both │\n",
+ "│ 4 │ 1 │ 11 │ Bob │ 1 │ 22 │ both │\n",
+ "│ 5 │ 2 │ 2 │ Conor │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │ left_only │\n",
+ "│ 6 │ 2 │ 21 │ Dave │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │ left_only │\n",
+ "│ 7 │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │ Zed │ \u001b[90mmissing\u001b[39m │ 99 │ both │\n",
+ "│ 8 │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │ Zed │ 999 │ 100 │ both │\n",
+ "│ 9 │ \u001b[90mmissing\u001b[39m │ 99 │ Zoe │ \u001b[90mmissing\u001b[39m │ 99 │ both │\n",
+ "│ 10 │ \u001b[90mmissing\u001b[39m │ 99 │ Zoe │ 999 │ 100 │ both │\n",
+ "│ 11 │ 3 │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │ 31 │ 23 │ right_only │\n",
+ "│ 12 │ 3 │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │ 3 │ 24 │ right_only │"
]
},
"execution_count": 13,
@@ -439,13 +443,35 @@
}
],
"source": [
- "join(x, y, on=[:id1], makeunique=true) # with duplicates all combinations are produced (here :inner join)"
+ "join(x, y, on=[:id1], makeunique=true, kind=:outer, indicator=:source) # with duplicates all combinations are produced"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
+ "outputs": [
+ {
+ "ename": "LoadError",
+ "evalue": "\u001b[91mArgumentError: Merge key(s) are not unique in both df1 and df2. First duplicate in df1 at 2. First duplicate in df2 at 2\u001b[39m",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[91mArgumentError: Merge key(s) are not unique in both df1 and df2. First duplicate in df1 at 2. First duplicate in df2 at 2\u001b[39m",
+ "",
+ "Stacktrace:",
+ " [1] \u001b[1m#join#138\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Array{Symbol,1}, ::Symbol, ::Bool, ::Void, ::Tuple{Bool,Bool}, ::Function, ::DataFrames.DataFrame, ::DataFrames.DataFrame\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\abstractdataframe\\join.jl:327\u001b[22m\u001b[22m",
+ " [2] \u001b[1m(::Base.#kw##join)\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::Array{Any,1}, ::Base.#join, ::DataFrames.DataFrame, ::DataFrames.DataFrame\u001b[1m)\u001b[22m\u001b[22m at \u001b[1m.\\:0\u001b[22m\u001b[22m"
+ ]
+ }
+ ],
+ "source": [
+ "join(x, y, on=[:id1], makeunique=true, validate=(true,true)) # you can force validation of uniqueness of key on which you join"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
"outputs": [
{
"data": {
@@ -462,7 +488,7 @@
"│ 4 │ \u001b[90mmissing\u001b[39m │ 99 │ Zoe │"
]
},
- "execution_count": 14,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -482,7 +508,7 @@
"file_extension": ".jl",
"mimetype": "application/julia",
"name": "julia",
- "version": "0.6.2"
+ "version": "0.6.3"
}
},
"nbformat": 4,
diff --git a/10_transforms.ipynb b/10_transforms.ipynb
index 6bfeb84..8281c3e 100644
--- a/10_transforms.ipynb
+++ b/10_transforms.ipynb
@@ -562,7 +562,7 @@
"file_extension": ".jl",
"mimetype": "application/julia",
"name": "julia",
- "version": "0.6.2"
+ "version": "0.6.3"
}
},
"nbformat": 4,
diff --git a/README.md b/README.md
index 71e7a3d..9865043 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,9 @@
# An Introduction to DataFrames
-[Bogumił Kamiński](http://bogumilkaminski.pl/about/), May 23, 2018
+[Bogumił Kamiński](http://bogumilkaminski.pl/about/), July 25, 2018
A brief introduction to basic usage of [DataFrames](https://github.com/JuliaData/DataFrames.jl).
-Tested under Julia 0.6.2, DataFrames 0.11.6, CSV 0.2.4, JLD 0.8.3, Missings 0.2.9, CategoricalArrays 0.3.9, FreqTables 0.2.2, DataFramesMeta 0.3.0, StatPlots 0.7.2.
+Tested under Julia 0.6.3, DataFrames 0.11.7, CSV 0.2.5, JLD 0.8.3, Missings 0.2.10, CategoricalArrays 0.3.11, FreqTables 0.2.2, DataFramesMeta 0.3.0, StatPlots 0.7.2.
I will try to keep it up to date as the package evolves.
This tutorial covers
@@ -54,14 +54,15 @@ Changelog:
| 2018-05-01 | Added `byrow!` example |
| 2018-05-13 | Added `StatPlots` package to extras |
| 2018-05-23 | Improved comments in sections 1 do 5 by [Jane Herriman](https://github.com/xorJane) |
+| 2018-07-25 | Update to 0.11.7 release |
# Core functions summary
1. Constructors: `DataFrame`
-2. Getting summary: `size`, `nrow`, `ncol`, `length`, `describe`, `showcols`, `names`, `eltypes`, `head`, `tail`
+2. Getting summary: `size`, `nrow`, `ncol`, `length`, `describe`, `names`, `eltypes`, `head`, `tail`
3. Handling missing: `missing` (singleton instance of `Missing`), `ismissing`, `Missings.T`, `skipmissing`, `coalesce`, `allowmissing`, `disallowmissing`, `allowmissing!`, `completecases`, `dropmissing`, `dropmissing!`, disallowmissing, disallowmissing!
4. Loading and saving: `CSV` (package), `JLD` (package), `CSV.read`, `CSV.write`, `save` (from `JLD`), `load` (from `JLD`)
-5. Working with columns: `rename`, `rename!`, `names!`, `hcat`, `insert!`, `DataFrames.hcat!`, `merge!`, `delete!`, `empty!`, `categorical!`, `DataFrames.index`
+5. Working with columns: `rename`, `rename!`, `names!`, `hcat`, `insert!`, `DataFrames.hcat!`, `merge!`, `delete!`, `empty!`, `categorical!`, `DataFrames.index`, `permutecols!`
6. Working with rows: `sort!`, `sort`, `issorted`, `append!`, `vcat`, `push!`, `view`, `filter`, `filter!`, `deleterows!`, `unique`, `nonunique`, `unique!`
7. Working with categorical: `categorical`, `cut`, `isordered`, `ordered!`, `levels`, `unique`, `levels!`, `droplevels!`, `get`, `recode`, `recode!`
8. Joining: `join`
@@ -75,7 +76,3 @@ Changelog:
# Changes in DataFrames master since last update of the tutorial
-1. Improved rendering of `#undef` in HTML/LaTeX.
-2. Added `permutecols!` function.
-3. `describe` returns a `DataFrame`
-4. On Julia 0.7 you can access columns of `DataFrame` using `.` notation