JuliaML
diff --git a/‎.github/workflows/Documenter.yml
Lines changed: 2 additions & 0 deletions b/‎.github/workflows/Documenter.yml
Lines changed: 2 additions & 0 deletions
diff --git a/‎.github/workflows/UnitTest.yml
Lines changed: 2 additions & 1 deletion b/‎.github/workflows/UnitTest.yml
Lines changed: 2 additions & 1 deletion
diff --git a/‎Project.toml
Lines changed: 3 additions & 1 deletion b/‎Project.toml
Lines changed: 3 additions & 1 deletion
diff --git a/‎README.md
Lines changed: 3 additions & 1 deletion b/‎README.md
Lines changed: 3 additions & 1 deletion
diff --git a/‎docs/make.jl
Lines changed: 4 additions & 0 deletions b/‎docs/make.jl
Lines changed: 4 additions & 0 deletions
diff --git a/‎docs/src/datasets/CiteSeer.md
Lines changed: 11 additions & 0 deletions b/‎docs/src/datasets/CiteSeer.md
Lines changed: 11 additions & 0 deletions
diff --git a/‎docs/src/datasets/Cora.md
Lines changed: 1 addition & 1 deletion b/‎docs/src/datasets/Cora.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/src/datasets/PubMed.md
Lines changed: 11 additions & 0 deletions b/‎docs/src/datasets/PubMed.md
Lines changed: 11 additions & 0 deletions
diff --git a/‎docs/src/utils.md
Lines changed: 5 additions & 0 deletions b/‎docs/src/utils.md
Lines changed: 5 additions & 0 deletions
diff --git a/‎src/CiteSeer/CiteSeer.jl
Lines changed: 79 additions & 0 deletions b/‎src/CiteSeer/CiteSeer.jl
Lines changed: 79 additions & 0 deletions
@@ -10,6 +10,8 @@ on:
 jobs:
   build:
     runs-on: ubuntu-latest
+    env:
+      PYTHON: ""
     steps:
       - uses: actions/checkout@v2
       - uses: julia-actions/setup-julia@latest
 
@@ -18,7 +18,8 @@ jobs:
       matrix:
         julia-version: ['1.0', '1', 'nightly']
         os: [ubuntu-latest, windows-latest, macOS-latest]
-
+    env:
+      PYTHON: ""
     steps:
       - uses: actions/checkout@v1.0.0
       - name: "Set up Julia"
 
@@ -1,6 +1,6 @@
 name = "MLDatasets"
 uuid = "eb30cadb-4394-5ae3-aed4-317e484a6458"
-version = "0.5.8"
+version = "0.5.9"
 
 [deps]
 BinDeps = "9e28174c-4ba2-5203-b857-d8d62c4213ee"
@@ -10,6 +10,7 @@ DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
 FixedPointNumbers = "53c48c17-4a7d-5ca2-90c5-79b7896eea93"
 GZip = "92fee26a-97fe-5a0c-ad85-20a5f3185b63"
 MAT = "23992714-dd62-5051-b70f-ba57cb901cac"
+PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 
 [compat]
@@ -20,6 +21,7 @@ FixedPointNumbers = "0.3, 0.4, 0.5, 0.6, 0.7, 0.8"
 GZip = "0.5"
 ImageCore = "0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8"
 MAT = "0.7, 0.8, 0.9, 0.10"
+PyCall = "1"
 Requires = "1"
 julia = "1"
 
 
@@ -20,7 +20,7 @@ Each dataset has its own dedicated sub-module.
 Find below a list of available datasets and links to their documentation.
 
 #### Vision
-  - [CIFAR10](https://juliaml.github.io/MLDatasets.jl/latest/datasets/CIFAR100/)
+  - [CIFAR10](https://juliaml.github.io/MLDatasets.jl/latest/datasets/CIFAR10/)
   - [CIFAR100](https://juliaml.github.io/MLDatasets.jl/latest/datasets/CIFAR100/)
   - [EMNIST](https://juliaml.github.io/MLDatasets.jl/latest/datasets/EMNIST/)
   - [FashionMNIST](https://juliaml.github.io/MLDatasets.jl/latest/datasets/FashionMNIST/)
@@ -38,7 +38,9 @@ Find below a list of available datasets and links to their documentation.
   - [UD_English](https://juliaml.github.io/MLDatasets.jl/latest/datasets/UD_English/)
 
 #### Graphs
+  - [CiteSeer](https://juliaml.github.io/MLDatasets.jl/latest/datasets/CiteSeer/)
   - [Cora](https://juliaml.github.io/MLDatasets.jl/latest/datasets/Cora/)
+  - [PubMed](https://juliaml.github.io/MLDatasets.jl/latest/datasets/PubMed/)
 
 
 
 
@@ -19,6 +19,7 @@ makedocs(
     ),
 
     authors = "Hiroyuki Shindo, Christof Stocker",
+    # TODO: automate `pages` creation
     pages = Any[
         "Home" => "index.md",
         "Available Datasets" => Any[
@@ -40,10 +41,13 @@ makedocs(
             ],
 
             "Graphs" => Any[
+                "CiteSeer" => "datasets/CiteSeer.md",
                 "Cora" => "datasets/Cora.md",
+                "PubMed" => "datasets/PubMed.md",
             ],
 
         ],
+        "Utils" => "utils.md",
         "LICENSE.md",
     ],
     strict = true
 
@@ -0,0 +1,11 @@
+# CiteSeer
+
+```@docs
+CiteSeer
+```
+
+## API reference
+
+```@docs
+CiteSeer.dataset
+```
@@ -7,5 +7,5 @@ Cora
 ## API reference
 
 ```@docs
-Cora.alldata
+Cora.dataset
 ```
@@ -0,0 +1,11 @@
+# PubMed
+
+```@docs
+PubMed
+```
+
+## API reference
+
+```@docs
+PubMed.dataset
+```
@@ -0,0 +1,5 @@
+# Utils
+
+```@docs
+MLDatasets.read_planetoid_data
+```
@@ -0,0 +1,79 @@
+export CiteSeer
+
+
+"""
+    CiteSeer
+
+The CiteSeer citation network dataset from Ref. [1].
+Nodes represent documents and edges represent citation links.
+The dataset is designed for the node classification task.
+The task is to predict the category of a given paper.
+The dataset is retrieved from Ref. [2].
+
+## Interface
+
+- [`CiteSeer.dataset`](@ref)
+
+## References
+
+[1]: [Deep Gaussian Embedding of Graphs: Unsupervised Inductive Learning via Ranking](https://arxiv.org/abs/1707.03815)
+[2]: [Planetoid](https://github.com/kimiyoung/planetoid)
+"""
+module CiteSeer
+
+using DataDeps
+using ..MLDatasets: datafile, read_planetoid_data  # NOTE(review): `datafile` is not referenced in this module — confirm it is needed
+using DelimitedFiles: readdlm  # NOTE(review): `readdlm` is not referenced in this module — confirm it is needed
+
+using PyCall  # NOTE(review): nothing from PyCall is referenced in this module — confirm it is needed
+
+const DEPNAME = "CiteSeer"  # name the DataDep is registered under; also passed to `read_planetoid_data`
+const LINK = "https://github.com/kimiyoung/planetoid/raw/master/data"  # base URL prepended to every entry of DATA
+const DOCS = "https://github.com/kimiyoung/planetoid"  # shown as "Website" in the DataDep message
+const DATA = "ind.citeseer." .* ["x", "y", "tx", "allx", "ty", "ally", "graph", "test.index"]  # remote file names: "ind.citeseer.x", "ind.citeseer.y", ...
+
+function __init__()  # registers the CiteSeer DataDep
+    register(DataDep(
+        DEPNAME,
+        """
+        Dataset: The $DEPNAME dataset.
+        Website: $DOCS
+        """,
+        map(x -> "$LINK/$x", DATA),
+        "7f7ec4df97215c573eee316de35754d89382011dfd9fb2b954a4a491057e3eb3",  # checksum argument; if omitted, one will be generated by DataDeps
+        # post_fetch_method = unpack
+    ))
+end
+
+"""
+    dataset(; dir=nothing, reverse_edges=true)
+
+Retrieve the CiteSeer dataset. The output is a named tuple with fields
+```julia-repl
+julia> keys(CiteSeer.dataset())
+(:node_features, :node_labels, :adjacency_list, :train_indices, :val_indices, :test_indices, :num_classes, :num_nodes, :num_edges, :directed)
+```
+
+In particular, `adjacency_list` is a vector of vectors,
+where `adjacency_list[i]` contains the neighbors of node `i`
+through outgoing edges.
+
+If `reverse_edges=true`, the graph will contain
+the reverse of each edge and the graph will be undirected.
+
+Both keyword arguments are forwarded unchanged to `read_planetoid_data`.
+
+See also [`CiteSeer`](@ref).
+
+## Usage Examples
+
+```julia
+using MLDatasets: CiteSeer
+data = CiteSeer.dataset()
+train_labels = data.node_labels[data.train_indices]
+```
+"""
+dataset(; dir=nothing, reverse_edges=true) = 
+    read_planetoid_data(DEPNAME, dir=dir, reverse_edges=reverse_edges)
+
+
+end # module
+
+
-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +# Utils
++
 +```@docs
 +MLDatasets.read_planetoid_data
 +```