From 0f609ba9ac60d772c32a20d3a6b12058a642edf0 Mon Sep 17 00:00:00 2001 From: SoJ <102796027+MrJs133@users.noreply.github.com> Date: Tue, 12 Nov 2024 19:33:02 +0800 Subject: [PATCH] feat(ml): graph learning algorithm impl (10+) (#102) * glcc-hugegraph-graph-ai-B * change readme.md * Update README.md * Update test_examples.py --------- Co-authored-by: Simon Cheung --- hugegraph-ml/README.md | 56 +- .../src/hugegraph_ml/data/hugegraph2dgl.py | 295 ++++++- .../src/hugegraph_ml/examples/agnn_example.py | 39 + .../hugegraph_ml/examples/appnp_example.py | 43 + .../src/hugegraph_ml/examples/arma_example.py | 42 + .../src/hugegraph_ml/examples/bgnn_example.py | 67 ++ .../src/hugegraph_ml/examples/bgrl_example.py | 50 ++ .../hugegraph_ml/examples/care_gnn_example.py | 51 ++ .../examples/cluster_gcn_example.py | 37 + .../examples/correct_and_smooth_example.py | 51 ++ .../hugegraph_ml/examples/dagnn_example.py | 39 + .../examples/deepergcn_example.py | 42 + .../hugegraph_ml/examples/gatne_example.py | 46 + .../src/hugegraph_ml/examples/pgnn_example.py | 35 + .../src/hugegraph_ml/examples/seal_example.py | 50 ++ hugegraph-ml/src/hugegraph_ml/models/agnn.py | 64 ++ hugegraph-ml/src/hugegraph_ml/models/appnp.py | 84 ++ hugegraph-ml/src/hugegraph_ml/models/arma.py | 175 ++++ hugegraph-ml/src/hugegraph_ml/models/bgnn.py | 741 ++++++++++++++++ hugegraph-ml/src/hugegraph_ml/models/bgrl.py | 260 ++++++ .../src/hugegraph_ml/models/care_gnn.py | 232 +++++ .../src/hugegraph_ml/models/cluster_gcn.py | 58 ++ .../hugegraph_ml/models/correct_and_smooth.py | 262 ++++++ hugegraph-ml/src/hugegraph_ml/models/dagnn.py | 145 +++ .../src/hugegraph_ml/models/deepergcn.py | 287 ++++++ hugegraph-ml/src/hugegraph_ml/models/gatne.py | 273 ++++++ hugegraph-ml/src/hugegraph_ml/models/pgnn.py | 462 ++++++++++ hugegraph-ml/src/hugegraph_ml/models/seal.py | 826 ++++++++++++++++++ .../tasks/fraud_detector_caregnn.py | 122 +++ .../tasks/hetero_sample_embed_gatne.py | 122 +++ .../tasks/link_prediction_pgnn.py | 94 ++ .../tasks/link_prediction_seal.py | 172 ++++ .../tasks/node_classify_with_edge.py | 123 +++ .../tasks/node_classify_with_sample.py | 156 ++++ .../hugegraph_ml/utils/dgl2hugegraph_utils.py | 745 +++++++++++++++- .../src/tests/test_examples/test_examples.py | 85 +- 36 files changed, 6413 insertions(+), 18 deletions(-) create mode 100644 hugegraph-ml/src/hugegraph_ml/examples/agnn_example.py create mode 100644 hugegraph-ml/src/hugegraph_ml/examples/appnp_example.py create mode 100644 hugegraph-ml/src/hugegraph_ml/examples/arma_example.py create mode 100644 hugegraph-ml/src/hugegraph_ml/examples/bgnn_example.py create mode 100644 hugegraph-ml/src/hugegraph_ml/examples/bgrl_example.py create mode 100644 hugegraph-ml/src/hugegraph_ml/examples/care_gnn_example.py create mode 100644 hugegraph-ml/src/hugegraph_ml/examples/cluster_gcn_example.py create mode 100644 hugegraph-ml/src/hugegraph_ml/examples/correct_and_smooth_example.py create mode 100644 hugegraph-ml/src/hugegraph_ml/examples/dagnn_example.py create mode 100644 hugegraph-ml/src/hugegraph_ml/examples/deepergcn_example.py create mode 100644 hugegraph-ml/src/hugegraph_ml/examples/gatne_example.py create mode 100644 hugegraph-ml/src/hugegraph_ml/examples/pgnn_example.py create mode 100644 hugegraph-ml/src/hugegraph_ml/examples/seal_example.py create mode 100644 hugegraph-ml/src/hugegraph_ml/models/agnn.py create mode 100644 hugegraph-ml/src/hugegraph_ml/models/appnp.py create mode 100644 hugegraph-ml/src/hugegraph_ml/models/arma.py create mode 100644 
hugegraph-ml/src/hugegraph_ml/models/bgnn.py create mode 100644 hugegraph-ml/src/hugegraph_ml/models/bgrl.py create mode 100644 hugegraph-ml/src/hugegraph_ml/models/care_gnn.py create mode 100644 hugegraph-ml/src/hugegraph_ml/models/cluster_gcn.py create mode 100644 hugegraph-ml/src/hugegraph_ml/models/correct_and_smooth.py create mode 100644 hugegraph-ml/src/hugegraph_ml/models/dagnn.py create mode 100644 hugegraph-ml/src/hugegraph_ml/models/deepergcn.py create mode 100644 hugegraph-ml/src/hugegraph_ml/models/gatne.py create mode 100644 hugegraph-ml/src/hugegraph_ml/models/pgnn.py create mode 100644 hugegraph-ml/src/hugegraph_ml/models/seal.py create mode 100644 hugegraph-ml/src/hugegraph_ml/tasks/fraud_detector_caregnn.py create mode 100644 hugegraph-ml/src/hugegraph_ml/tasks/hetero_sample_embed_gatne.py create mode 100644 hugegraph-ml/src/hugegraph_ml/tasks/link_prediction_pgnn.py create mode 100644 hugegraph-ml/src/hugegraph_ml/tasks/link_prediction_seal.py create mode 100644 hugegraph-ml/src/hugegraph_ml/tasks/node_classify_with_edge.py create mode 100644 hugegraph-ml/src/hugegraph_ml/tasks/node_classify_with_sample.py diff --git a/hugegraph-ml/README.md b/hugegraph-ml/README.md index 26e16ddd..c6fca7f9 100644 --- a/hugegraph-ml/README.md +++ b/hugegraph-ml/README.md @@ -1,4 +1,4 @@ - # hugegraph-ml + # hugegraph-ml ## Summary @@ -7,6 +7,26 @@ It implements most graph learning algorithms, enabling users to perform end-to-e Graph data can be read directly from `HugeGraph` and used for tasks such as node embedding, node classification, and graph classification. The implemented algorithm models can be found in the [models](./src/hugegraph_ml/models) folder. +| model | paper | +| ----------- | -------------------------------------------------- | +| AGNN | https://arxiv.org/abs/1803.03735 | +| APPNP | https://arxiv.org/abs/1810.05997 | +| ARMA | https://arxiv.org/abs/1901.01343 | +| BGNN | https://arxiv.org/abs/2101.08543 | +| BGRL | https://arxiv.org/abs/2102.06514 | +| CARE-GNN | https://arxiv.org/abs/2008.08692 | +| Cluster-GCN | https://arxiv.org/abs/1905.07953 | +| C&S | https://arxiv.org/abs/2010.13993 | +| DAGNN | https://arxiv.org/abs/2007.09296 | +| DeeperGCN | https://arxiv.org/abs/2006.07739 | +| DGI | https://arxiv.org/abs/1809.10341 | +| DiffPool | https://arxiv.org/abs/1806.08804 | +| GATNE | https://arxiv.org/abs/1905.01669 | +| GRACE | https://arxiv.org/abs/2006.04131 | +| GRAND | https://arxiv.org/abs/2005.11079 | +| JKNet | https://arxiv.org/abs/1806.03536 | +| P-GNN | http://proceedings.mlr.press/v97/you19b/you19b.pdf | +| SEAL | https://arxiv.org/abs/1802.09691 | ## Environment Requirements @@ -16,22 +36,28 @@ The implemented algorithm models can be found in the [models](./src/hugegraph_ml ## Preparation 1. Start the HugeGraph database, you can do it via Docker/[Binary packages](https://hugegraph.apache.org/docs/download/download/). -Refer to [docker-link](https://hub.docker.com/r/hugegraph/hugegraph) & [deploy-doc](https://hugegraph.apache.org/docs/quickstart/hugegraph-server/#31-use-docker-container-convenient-for-testdev) for guidance + Refer to [docker-link](https://hub.docker.com/r/hugegraph/hugegraph) & [deploy-doc](https://hugegraph.apache.org/docs/quickstart/hugegraph-server/#31-use-docker-container-convenient-for-testdev) for guidance + 2. Clone this project - ```bash - git clone https://github.com/apache/incubator-hugegraph-ai.git - ``` -3. 
Install [hugegraph-python-client](../hugegraph-python-client) and [hugegraph_ml](../hugegraph-ml) - ```bash - cd ./incubator-hugegraph-ai # better to use virtualenv (source venv/bin/activate) - pip install ./hugegraph-python-client - cd ./hugegraph-ml/ - pip install -e . - ``` + + ```bash + git clone https://github.com/apache/incubator-hugegraph-ai.git + ``` + +3. Install [hugegraph-python-client](../hugegraph-python-client) and [hugegraph-ml](../hugegraph-ml) + + ```bash + cd ./incubator-hugegraph-ai # better to use virtualenv (source venv/bin/activate) + pip install ./hugegraph-python-client + cd ./hugegraph-ml/ + pip install -e . + ``` + 4. Enter the project directory - ```bash - cd ./hugegraph-ml/src - ``` + + ```bash + cd ./hugegraph-ml/src + ``` ## Examples diff --git a/hugegraph-ml/src/hugegraph_ml/data/hugegraph2dgl.py b/hugegraph-ml/src/hugegraph_ml/data/hugegraph2dgl.py index 658b404f..92ea00cc 100644 --- a/hugegraph-ml/src/hugegraph_ml/data/hugegraph2dgl.py +++ b/hugegraph-ml/src/hugegraph_ml/data/hugegraph2dgl.py @@ -26,7 +26,7 @@ from pyhugegraph.client import PyHugeClient from hugegraph_ml.data.hugegraph_dataset import HugeGraphDataset - +import networkx as nx class HugeGraph2DGL: def __init__( @@ -150,6 +150,132 @@ def convert_graph_dataset( dataset_dgl = HugeGraphDataset(graphs=graphs, labels=graph_labels, info=graphs_info) return dataset_dgl + def convert_graph_nx( + self, + vertex_label: str, + edge_label: str, + ): + vertices = self._graph_germlin.exec(f"g.V().hasLabel('{vertex_label}')")["data"] + edges = self._graph_germlin.exec(f"g.E().hasLabel('{edge_label}')")["data"] + graph_nx = self._convert_graph_from_v_e_nx(vertices=vertices, edges=edges) + return graph_nx + + def convert_graph_with_edge_feat( + self, + vertex_label: str, + edge_label: str, + node_feat_key: str = "feat", + edge_feat_key: str = "edge_feat", + label_key: str = "label", + mask_keys: Optional[List[str]] = None, + ): + if mask_keys is None: + mask_keys = ["train_mask", "val_mask", "test_mask"] + vertices = self._graph_germlin.exec(f"g.V().hasLabel('{vertex_label}')")["data"] + edges = self._graph_germlin.exec(f"g.E().hasLabel('{edge_label}')")["data"] + graph_dgl = self._convert_graph_from_v_e_with_edge_feat( + vertices, edges, edge_feat_key, node_feat_key, label_key, mask_keys + ) + + return graph_dgl + + def convert_graph_ogb(self, vertex_label: str, edge_label: str, split_label: str): + vertices = self._graph_germlin.exec(f"g.V().hasLabel('{vertex_label}')")["data"] + edges = self._graph_germlin.exec(f"g.E().hasLabel('{edge_label}')")["data"] + graph_dgl, vertex_id_to_idx = self._convert_graph_from_ogb( + vertices, edges, "feat", "year", "weight" + ) + edges_split = self._graph_germlin.exec(f"g.E().hasLabel('{split_label}')")[ + "data" + ] + split_edge = self._convert_split_edge_from_ogb(edges_split, vertex_id_to_idx) + return graph_dgl, split_edge + + def convert_hetero_graph_bgnn( + self, + vertex_labels: List[str], + edge_labels: List[str], + feat_key: str = "feat", + label_key: str = "class", + cat_key: str = "cat_features", + mask_keys: Optional[List[str]] = None, + ): + if mask_keys is None: + mask_keys = ["train_mask", "val_mask", "test_mask"] + vertex_label_id2idx = {} + vertex_label_data = {} + # for each vertex label + for vertex_label in vertex_labels: + vertices = self._graph_germlin.exec(f"g.V().hasLabel('{vertex_label}')")[ + "data" + ] + if len(vertices) == 0: + warnings.warn( + f"Graph has no vertices of vertex_label: {vertex_label}", Warning + ) + else: + vertex_ids = [v["id"] 
for v in vertices] + id2idx = {vertex_id: idx for idx, vertex_id in enumerate(vertex_ids)} + vertex_label_id2idx[vertex_label] = id2idx + # extract vertex property(feat, label, mask) + vertex_label_data[vertex_label] = {} + if feat_key in vertices[0]["properties"]: + node_feats = torch.tensor( + [v["properties"][feat_key] for v in vertices], + dtype=torch.int32, + ) + vertex_label_data[vertex_label]["feat"] = node_feats + if label_key in vertices[0]["properties"]: + node_labels = torch.tensor( + [v["properties"][label_key] for v in vertices], + dtype=torch.float64, + ) + vertex_label_data[vertex_label]["class"] = node_labels + if cat_key in vertices[0]["properties"]: + node_cat = torch.tensor( + [v["properties"][cat_key] for v in vertices], + dtype=torch.int32, + ) + vertex_label_data[vertex_label]["cat_features"] = node_cat + if mask_keys: + for mk in mask_keys: + if mk in vertices[0]["properties"]: + mask = torch.tensor( + [v["properties"][mk] for v in vertices], + dtype=torch.bool, + ) + vertex_label_data[vertex_label][mk] = mask + # build hetero graph from edges + edge_data_dict = {} + for edge_label in edge_labels: + edges = self._graph_germlin.exec(f"g.E().hasLabel('{edge_label}')")["data"] + if len(edges) == 0: + warnings.warn( + f"Graph has no edges of edge_label: {edge_label}", Warning + ) + else: + src_vertex_label = edges[0]["outVLabel"] + src_idx = [ + vertex_label_id2idx[src_vertex_label][e["outV"]] for e in edges + ] + dst_vertex_label = edges[0]["inVLabel"] + dst_idx = [ + vertex_label_id2idx[dst_vertex_label][e["inV"]] for e in edges + ] + edge_data_dict[(src_vertex_label, edge_label, dst_vertex_label)] = ( + src_idx, + dst_idx, + ) + # add vertex properties data + hetero_graph = dgl.heterograph(edge_data_dict) + for vertex_label in vertex_labels: + for prop in vertex_label_data[vertex_label]: + hetero_graph.nodes[vertex_label].data[prop] = vertex_label_data[ + vertex_label + ][prop] + + return hetero_graph + @staticmethod def _convert_graph_from_v_e(vertices, edges, feat_key=None, label_key=None, mask_keys=None): if len(vertices) == 0: @@ -175,6 +301,154 @@ def _convert_graph_from_v_e(vertices, edges, feat_key=None, label_key=None, mask graph_dgl.ndata[mk] = mask return graph_dgl + @staticmethod + def _convert_graph_from_v_e_nx(vertices, edges): + if len(vertices) == 0: + warnings.warn("This graph has no vertices", Warning) + return nx.Graph(()) + vertex_ids = [v["id"] for v in vertices] + vertex_id_to_idx = {vertex_id: idx for idx, vertex_id in enumerate(vertex_ids)} + new_vertex_ids = [vertex_id_to_idx[id] for id in vertex_ids] + edge_list = [(edge["outV"], edge["inV"]) for edge in edges] + new_edge_list = [ + (vertex_id_to_idx[src], vertex_id_to_idx[dst]) for src, dst in edge_list + ] + graph_nx = nx.Graph() + graph_nx.add_nodes_from(new_vertex_ids) + graph_nx.add_edges_from(new_edge_list) + return graph_nx + + @staticmethod + def _convert_graph_from_v_e_with_edge_feat( + vertices, + edges, + edge_feat_key, + node_feat_key=None, + label_key=None, + mask_keys=None, + ): + if len(vertices) == 0: + warnings.warn("This graph has no vertices", Warning) + return dgl.graph(()) + vertex_ids = [v["id"] for v in vertices] + vertex_id_to_idx = {vertex_id: idx for idx, vertex_id in enumerate(vertex_ids)} + src_idx = [vertex_id_to_idx[e["outV"]] for e in edges] + dst_idx = [vertex_id_to_idx[e["inV"]] for e in edges] + graph_dgl = dgl.graph((src_idx, dst_idx)) + + if node_feat_key and node_feat_key in vertices[0]["properties"]: + node_feats = [v["properties"][node_feat_key] for v in 
vertices] + graph_dgl.ndata["feat"] = torch.tensor(node_feats, dtype=torch.int64) + if edge_feat_key and edge_feat_key in edges[0]["properties"]: + edge_feats = [e["properties"][edge_feat_key] for e in edges] + graph_dgl.edata["feat"] = torch.tensor(edge_feats, dtype=torch.int64) + if label_key and label_key in vertices[0]["properties"]: + node_labels = [v["properties"][label_key] for v in vertices] + graph_dgl.ndata["label"] = torch.tensor(node_labels, dtype=torch.long) + if mask_keys: + for mk in mask_keys: + if mk in vertices[0]["properties"]: + node_masks = [v["properties"][mk] for v in vertices] + mask = torch.tensor(node_masks, dtype=torch.bool) + graph_dgl.ndata[mk] = mask + return graph_dgl + + @staticmethod + def _convert_graph_from_ogb(vertices, edges, feat_key, year_key, weight_key): + if len(vertices) == 0: + warnings.warn("This graph has no vertices", Warning) + return dgl.graph(()) + vertex_ids = [v["id"] for v in vertices] + vertex_id_to_idx = {vertex_id: idx for idx, vertex_id in enumerate(vertex_ids)} + src_idx = [vertex_id_to_idx[e["outV"]] for e in edges] + dst_idx = [vertex_id_to_idx[e["inV"]] for e in edges] + graph_dgl = dgl.graph((src_idx, dst_idx)) + if feat_key and feat_key in vertices[0]["properties"]: + node_feats = [ + v["properties"][feat_key] + for v in vertices[0 : graph_dgl.number_of_nodes()] + ] + graph_dgl.ndata["feat"] = torch.tensor(node_feats, dtype=torch.float32) + if year_key and year_key in edges[0]["properties"]: + year = [e["properties"][year_key] for e in edges] + graph_dgl.edata["year"] = torch.tensor(year, dtype=torch.int64) + if weight_key and weight_key in edges[0]["properties"]: + weight = [e["properties"][weight_key] for e in edges] + graph_dgl.edata["weight"] = torch.tensor(weight, dtype=torch.int64) + + return graph_dgl, vertex_id_to_idx + + @staticmethod + def _convert_split_edge_from_ogb(edges, vertex_id_to_idx): + train_edge_list = [] + train_year_list = [] + train_weight_list = [] + valid_edge_list = [] + valid_year_list = [] + valid_weight_list = [] + valid_edge_neg_list = [] + test_edge_list = [] + test_year_list = [] + test_weight_list = [] + test_edge_neg_list = [] + + for edge in edges: + if edge["properties"]["train_edge_mask"] == 1: + train_edge_list.append( + [vertex_id_to_idx[edge["outV"]], vertex_id_to_idx[edge["inV"]]] + ) + if edge["properties"]["train_year_mask"] != -1: + train_year_list.append(edge["properties"]["train_year_mask"]) + if edge["properties"]["train_weight_mask"] != -1: + train_weight_list.append(edge["properties"]["train_weight_mask"]) + + if edge["properties"]["valid_edge_mask"] == 1: + valid_edge_list.append( + [vertex_id_to_idx[edge["outV"]], vertex_id_to_idx[edge["inV"]]] + ) + if edge["properties"]["valid_year_mask"] != -1: + valid_year_list.append(edge["properties"]["valid_year_mask"]) + if edge["properties"]["valid_weight_mask"] != -1: + valid_weight_list.append(edge["properties"]["valid_weight_mask"]) + if edge["properties"]["valid_edge_neg_mask"] == 1: + valid_edge_neg_list.append( + [vertex_id_to_idx[edge["outV"]], vertex_id_to_idx[edge["inV"]]] + ) + + if edge["properties"]["test_edge_mask"] == 1: + test_edge_list.append( + [vertex_id_to_idx[edge["outV"]], vertex_id_to_idx[edge["inV"]]] + ) + if edge["properties"]["test_year_mask"] != -1: + test_year_list.append(edge["properties"]["test_year_mask"]) + if edge["properties"]["test_weight_mask"] != -1: + test_weight_list.append(edge["properties"]["test_weight_mask"]) + if edge["properties"]["test_edge_neg_mask"] == 1: + test_edge_neg_list.append( + 
[vertex_id_to_idx[edge["outV"]], vertex_id_to_idx[edge["inV"]]] + ) + + split_edge = { + "train": { + "edge": torch.tensor(train_edge_list), + "weight": torch.tensor(train_weight_list), + "year": torch.tensor(train_year_list), + }, + "valid": { + "edge": torch.tensor(valid_edge_list), + "weight": torch.tensor(valid_weight_list), + "year": torch.tensor(valid_year_list), + "edge_neg": torch.tensor(valid_edge_neg_list), + }, + "test": { + "edge": torch.tensor(test_edge_list), + "weight": torch.tensor(test_weight_list), + "year": torch.tensor(test_year_list), + "edge_neg": torch.tensor(test_edge_neg_list), + }, + } + + return split_edge if __name__ == "__main__": hg2d = HugeGraph2DGL() @@ -188,3 +462,22 @@ def _convert_graph_from_v_e(vertices, edges, feat_key=None, label_key=None, mask vertex_labels=["ACM_paper_v", "ACM_author_v", "ACM_field_v"], edge_labels=["ACM_ap_e", "ACM_fp_e", "ACM_pa_e", "ACM_pf_e"] ) + hg2d.convert_graph_nx(vertex_label="CAVEMAN_vertex", edge_label="CAVEMAN_edge") + hg2d.convert_graph_with_edge_feat( + vertex_label="CORA_edge_feat_vertex", edge_label="CORA_edge_feat_edge" + ) + hg2d.convert_graph_ogb( + vertex_label="ogbl-collab_vertex", + edge_label="ogbl-collab_edge", + split_label="ogbl-collab_split_edge", + ) + hg2d.convert_hetero_graph_bgnn( + vertex_labels=["AVAZU__N_v"], edge_labels=["AVAZU__E_e"] + ) + hg2d.convert_hetero_graph( + vertex_labels=["AMAZONGATNE__N_v"], + edge_labels=[ + "AMAZONGATNE_1_e", + "AMAZONGATNE_2_e", + ], + ) \ No newline at end of file diff --git a/hugegraph-ml/src/hugegraph_ml/examples/agnn_example.py b/hugegraph-ml/src/hugegraph_ml/examples/agnn_example.py new file mode 100644 index 00000000..5b5b14ba --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/examples/agnn_example.py @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
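Before the AGNN example below, a note on the OGB converter above: `convert_graph_ogb` returns the DGL graph plus the nested `split_edge` dict built by `_convert_split_edge_from_ogb`, which the SEAL example later consumes. A minimal sketch of the expected layout (key names come from the code above; node counts and tensor sizes here are made-up toy values):

```python
import torch

# Toy illustration of the split_edge dict produced above.
# Tensor sizes are arbitrary; only the key structure matters.
split_edge = {
    "train": {
        "edge": torch.randint(0, 100, (60, 2)),      # [n_train, 2] endpoint indices
        "weight": torch.ones(60, dtype=torch.long),
        "year": torch.full((60,), 2019),
    },
    "valid": {
        "edge": torch.randint(0, 100, (20, 2)),
        "weight": torch.ones(20, dtype=torch.long),
        "year": torch.full((20,), 2020),
        "edge_neg": torch.randint(0, 100, (20, 2)),  # sampled negatives
    },
    "test": {
        "edge": torch.randint(0, 100, (20, 2)),
        "weight": torch.ones(20, dtype=torch.long),
        "year": torch.full((20,), 2021),
        "edge_neg": torch.randint(0, 100, (20, 2)),
    },
}
print({split: {k: tuple(v.shape) for k, v in d.items()} for split, d in split_edge.items()})
```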
+ +from hugegraph_ml.data.hugegraph2dgl import HugeGraph2DGL +from hugegraph_ml.models.agnn import AGNN +from hugegraph_ml.tasks.node_classify import NodeClassify + + +def agnn_example(n_epochs=200): + hg2d = HugeGraph2DGL() + graph = hg2d.convert_graph(vertex_label="CORA_vertex", edge_label="CORA_edge") + model = AGNN( + num_layers=2, + in_dim=graph.ndata["feat"].shape[1], + hid_dim=64, + out_dim=graph.ndata["label"].unique().shape[0], + dropout=0.2, + ) + node_clf_task = NodeClassify(graph, model) + node_clf_task.train(lr=0.005, weight_decay=0.0005, n_epochs=n_epochs, patience=200) + print(node_clf_task.evaluate()) + + +if __name__ == "__main__": + agnn_example() diff --git a/hugegraph-ml/src/hugegraph_ml/examples/appnp_example.py b/hugegraph-ml/src/hugegraph_ml/examples/appnp_example.py new file mode 100644 index 00000000..6754b747 --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/examples/appnp_example.py @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from hugegraph_ml.data.hugegraph2dgl import HugeGraph2DGL +from hugegraph_ml.models.appnp import APPNP +from hugegraph_ml.tasks.node_classify import NodeClassify +import torch.nn.functional as F + + +def appnp_example(n_epochs=200): + hg2d = HugeGraph2DGL() + graph = hg2d.convert_graph(vertex_label="CORA_vertex", edge_label="CORA_edge") + model = APPNP( + in_feats=graph.ndata["feat"].shape[1], + hiddens=[64], + n_classes=graph.ndata["label"].unique().shape[0], + activation=F.relu, + feat_drop=0.5, + edge_drop=0.5, + alpha=0.1, + k=10, + ) + node_clf_task = NodeClassify(graph, model) + node_clf_task.train(lr=0.005, weight_decay=0.0005, n_epochs=n_epochs, patience=200) + print(node_clf_task.evaluate()) + + +if __name__ == "__main__": + appnp_example() diff --git a/hugegraph-ml/src/hugegraph_ml/examples/arma_example.py b/hugegraph-ml/src/hugegraph_ml/examples/arma_example.py new file mode 100644 index 00000000..0c75b5be --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/examples/arma_example.py @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +from hugegraph_ml.data.hugegraph2dgl import HugeGraph2DGL +from hugegraph_ml.models.arma import ARMA4NC +from hugegraph_ml.tasks.node_classify import NodeClassify +from torch import nn + + +def arma_example(n_epochs=200): + hg2d = HugeGraph2DGL() + graph = hg2d.convert_graph(vertex_label="CORA_vertex", edge_label="CORA_edge") + model = ARMA4NC( + in_dim=graph.ndata["feat"].shape[1], + hid_dim=16, + out_dim=graph.ndata["label"].unique().shape[0], + num_stacks=2, + num_layers=1, + activation=nn.ReLU(), + dropout=0.75, + ) + node_clf_task = NodeClassify(graph, model) + node_clf_task.train(lr=0.005, weight_decay=0.0005, n_epochs=n_epochs, patience=200) + print(node_clf_task.evaluate()) + + +if __name__ == "__main__": + arma_example() diff --git a/hugegraph-ml/src/hugegraph_ml/examples/bgnn_example.py b/hugegraph-ml/src/hugegraph_ml/examples/bgnn_example.py new file mode 100644 index 00000000..7c353f3b --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/examples/bgnn_example.py @@ -0,0 +1,67 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from hugegraph_ml.models.bgnn import ( + GNNModelDGL, + BGNNPredictor, + encode_cat_features, + replace_na, + convert_data, +) +from hugegraph_ml.data.hugegraph2dgl import HugeGraph2DGL + + +def bgnn_example(): + hg2d = HugeGraph2DGL() + g = hg2d.convert_hetero_graph_bgnn( + vertex_labels=["AVAZU__N_v"], edge_labels=["AVAZU__E_e"] + ) + X, y, cat_features, train_mask, val_mask, test_mask = convert_data(g) + encoded_X = X.copy() + encoded_X = encode_cat_features( + encoded_X, y, cat_features, train_mask, val_mask, test_mask + ) + encoded_X = replace_na(encoded_X, train_mask) + gnn_model = GNNModelDGL(in_dim=y.shape[1], hidden_dim=128, out_dim=y.shape[1]) + bgnn = BGNNPredictor( + gnn_model, + task="regression", + loss_fn=None, + trees_per_epoch=5, + backprop_per_epoch=5, + lr=0.1, + append_gbdt_pred=False, + gbdt_depth=6, + gbdt_lr=0.1, + ) + metrics = bgnn.fit( + g, + encoded_X, + y, + train_mask, + val_mask, + test_mask, + original_X=X, + cat_features=cat_features, + num_epochs=100, + patience=10, + metric_name="loss", + ) + + +if __name__ == "__main__": + bgnn_example() diff --git a/hugegraph-ml/src/hugegraph_ml/examples/bgrl_example.py b/hugegraph-ml/src/hugegraph_ml/examples/bgrl_example.py new file mode 100644 index 00000000..77c03f96 --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/examples/bgrl_example.py @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from hugegraph_ml.data.hugegraph2dgl import HugeGraph2DGL +from hugegraph_ml.models.bgrl import BGRL, GCN, MLP_Predictor, CosineDecayScheduler, get_graph_drop_transform +from hugegraph_ml.models.mlp import MLPClassifier +from hugegraph_ml.tasks.node_classify import NodeClassify +from hugegraph_ml.tasks.node_embed import NodeEmbed + + +def bgrl_example(n_epochs_embed=300, n_epochs_clf=400): + hg2d = HugeGraph2DGL() + graph = hg2d.convert_graph(vertex_label="CORA_vertex", edge_label="CORA_edge") + encoder = GCN([graph.ndata["feat"].size(1)] + [256, 128]) + predictor = MLP_Predictor( + input_size=128, + output_size=128, + hidden_size=512, + ) + model = BGRL(encoder=encoder, predictor=predictor) + node_embed_task = NodeEmbed(graph=graph, model=model) + embedded_graph = node_embed_task.train_and_embed( + add_self_loop=True, lr=0.001, weight_decay=1e-5, n_epochs=n_epochs_embed, patience=40 + ) + model = MLPClassifier( + n_in_feat=embedded_graph.ndata["feat"].shape[1], + n_out_feat=embedded_graph.ndata["label"].unique().shape[0], + n_hidden=128 + ) + node_clf_task = NodeClassify(graph=embedded_graph, model=model) + node_clf_task.train(lr=1e-3, n_epochs=n_epochs_clf, patience=30) + print(node_clf_task.evaluate()) + + +if __name__ == "__main__": + bgrl_example() diff --git a/hugegraph-ml/src/hugegraph_ml/examples/care_gnn_example.py b/hugegraph-ml/src/hugegraph_ml/examples/care_gnn_example.py new file mode 100644 index 00000000..e6fb52f2 --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/examples/care_gnn_example.py @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
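On the BGRL example above: the `NodeEmbed` task hides the self-supervised loop, but BGRL's core update is an online encoder and predictor chasing an exponential-moving-average target encoder across two augmented views, with no negative samples. A hedged sketch of one such step (module names and signatures here are illustrative, not the repo's `BGRL` API):

```python
import torch
import torch.nn.functional as F

def bgrl_step(online, predictor, target, g1, x1, g2, x2, opt, momentum=0.99):
    """One illustrative BGRL update; online/target are GNN encoders, predictor an MLP."""
    q1 = predictor(online(g1, x1))  # online branch, view 1
    q2 = predictor(online(g2, x2))  # online branch, view 2
    with torch.no_grad():           # target branch receives no gradients
        y1 = target(g1, x1)
        y2 = target(g2, x2)
    # symmetric negative cosine similarity, crosswise between the two views
    loss = (2 - F.cosine_similarity(q1, y2, dim=-1).mean()
              - F.cosine_similarity(q2, y1, dim=-1).mean())
    opt.zero_grad()
    loss.backward()
    opt.step()
    with torch.no_grad():           # EMA update keeps the target slow-moving
        for p_t, p_o in zip(target.parameters(), online.parameters()):
            p_t.mul_(momentum).add_(p_o, alpha=1 - momentum)
    return loss.item()
```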
+ +from hugegraph_ml.data.hugegraph2dgl import HugeGraph2DGL +from hugegraph_ml.models.care_gnn import CAREGNN +from hugegraph_ml.tasks.fraud_detector_caregnn import DetectorCaregnn + +import dgl +import torch + + +def care_gnn_example(n_epochs=200): + hg2d = HugeGraph2DGL() + graph = hg2d.convert_hetero_graph( + vertex_labels=["AMAZON_user_v"], + edge_labels=[ + "AMAZON_net_upu_e", + "AMAZON_net_usu_e", + "AMAZON_net_uvu_e", + ], + ) + model = CAREGNN( + in_dim=graph.ndata["feature"].shape[-1], + num_classes=graph.ndata["label"].unique().shape[0], + hid_dim=64, + num_layers=1, + activation=torch.tanh, + step_size=0.02, + edges=graph.canonical_etypes, + ) + detector_task = DetectorCaregnn(graph, model) + detector_task.train(lr=0.005, weight_decay=0.0005, n_epochs=n_epochs) + print(detector_task.evaluate()) + + +if __name__ == "__main__": + care_gnn_example() diff --git a/hugegraph-ml/src/hugegraph_ml/examples/cluster_gcn_example.py b/hugegraph-ml/src/hugegraph_ml/examples/cluster_gcn_example.py new file mode 100644 index 00000000..3cdcf8e3 --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/examples/cluster_gcn_example.py @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from hugegraph_ml.data.hugegraph2dgl import HugeGraph2DGL +from hugegraph_ml.models.cluster_gcn import SAGE +from hugegraph_ml.tasks.node_classify_with_sample import NodeClassifyWithSample + + +def cluster_gcn_example(n_epochs=200): + hg2d = HugeGraph2DGL() + graph = hg2d.convert_graph(vertex_label="CORA_vertex", edge_label="CORA_edge") + model = SAGE( + in_feats=graph.ndata["feat"].shape[1], + n_hidden=64, + n_classes=graph.ndata["label"].unique().shape[0], + ) + node_clf_task = NodeClassifyWithSample(graph, model) + node_clf_task.train(lr=0.005, weight_decay=0.0005, n_epochs=n_epochs, patience=200) + print(node_clf_task.evaluate()) + + +if __name__ == "__main__": + cluster_gcn_example() diff --git a/hugegraph-ml/src/hugegraph_ml/examples/correct_and_smooth_example.py b/hugegraph-ml/src/hugegraph_ml/examples/correct_and_smooth_example.py new file mode 100644 index 00000000..6faae7f1 --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/examples/correct_and_smooth_example.py @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from hugegraph_ml.data.hugegraph2dgl import HugeGraph2DGL +from hugegraph_ml.models.correct_and_smooth import MLP, MLPLinear +from hugegraph_ml.tasks.node_classify import NodeClassify +import argparse + + +def cs_example(n_epochs=200): + hg2d = HugeGraph2DGL() + graph = hg2d.convert_graph(vertex_label="CORA_vertex", edge_label="CORA_edge") + if args.model == "mlp": + model = MLP( + in_dim=graph.ndata["feat"].shape[1], + hid_dim=64, + out_dim=graph.ndata["label"].unique().shape[0], + num_layers=3, + dropout=0.4, + ) + elif args.model == "linear": + model = MLPLinear( + in_dim=graph.ndata["feat"].shape[1], + out_dim=graph.ndata["label"].unique().shape[0], + ) + else: + raise NotImplementedError(f"Model {args.model} is not supported.") + node_clf_task = NodeClassify(graph, model) + node_clf_task.train(lr=0.005, weight_decay=0.0005, n_epochs=n_epochs, patience=200) + print(node_clf_task.evaluate()) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Base predictor(C&S)") + parser.add_argument("--model", type=str, default="mlp", choices=["mlp", "linear"]) + args = parser.parse_args() + cs_example() diff --git a/hugegraph-ml/src/hugegraph_ml/examples/dagnn_example.py b/hugegraph-ml/src/hugegraph_ml/examples/dagnn_example.py new file mode 100644 index 00000000..38f3e96d --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/examples/dagnn_example.py @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
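Worth noting on the C&S example above: it trains only the base predictor (`MLP`/`MLPLinear`); the "correct" and "smooth" stages are post-processing passes that propagate training residuals, then predictions, over the normalized adjacency. A sketch of the shared propagation kernel (alpha and step count are illustrative; the repo's `correct_and_smooth.py` is the authoritative version):

```python
import dgl
import dgl.function as fn
import torch

def propagate(graph, feats, alpha=0.8, num_steps=50):
    """Iterate feats <- alpha * D^-1/2 A D^-1/2 feats + (1 - alpha) * feats_0."""
    with graph.local_scope():
        deg = graph.in_degrees().float().clamp(min=1)
        norm = deg.pow(-0.5).unsqueeze(1)
        init = feats
        for _ in range(num_steps):
            graph.ndata["h"] = feats * norm
            graph.update_all(fn.copy_u("h", "m"), fn.sum("m", "h"))
            feats = alpha * graph.ndata.pop("h") * norm + (1 - alpha) * init
        return feats

# "Correct" propagates train-set residuals and adds them to the soft predictions;
# "smooth" then propagates the corrected predictions with train labels clamped.
g = dgl.add_self_loop(dgl.rand_graph(8, 20))
print(propagate(g, torch.randn(8, 3)).shape)
```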
+ +from hugegraph_ml.data.hugegraph2dgl import HugeGraph2DGL +from hugegraph_ml.models.dagnn import DAGNN +from hugegraph_ml.tasks.node_classify import NodeClassify + + +def dagnn_example(n_epochs=200): + hg2d = HugeGraph2DGL() + graph = hg2d.convert_graph(vertex_label="CORA_vertex", edge_label="CORA_edge") + model = DAGNN( + k=12, + in_dim=graph.ndata["feat"].shape[1], + hid_dim=64, + out_dim=graph.ndata["label"].unique().shape[0], + dropout=0.8, + ) + node_clf_task = NodeClassify(graph, model) + node_clf_task.train(lr=0.005, weight_decay=0.0005, n_epochs=n_epochs, patience=200) + print(node_clf_task.evaluate()) + + +if __name__ == "__main__": + dagnn_example() diff --git a/hugegraph-ml/src/hugegraph_ml/examples/deepergcn_example.py b/hugegraph-ml/src/hugegraph_ml/examples/deepergcn_example.py new file mode 100644 index 00000000..197826e2 --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/examples/deepergcn_example.py @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from hugegraph_ml.data.hugegraph2dgl import HugeGraph2DGL +from hugegraph_ml.models.deepergcn import DeeperGCN +from hugegraph_ml.tasks.node_classify_with_edge import NodeClassifyWithEdge + + +def deepergcn_example(n_epochs=1000): + hg2d = HugeGraph2DGL() + graph = hg2d.convert_graph_with_edge_feat( + vertex_label="CORA_vertex", edge_label="CORA_edge" + ) + model = DeeperGCN( + node_feat_dim=graph.ndata["feat"].shape[1], + edge_feat_dim=graph.edata["feat"].shape[1], + hid_dim=256, + out_dim=graph.ndata["label"].unique().shape[0], + num_layers=7, + dropout=0.2, + ) + node_clf_task = NodeClassifyWithEdge(graph, model) + node_clf_task.train(lr=0.005, weight_decay=0.0005, n_epochs=n_epochs, patience=200) + print(node_clf_task.evaluate()) + + +if __name__ == "__main__": + deepergcn_example() diff --git a/hugegraph-ml/src/hugegraph_ml/examples/gatne_example.py b/hugegraph-ml/src/hugegraph_ml/examples/gatne_example.py new file mode 100644 index 00000000..0c9c0c5c --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/examples/gatne_example.py @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + + +from hugegraph_ml.data.hugegraph2dgl import HugeGraph2DGL +from hugegraph_ml.models.gatne import DGLGATNE, NeighborSampler +from hugegraph_ml.tasks.hetero_sample_embed_gatne import HeteroSampleEmbedGATNE + + +def gatne_example(n_epochs=200): + hg2d = HugeGraph2DGL() + graph = hg2d.convert_hetero_graph( + vertex_labels=["AMAZONGATNE__N_v"], + edge_labels=[ + "AMAZONGATNE_1_e", + "AMAZONGATNE_2_e", + ], + ) + model = DGLGATNE( + graph.number_of_nodes(), + 200, + 10, + graph.etypes, + len(graph.etypes), + 20, + ) + gatne_task = HeteroSampleEmbedGATNE(graph, model) + embs = gatne_task.train_and_embed(lr=0.005, n_epochs=n_epochs) + + +if __name__ == "__main__": + gatne_example() diff --git a/hugegraph-ml/src/hugegraph_ml/examples/pgnn_example.py b/hugegraph-ml/src/hugegraph_ml/examples/pgnn_example.py new file mode 100644 index 00000000..7297de23 --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/examples/pgnn_example.py @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +from hugegraph_ml.data.hugegraph2dgl import HugeGraph2DGL +from hugegraph_ml.models.pgnn import PGNN, get_dataset +from hugegraph_ml.tasks.link_prediction_pgnn import LinkPredictionPGNN + + +def pgnn_example(n_epochs=200): + hg2d = HugeGraph2DGL() + graph = hg2d.convert_graph_nx( + vertex_label="CAVEMAN_vertex", edge_label="CAVEMAN_edge" + ) + model = PGNN(input_dim=get_dataset(graph)["feature"].shape[1]) + link_pre_task = LinkPredictionPGNN(graph, model) + link_pre_task.train(lr=0.005, weight_decay=0.0005, n_epochs=n_epochs) + + +if __name__ == "__main__": + pgnn_example() diff --git a/hugegraph-ml/src/hugegraph_ml/examples/seal_example.py b/hugegraph-ml/src/hugegraph_ml/examples/seal_example.py new file mode 100644 index 00000000..3d6e7d3b --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/examples/seal_example.py @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
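On the P-GNN example above: the model's position signal comes from distances to randomly sampled anchor sets, which `get_dataset` precomputes from the NetworkX graph. A rough sketch of that idea (the set sizes and the 1/(d+1) mapping follow the paper; everything else here is illustrative, not the repo's implementation):

```python
import random
import networkx as nx

def anchor_distance_features(g: nx.Graph, num_anchor_sets: int = 8):
    """Map each node to 1/(d+1) distances toward random anchor sets (0 if unreachable)."""
    nodes = list(g.nodes())
    columns = []
    for _ in range(num_anchor_sets):
        anchors = random.sample(nodes, max(1, len(nodes) // 4))
        dist = nx.multi_source_dijkstra_path_length(g, anchors)
        columns.append([1.0 / (dist[v] + 1) if v in dist else 0.0 for v in nodes])
    return [list(row) for row in zip(*columns)]  # [num_nodes][num_anchor_sets]

g = nx.connected_caveman_graph(4, 5)  # same family as the CAVEMAN dataset above
print(anchor_distance_features(g)[0])
```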
+ +from hugegraph_ml.data.hugegraph2dgl import HugeGraph2DGL +from hugegraph_ml.models.seal import DGCNN, data_prepare +from hugegraph_ml.tasks.link_prediction_seal import LinkPredictionSeal +import torch + + +def seal_example(n_epochs=200): + torch.manual_seed(2021) + hg2d = HugeGraph2DGL() + graph, split_edge = hg2d.convert_graph_ogb( + vertex_label="ogbl-collab_vertex", + edge_label="ogbl-collab_edge", + split_label="ogbl-collab_split_edge", + ) + node_attribute, edge_weight = data_prepare(graph=graph, split_edge=split_edge) + model = DGCNN( + num_layers=3, + hidden_units=32, + k=30, + gcn_type="gcn", + node_attributes=node_attribute, + edge_weights=edge_weight, + node_embedding=None, + use_embedding=True, + num_nodes=graph.num_nodes(), + dropout=0.5, + ) + link_pre_task = LinkPredictionSeal(graph, split_edge, model) + link_pre_task.train(lr=0.005, n_epochs=n_epochs) + + +if __name__ == "__main__": + seal_example() diff --git a/hugegraph-ml/src/hugegraph_ml/models/agnn.py b/hugegraph-ml/src/hugegraph_ml/models/agnn.py new file mode 100644 index 00000000..b3c3767d --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/models/agnn.py @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Attention-based Graph Neural Network (AGNN) + +References +---------- +Paper: https://arxiv.org/abs/1803.03735 +Author's code: +DGL code: https://github.com/dmlc/dgl/blob/master/python/dgl/nn/pytorch/conv/agnnconv.py +""" + +import dgl +import torch +from dgl.nn.pytorch.conv import AGNNConv +import torch.nn as nn +import torch.nn.functional as F + + +class AGNN(nn.Module): + def __init__(self, num_layers, in_dim, hid_dim, out_dim, dropout): + super().__init__() + self.num_layers = num_layers + self.embedding_layer = nn.Linear(in_dim, hid_dim, bias=False) + + self.attention_layers = nn.ModuleList() + # stack num_layers AGNN conv layers + for i in range(self.num_layers): + self.attention_layers.append(AGNNConv()) + + self.output_layer = nn.Linear(hid_dim, out_dim, bias=False) + + self.dropout = nn.Dropout(dropout) + + self.criterion = nn.CrossEntropyLoss() + + def forward(self, graph, features): + h = F.relu(self.embedding_layer(features)) + for i in range(self.num_layers): + h = self.attention_layers[i](graph, h) + h = self.output_layer(h) + h = self.dropout(h) + return h + + def loss(self, logits, labels): + return self.criterion(logits, labels) + + def inference(self, graph, feats): + return self.forward(graph, feats) diff --git a/hugegraph-ml/src/hugegraph_ml/models/appnp.py b/hugegraph-ml/src/hugegraph_ml/models/appnp.py new file mode 100644 index 00000000..04cb19d5 --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/models/appnp.py @@ -0,0 +1,84 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements.
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Approximate personalized propagation of neural predictions (APPNP) + +References +---------- +Paper: https://arxiv.org/abs/1810.05997 +Author's code: https://github.com/klicperajo/ppnp +DGL code: https://github.com/dmlc/dgl/tree/master/examples/pytorch/appnp +""" + +import torch.nn as nn + +from dgl.nn.pytorch.conv import APPNPConv + + +class APPNP(nn.Module): + def __init__( + self, + in_feats, + hiddens, + n_classes, + activation, + feat_drop, + edge_drop, + alpha, + k, + ): + super(APPNP, self).__init__() + self.layers = nn.ModuleList() + # input layer + self.layers.append(nn.Linear(in_feats, hiddens[0])) + # hidden layers + for i in range(1, len(hiddens)): + self.layers.append(nn.Linear(hiddens[i - 1], hiddens[i])) + # output layer + self.layers.append(nn.Linear(hiddens[-1], n_classes)) + self.activation = activation + if feat_drop: + self.feat_drop = nn.Dropout(feat_drop) + else: + self.feat_drop = lambda x: x + self.propagate = APPNPConv(k, alpha, edge_drop) + self.reset_parameters() + + self.criterion = nn.CrossEntropyLoss() + + def reset_parameters(self): + for layer in self.layers: + layer.reset_parameters() + + def forward(self, graph, features): + # prediction step + h = features + h = self.feat_drop(h) + h = self.activation(self.layers[0](h)) + for layer in self.layers[1:-1]: + h = self.activation(layer(h)) + h = self.layers[-1](self.feat_drop(h)) + # propagation step + h = self.propagate(graph, h) + return h + + def loss(self, logits, labels): + return self.criterion(logits, labels) + + def inference(self, graph, feats): + return self.forward(graph, feats) diff --git a/hugegraph-ml/src/hugegraph_ml/models/arma.py b/hugegraph-ml/src/hugegraph_ml/models/arma.py new file mode 100644 index 00000000..7b07c684 --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/models/arma.py @@ -0,0 +1,175 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
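Stepping back to appnp.py above: `APPNPConv` realizes the truncated personalized-PageRank recurrence Z^(k+1) = (1 - alpha) * A_hat * Z^(k) + alpha * H, where H is the MLP output. A dense toy sketch of the same recurrence (not the DGL sparse kernel; the adjacency here is randomly generated for illustration):

```python
import torch

def appnp_propagate(adj_hat, h, k=10, alpha=0.1):
    """Truncated PPR propagation: repeatedly diffuse, then teleport back to h."""
    z = h
    for _ in range(k):
        z = (1 - alpha) * adj_hat @ z + alpha * h
    return z

n = 5
a = (torch.rand(n, n) > 0.5).float()
a = torch.maximum(a, a.t())       # symmetrize the toy adjacency
a.fill_diagonal_(1.0)             # add self-loops
d = a.sum(dim=1).pow(-0.5)
adj_hat = d.unsqueeze(1) * a * d.unsqueeze(0)  # D^-1/2 (A + I) D^-1/2
print(appnp_propagate(adj_hat, torch.randn(n, 3)).shape)
```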
+ +""" +Auto-Regressive Moving Average (ARMA) + +References +---------- +Paper: https://arxiv.org/abs/1901.01343 +Author's code: +DGL code: https://github.com/dmlc/dgl/tree/master/examples/pytorch/arma +""" + +import math + +import dgl.function as fn + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +def glorot(tensor): + if tensor is not None: + stdv = math.sqrt(6.0 / (tensor.size(-2) + tensor.size(-1))) + tensor.data.uniform_(-stdv, stdv) + + +def zeros(tensor): + if tensor is not None: + tensor.data.fill_(0) + + +class ARMAConv(nn.Module): + def __init__( + self, + in_dim, + out_dim, + num_stacks, + num_layers, + activation=None, + dropout=0.0, + bias=True, + ): + super(ARMAConv, self).__init__() + + self.in_dim = in_dim + self.out_dim = out_dim + self.K = num_stacks + self.T = num_layers + self.activation = activation + self.dropout = nn.Dropout(p=dropout) + + # init weight + self.w_0 = nn.ModuleDict( + {str(k): nn.Linear(in_dim, out_dim, bias=False) for k in range(self.K)} + ) + # deeper weight + self.w = nn.ModuleDict( + {str(k): nn.Linear(out_dim, out_dim, bias=False) for k in range(self.K)} + ) + # skip-connection weight v + self.v = nn.ModuleDict( + {str(k): nn.Linear(in_dim, out_dim, bias=False) for k in range(self.K)} + ) + # bias + if bias: + self.bias = nn.Parameter(torch.Tensor(self.K, self.T, 1, self.out_dim)) + else: + self.register_parameter("bias", None) + + self.reset_parameters() + + def reset_parameters(self): + for k in range(self.K): + glorot(self.w_0[str(k)].weight) + glorot(self.w[str(k)].weight) + glorot(self.v[str(k)].weight) + zeros(self.bias) + + def forward(self, g, feats): + with g.local_scope(): + init_feats = feats + # assume that the graphs are undirected and graph.in_degrees() is the same as graph.out_degrees() + degs = g.in_degrees().float().clamp(min=1) + norm = torch.pow(degs, -0.5).to(feats.device).unsqueeze(1) + output = [] + + for k in range(self.K): + feats = init_feats + for t in range(self.T): + feats = feats * norm + g.ndata["h"] = feats + g.update_all(fn.copy_u("h", "m"), fn.sum("m", "h")) + feats = g.ndata.pop("h") + feats = feats * norm + + if t == 0: + feats = self.w_0[str(k)](feats) + else: + feats = self.w[str(k)](feats) + + feats += self.dropout(self.v[str(k)](init_feats)) + + if self.bias is not None: + feats += self.bias[k][t] + + if self.activation is not None: + feats = self.activation(feats) + output.append(feats) + + return torch.stack(output).mean(dim=0) + + +class ARMA4NC(nn.Module): + def __init__( + self, + in_dim, + hid_dim, + out_dim, + num_stacks, + num_layers, + activation=None, + dropout=0.0, + ): + super(ARMA4NC, self).__init__() + + self.conv1 = ARMAConv( + in_dim=in_dim, + out_dim=hid_dim, + num_stacks=num_stacks, + num_layers=num_layers, + activation=activation, + dropout=dropout, + ) + + self.conv2 = ARMAConv( + in_dim=hid_dim, + out_dim=out_dim, + num_stacks=num_stacks, + num_layers=num_layers, + activation=activation, + dropout=dropout, + ) + + self.dropout = nn.Dropout(p=dropout) + + self.criterion = nn.CrossEntropyLoss() + + def forward(self, g, feats): + feats = F.relu(self.conv1(g, feats)) + feats = self.dropout(feats) + feats = self.conv2(g, feats) + return feats + + def loss(self, logits, labels): + return self.criterion(logits, labels) + + def inference(self, graph, feats): + return self.forward(graph, feats) diff --git a/hugegraph-ml/src/hugegraph_ml/models/bgnn.py b/hugegraph-ml/src/hugegraph_ml/models/bgnn.py new file mode 100644 index
00000000..3db380db --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/models/bgnn.py @@ -0,0 +1,741 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Boost-GNN (BGNN) + +References +---------- +Paper: https://arxiv.org/abs/2101.08543 +Author's code: https://github.com/nd7141/bgnn +DGL code: https://github.com/dmlc/dgl/tree/master/examples/pytorch/bgnn +""" + +import itertools +import time +from collections import defaultdict as ddict + +import numpy as np +import pandas as pd +import torch +import torch.nn.functional as F +from catboost import CatBoostClassifier, CatBoostRegressor, Pool, sum_models +from sklearn import preprocessing +from sklearn.metrics import r2_score +from tqdm import tqdm +from category_encoders import CatBoostEncoder +from dgl.nn.pytorch import ( + AGNNConv as AGNNConvDGL, + APPNPConv, + ChebConv as ChebConvDGL, + GATConv as GATConvDGL, + GraphConv, +) +from torch.nn import Dropout, ELU, Linear, ReLU, Sequential + + +class BGNNPredictor: + """ + Description + ----------- + Boost GNN predictor for semi-supervised node classification or regression problems. + Publication: https://arxiv.org/abs/2101.08543 + + Parameters + ---------- + gnn_model : nn.Module + DGL implementation of GNN model. + task: str, optional + Regression or classification task. + loss_fn : callable, optional + Function that takes torch tensors, pred and true, and returns a scalar. + trees_per_epoch : int, optional + Number of GBDT trees to build each epoch. + backprop_per_epoch : int, optional + Number of backpropagation steps to make each epoch. + lr : float, optional + Learning rate of gradient descent optimizer. + append_gbdt_pred : bool, optional + Append GBDT predictions or replace original input node features. + train_input_features : bool, optional + Train original input node features. + gbdt_depth : int, optional + Depth of each tree in GBDT model. + gbdt_lr : float, optional + Learning rate of GBDT model. + gbdt_alpha : int, optional + Weight to combine previous and new GBDT trees. + random_seed : int, optional + random seed for GNN and GBDT models. 
+ + Examples + ---------- + gnn_model = GAT(10, 20, num_heads=5), + bgnn = BGNNPredictor(gnn_model) + metrics = bgnn.fit(graph, X, y, train_mask, val_mask, test_mask, cat_features) + """ + + def __init__( + self, + gnn_model, + task="regression", + loss_fn=None, + trees_per_epoch=10, + backprop_per_epoch=10, + lr=0.01, + append_gbdt_pred=True, + train_input_features=False, + gbdt_depth=6, + gbdt_lr=0.1, + gbdt_alpha=1, + random_seed=0, + ): + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + self.model = gnn_model.to(self.device) + self.task = task + self.loss_fn = loss_fn + self.trees_per_epoch = trees_per_epoch + self.backprop_per_epoch = backprop_per_epoch + self.lr = lr + self.append_gbdt_pred = append_gbdt_pred + self.train_input_features = train_input_features + self.gbdt_depth = gbdt_depth + self.gbdt_lr = gbdt_lr + self.gbdt_alpha = gbdt_alpha + self.random_seed = random_seed + torch.manual_seed(random_seed) + np.random.seed(random_seed) + + def init_gbdt_model(self, num_epochs, epoch): + if self.task == "regression": + catboost_model_obj = CatBoostRegressor + catboost_loss_fn = "RMSE" + else: + if epoch == 0: # we predict multiclass probs at first epoch + catboost_model_obj = CatBoostClassifier + catboost_loss_fn = "MultiClass" + else: # we predict the gradients for each class at epochs > 0 + catboost_model_obj = CatBoostRegressor + catboost_loss_fn = "MultiRMSE" + + return catboost_model_obj( + iterations=num_epochs, + depth=self.gbdt_depth, + learning_rate=self.gbdt_lr, + loss_function=catboost_loss_fn, + random_seed=self.random_seed, + nan_mode="Min", + ) + + def fit_gbdt(self, pool, trees_per_epoch, epoch): + gbdt_model = self.init_gbdt_model(trees_per_epoch, epoch) + gbdt_model.fit(pool, verbose=False) + return gbdt_model + + def append_gbdt_model(self, new_gbdt_model, weights): + if self.gbdt_model is None: + return new_gbdt_model + return sum_models([self.gbdt_model, new_gbdt_model], weights=weights) + + def train_gbdt( + self, + gbdt_X_train, + gbdt_y_train, + cat_features, + epoch, + gbdt_trees_per_epoch, + gbdt_alpha, + ): + pool = Pool(gbdt_X_train, gbdt_y_train, cat_features=cat_features) + epoch_gbdt_model = self.fit_gbdt(pool, gbdt_trees_per_epoch, epoch) + if epoch == 0 and self.task == "classification": + self.base_gbdt = epoch_gbdt_model + else: + self.gbdt_model = self.append_gbdt_model( + epoch_gbdt_model, weights=[1, gbdt_alpha] + ) + + def update_node_features(self, node_features, X, original_X): + # get predictions from gbdt model + if self.task == "regression": + predictions = np.expand_dims(self.gbdt_model.predict(original_X), axis=1) + else: + predictions = self.base_gbdt.predict_proba(original_X) + if self.gbdt_model is not None: + predictions_after_one = self.gbdt_model.predict(original_X) + predictions += predictions_after_one + + # update node features with predictions + if self.append_gbdt_pred: + if self.train_input_features: + predictions = np.append( + node_features.detach().cpu().data[:, : -self.out_dim], + predictions, + axis=1, + ) # replace old predictions with new predictions + else: + predictions = np.append( + X, predictions, axis=1 + ) # append original features with new predictions + + predictions = torch.from_numpy(predictions).to(self.device) + + node_features.data = predictions.float().data + + def update_gbdt_targets(self, node_features, node_features_before, train_mask): + return ( + (node_features - node_features_before) + .detach() + .cpu() + .numpy()[train_mask, -self.out_dim :] + ) + + def 
init_node_features(self, X): + node_features = torch.empty( + X.shape[0], self.in_dim, requires_grad=True, device=self.device + ) + if self.append_gbdt_pred: + node_features.data[:, : -self.out_dim] = torch.from_numpy( + X.to_numpy(copy=True) + ) + return node_features + + def init_optimizer(self, node_features, optimize_node_features, learning_rate): + params = [self.model.parameters()] + if optimize_node_features: + params.append([node_features]) + optimizer = torch.optim.Adam(itertools.chain(*params), lr=learning_rate) + return optimizer + + def train_model(self, model_in, target_labels, train_mask, optimizer): + y = target_labels[train_mask] + + self.model.train() + logits = self.model(*model_in).squeeze() + pred = logits[train_mask] + + if self.loss_fn is not None: + loss = self.loss_fn(pred, y) + else: + if self.task == "regression": + loss = torch.sqrt(F.mse_loss(pred, y)) + elif self.task == "classification": + loss = F.cross_entropy(pred, y.long()) + else: + raise NotImplementedError( + "Unknown task. Supported tasks: classification, regression." + ) + + optimizer.zero_grad() + loss.backward() + optimizer.step() + return loss + + def evaluate_model(self, logits, target_labels, mask): + metrics = {} + y = target_labels[mask] + with torch.no_grad(): + pred = logits[mask] + if self.task == "regression": + metrics["loss"] = torch.sqrt(F.mse_loss(pred, y).squeeze() + 1e-8) + metrics["rmsle"] = torch.sqrt( + F.mse_loss(torch.log(pred + 1), torch.log(y + 1)).squeeze() + 1e-8 + ) + metrics["mae"] = F.l1_loss(pred, y) + metrics["r2"] = torch.Tensor( + [r2_score(y.cpu().numpy(), pred.cpu().numpy())] + ) + elif self.task == "classification": + metrics["loss"] = F.cross_entropy(pred, y.long()) + metrics["accuracy"] = torch.Tensor( + [(y == pred.max(1)[1]).sum().item() / y.shape[0]] + ) + + return metrics + + def train_and_evaluate( + self, + model_in, + target_labels, + train_mask, + val_mask, + test_mask, + optimizer, + metrics, + gnn_passes_per_epoch, + ): + loss = None + + for _ in range(gnn_passes_per_epoch): + loss = self.train_model(model_in, target_labels, train_mask, optimizer) + + self.model.eval() + logits = self.model(*model_in).squeeze() + train_results = self.evaluate_model(logits, target_labels, train_mask) + val_results = self.evaluate_model(logits, target_labels, val_mask) + test_results = self.evaluate_model(logits, target_labels, test_mask) + for metric_name in train_results: + metrics[metric_name].append( + ( + train_results[metric_name].detach().item(), + val_results[metric_name].detach().item(), + test_results[metric_name].detach().item(), + ) + ) + return loss + + def update_early_stopping( + self, + metrics, + epoch, + best_metric, + best_val_epoch, + epochs_since_last_best_metric, + metric_name, + lower_better=False, + ): + train_metric, val_metric, test_metric = metrics[metric_name][-1] + if (lower_better and val_metric < best_metric[1]) or ( + not lower_better and val_metric > best_metric[1] + ): + best_metric = metrics[metric_name][-1] + best_val_epoch = epoch + epochs_since_last_best_metric = 0 + else: + epochs_since_last_best_metric += 1 + return best_metric, best_val_epoch, epochs_since_last_best_metric + + def log_epoch( + self, + pbar, + metrics, + epoch, + loss, + epoch_time, + logging_epochs, + metric_name="loss", + ): + train_metric, val_metric, test_metric = metrics[metric_name][-1] + if epoch and epoch % logging_epochs == 0: + pbar.set_description( + "Epoch {:05d} | Loss {:.3f} | {} {:.3f}/{:.3f}/{:.3f} | Time {:.4f}".format( + epoch, + loss, + metric_name, + train_metric,
+ val_metric, + test_metric, + epoch_time, + ) + ) + + def fit( + self, + graph, + X, + y, + train_mask, + val_mask, + test_mask, + original_X=None, + cat_features=None, + num_epochs=100, + patience=10, + logging_epochs=1, + metric_name="loss", + ): + """ + + :param graph : dgl.DGLGraph + Input graph. + :param X : pd.DataFrame + Input node features. Each column represents one input feature. Each row is a node. + Values in dataframe are numerical, after preprocessing. + :param y : pd.DataFrame + Input node targets. Each column represents one target. Each row is a node + (order of nodes should be the same as in X). + :param train_mask : list[int] + Node indexes (rows) that belong to train set. + :param val_mask : list[int] + Node indexes (rows) that belong to validation set. + :param test_mask : list[int] + Node indexes (rows) that belong to test set. + :param original_X : pd.DataFrame, optional + Input node features before preprocessing. Each column represents one input feature. Each row is a node. + Values in dataframe can be of any type, including categorical (e.g. string, bool) or + missing values (None). This is useful if you want to preprocess X with GBDT model. + :param cat_features: list[int] + Feature indexes (columns) which are categorical features. + :param num_epochs : int + Number of epochs to run. + :param patience : int + Number of epochs to wait until early stopping. + :param logging_epochs : int + Log every n epochs. + :param metric_name : str + Metric to use for early stopping. + :return: metrics evaluated during training + """ + + # initialize for early stopping and metrics + if metric_name in ["r2", "accuracy"]: + best_metric = [float("-inf")] * 3 # for train/val/test + else: + best_metric = [float("inf")] * 3 # for train/val/test + + best_val_epoch = 0 + epochs_since_last_best_metric = 0 + metrics = ddict(list) + if cat_features is None: + cat_features = [] + + if self.task == "regression": + self.out_dim = y.shape[1] + elif self.task == "classification": + self.out_dim = len(set(y.iloc[test_mask, 0])) + self.in_dim = ( + self.out_dim + X.shape[1] if self.append_gbdt_pred else self.out_dim + ) + + if original_X is None: + original_X = X.copy() + cat_features = [] + + gbdt_X_train = original_X.iloc[train_mask] + gbdt_y_train = y.iloc[train_mask] + gbdt_alpha = self.gbdt_alpha + self.gbdt_model = None + + node_features = self.init_node_features(X) + optimizer = self.init_optimizer( + node_features, optimize_node_features=True, learning_rate=self.lr + ) + + y = torch.from_numpy(y.to_numpy(copy=True)).float().squeeze().to(self.device) + graph = graph.to(self.device) + + pbar = tqdm(range(num_epochs)) + for epoch in pbar: + start2epoch = time.time() + + # gbdt part + self.train_gbdt( + gbdt_X_train, + gbdt_y_train, + cat_features, + epoch, + self.trees_per_epoch, + gbdt_alpha, + ) + + self.update_node_features(node_features, X, original_X) + node_features_before = node_features.clone() + model_in = (graph, node_features) + loss = self.train_and_evaluate( + model_in, + y, + train_mask, + val_mask, + test_mask, + optimizer, + metrics, + self.backprop_per_epoch, + ) + gbdt_y_train = self.update_gbdt_targets( + node_features, node_features_before, train_mask + ) + + self.log_epoch( + pbar, + metrics, + epoch, + loss, + time.time() - start2epoch, + logging_epochs, + metric_name=metric_name, + ) + + # check early
stopping + ( + best_metric, + best_val_epoch, + epochs_since_last_best_metric, + ) = self.update_early_stopping( + metrics, + epoch, + best_metric, + best_val_epoch, + epochs_since_last_best_metric, + metric_name, + lower_better=(metric_name not in ["r2", "accuracy"]), + ) + if patience and epochs_since_last_best_metric > patience: + break + + if np.isclose(gbdt_y_train.sum(), 0.0): + print("Node embeddings do not change anymore. Stopping...") + break + + print( + "Best {} at iteration {}: {:.3f}/{:.3f}/{:.3f}".format( + metric_name, best_val_epoch, *best_metric + ) + ) + return metrics + + def predict(self, graph, X, test_mask): + graph = graph.to(self.device) + node_features = torch.empty(X.shape[0], self.in_dim).to(self.device) + self.update_node_features(node_features, X, X) + logits = self.model(graph, node_features).squeeze() + if self.task == "regression": + return logits[test_mask] + else: + return logits[test_mask].max(1)[1] + + def plot_interactive( + self, + metrics, + legend, + title, + logx=False, + logy=False, + metric_name="loss", + start_from=0, + ): + import plotly.graph_objects as go + + metric_results = metrics[metric_name] + xs = [list(range(len(metric_results)))] * len(metric_results[0]) + ys = list(zip(*metric_results)) + + fig = go.Figure() + for i in range(len(ys)): + fig.add_trace( + go.Scatter( + x=xs[i][start_from:], + y=ys[i][start_from:], + mode="lines+markers", + name=legend[i], + ) + ) + + fig.update_layout( + title=title, + title_x=0.5, + xaxis_title="Epoch", + yaxis_title=metric_name, + font=dict( + size=40, + ), + height=600, + ) + + if logx: + fig.update_layout(xaxis_type="log") + if logy: + fig.update_layout(yaxis_type="log") + + fig.show() + + +class GNNModelDGL(torch.nn.Module): + def __init__( + self, + in_dim, + hidden_dim, + out_dim, + dropout=0.0, + name="gat", + residual=True, + use_mlp=False, + join_with_mlp=False, + ): + super(GNNModelDGL, self).__init__() + self.name = name + self.use_mlp = use_mlp + self.join_with_mlp = join_with_mlp + self.normalize_input_columns = True + if name == "gat": + self.l1 = GATConvDGL( + in_dim, + hidden_dim // 8, + 8, + feat_drop=dropout, + attn_drop=dropout, + residual=False, + activation=F.elu, + ) + self.l2 = GATConvDGL( + hidden_dim, + out_dim, + 1, + feat_drop=dropout, + attn_drop=dropout, + residual=residual, + activation=None, + ) + elif name == "gcn": + self.l1 = GraphConv(in_dim, hidden_dim, activation=F.elu) + self.l2 = GraphConv(hidden_dim, out_dim, activation=F.elu) + self.drop = Dropout(p=dropout) + elif name == "cheb": + self.l1 = ChebConvDGL(in_dim, hidden_dim, k=3) + self.l2 = ChebConvDGL(hidden_dim, out_dim, k=3) + self.drop = Dropout(p=dropout) + elif name == "agnn": + self.lin1 = Sequential( + Dropout(p=dropout), Linear(in_dim, hidden_dim), ELU() + ) + self.l1 = AGNNConvDGL(learn_beta=False) + self.l2 = AGNNConvDGL(learn_beta=True) + self.lin2 = Sequential( + Dropout(p=dropout), Linear(hidden_dim, out_dim), ELU() + ) + elif name == "appnp": + self.lin1 = Sequential( + Dropout(p=dropout), + Linear(in_dim, hidden_dim), + ReLU(), + Dropout(p=dropout), + Linear(hidden_dim, out_dim), + ) + self.l1 = APPNPConv(k=10, alpha=0.1, edge_drop=0.0) + + def forward(self, graph, features): + h = features + if self.use_mlp: + if self.join_with_mlp: + h = torch.cat((h, self.mlp(features)), 1) + else: + h = self.mlp(features) + if self.name == "gat": + h = self.l1(graph, h).flatten(1) + logits = self.l2(graph, h).mean(1) + elif self.name in ["appnp"]: + h = self.lin1(h) + logits = self.l1(graph, h) + elif 
self.name == "agnn": + h = self.lin1(h) + h = self.l1(graph, h) + h = self.l2(graph, h) + logits = self.lin2(h) + elif self.name == "cheb": + lambda_max = dgl.laplacian_lambda_max(graph) + h = self.drop(h) + h = self.l1(graph, h, lambda_max) + logits = self.l2(graph, h, lambda_max) + elif self.name == "gcn": + h = self.drop(h) + h = self.l1(graph, h) + logits = self.l2(graph, h) + + return logits + + +def read_input(input_folder): + X = pd.read_csv(f"{input_folder}/X.csv") + y = pd.read_csv(f"{input_folder}/y.csv") + + categorical_columns = [] + if os.path.exists(f"{input_folder}/cat_features.txt"): + with open(f"{input_folder}/cat_features.txt") as f: + for line in f: + if line.strip(): + categorical_columns.append(line.strip()) + + cat_features = None + if categorical_columns: + columns = X.columns + cat_features = np.where(columns.isin(categorical_columns))[0] + + for col in list(columns[cat_features]): + X[col] = X[col].astype(str) + + gs, _ = load_graphs(f"{input_folder}/graph.dgl") + graph = gs[0] + + with open(f"{input_folder}/masks.json") as f: + masks = json.load(f) + + return graph, X, y, cat_features, masks + + +def normalize_features(X, train_mask, val_mask, test_mask): + min_max_scaler = preprocessing.MinMaxScaler() + A = X.to_numpy(copy=True) + A[train_mask] = min_max_scaler.fit_transform(A[train_mask]) + A[val_mask + test_mask] = min_max_scaler.transform(A[val_mask + test_mask]) + return pd.DataFrame(A, columns=X.columns).astype(float) + + +def replace_na(X, train_mask): + if X.isna().any().any(): + return X.fillna(X.iloc[train_mask].min() - 1) + return X + + +def encode_cat_features(X, y, cat_features, train_mask, val_mask, test_mask): + enc = CatBoostEncoder() + A = X.to_numpy(copy=True) + b = y.to_numpy(copy=True) + A[np.ix_(train_mask, cat_features)] = enc.fit_transform( + A[np.ix_(train_mask, cat_features)], b[train_mask] + ) + A[np.ix_(val_mask + test_mask, cat_features)] = enc.transform( + A[np.ix_(val_mask + test_mask, cat_features)] + ) + A = A.astype(float) + return pd.DataFrame(A, columns=X.columns) + + +def convert_data(g): + retrieved_tensor = g.ndata["feat"] + retrieved_np = retrieved_tensor.numpy() + retrieved_str = retrieved_np.astype(str) + X = pd.DataFrame(retrieved_str) + + retrieved_y_tensor = g.ndata["class"] + retrieved_y_np = retrieved_y_tensor.numpy() + y = pd.DataFrame(retrieved_y_np) + + retrieved_cat_features_tensor = g.ndata["cat_features"][0] + cat_features = retrieved_cat_features_tensor.numpy() + + train_mask = g.ndata["train_mask"].numpy().tolist() + val_mask = g.ndata["val_mask"].numpy().tolist() + test_mask = g.ndata["test_mask"].numpy().tolist() + masks = { + "0": { + "train": [i for i, v in enumerate(train_mask) if v == 1], + "val": [i for i, v in enumerate(val_mask) if v == 1], + "test": [i for i, v in enumerate(test_mask) if v == 1], + } + } + + # graph, X, y, cat_features, masks = read_input(input_folder) + train_mask, val_mask, test_mask = ( + masks["0"]["train"], + masks["0"]["val"], + masks["0"]["test"], + ) + + return X, y, cat_features, train_mask, val_mask, test_mask diff --git a/hugegraph-ml/src/hugegraph_ml/models/bgrl.py b/hugegraph-ml/src/hugegraph_ml/models/bgrl.py new file mode 100644 index 00000000..6d991ee2 --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/models/bgrl.py @@ -0,0 +1,260 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Bootstrapped Graph Latents (BGRL) + +References +---------- +Paper: https://arxiv.org/abs/2102.06514 +Author's code: https://github.com/nerdslab/bgrl +DGL code: https://github.com/dmlc/dgl/tree/master/examples/pytorch/bgrl +""" + +import copy + +import dgl + +import numpy as np +import torch +from dgl.nn.pytorch.conv import GraphConv, SAGEConv +from torch import nn +from torch.nn import BatchNorm1d, Parameter +from torch.nn.init import ones_, zeros_ +from dgl.transforms import Compose, DropEdge, FeatMask +from torch.nn.functional import cosine_similarity + +class MLP_Predictor(nn.Module): + r"""MLP used for predictor. The MLP has one hidden layer. + Args: + input_size (int): Size of input features. + output_size (int): Size of output features. + hidden_size (int, optional): Size of hidden layer. (default: :obj:`512`). + """ + + def __init__(self, input_size, output_size, hidden_size=512): + super().__init__() + + self.net = nn.Sequential( + nn.Linear(input_size, hidden_size, bias=True), + nn.PReLU(1), + nn.Linear(hidden_size, output_size, bias=True), + ) + self.reset_parameters() + + def forward(self, x): + return self.net(x) + + def reset_parameters(self): + # kaiming_uniform + for m in self.modules(): + if isinstance(m, nn.Linear): + m.reset_parameters() + + +class GCN(nn.Module): + def __init__(self, layer_sizes, batch_norm_mm=0.99): + super(GCN, self).__init__() + + self.layers = nn.ModuleList() + for in_dim, out_dim in zip(layer_sizes[:-1], layer_sizes[1:]): + self.layers.append(GraphConv(in_dim, out_dim)) + self.layers.append(BatchNorm1d(out_dim, momentum=batch_norm_mm)) + self.layers.append(nn.PReLU()) + + def forward(self, g, feats): + x = feats + for layer in self.layers: + if isinstance(layer, GraphConv): + x = layer(g, x) + else: + x = layer(x) + return x + + def reset_parameters(self): + for layer in self.layers: + if hasattr(layer, "reset_parameters"): + layer.reset_parameters() + +class BGRL(nn.Module): + r"""BGRL architecture for Graph representation learning. + Args: + encoder (torch.nn.Module): Encoder network to be duplicated and used in both online and target networks. + predictor (torch.nn.Module): Predictor network used to predict the target projection from the online projection. + .. note:: + `encoder` must have a `reset_parameters` method, as the weights of the target network will be initialized + differently from the online network.
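+ + Example: + A minimal usage sketch (layer sizes and the `graph`/`feat_dim` names are illustrative): + >>> encoder = GCN([feat_dim, 256, 256]) + >>> predictor = MLP_Predictor(256, 256, hidden_size=512) + >>> model = BGRL(encoder, predictor) + >>> loss = model(graph, graph.ndata["feat"]) + >>> model.update_target_network(mm=0.99)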
+ """ + + def __init__(self, encoder, predictor): + super(BGRL, self).__init__() + # online network + self.online_encoder = encoder + self.predictor = predictor + + # target network + self.target_encoder = copy.deepcopy(encoder) + + # reinitialize weights + self.target_encoder.reset_parameters() + + # stop gradient + for param in self.target_encoder.parameters(): + param.requires_grad = False + + def trainable_parameters(self): + r"""Returns the parameters that will be updated via an optimizer.""" + return list(self.online_encoder.parameters()) + list( + self.predictor.parameters() + ) + + @torch.no_grad() + def update_target_network(self, mm): + r"""Performs a momentum update of the target network's weights. + Args: + mm (float): Momentum used in moving average update. + """ + for param_q, param_k in zip( + self.online_encoder.parameters(), self.target_encoder.parameters() + ): + param_k.data.mul_(mm).add_(param_q.data, alpha=1.0 - mm) + + def forward(self, graph, feat): + transform_1 = get_graph_drop_transform( + drop_edge_p=0.3, feat_mask_p=0.3 + ) + transform_2 = get_graph_drop_transform( + drop_edge_p=0.2, feat_mask_p=0.4 + ) + online_x = transform_1(graph) + target_x = transform_2(graph) + online_x, target_x = dgl.add_self_loop(online_x), dgl.add_self_loop(target_x) + online_feats, target_feats = online_x.ndata["feat"], target_x.ndata["feat"] + + # forward online network + online_y1 = self.online_encoder(online_x, online_feats) + # prediction + online_q1 = self.predictor(online_y1) + # forward target network + with torch.no_grad(): + target_y1 = self.target_encoder(target_x, target_feats).detach() + + # forward online network 2 + online_y2 = self.online_encoder(target_x, target_feats) + # prediction + online_q2 = self.predictor(online_y2) + # forward target network + with torch.no_grad(): + target_y2 = self.target_encoder(online_x, online_feats).detach() + + loss = ( + 2 + - cosine_similarity(online_q1, target_y1.detach(), dim=-1).mean() + - cosine_similarity(online_q2, target_y2.detach(), dim=-1).mean() + ) + return loss + + def get_embedding(self, graph, feats): + """ + Get the node embeddings from the encoder without computing gradients. + + Parameters + ---------- + graph : dgl.DGLGraph + The input graph. + feats : torch.Tensor + Node features. + + Returns + ------- + torch.Tensor + Node embeddings. + """ + h = self.target_encoder(graph, feats) # Encode the node features with GCN + return h.detach() # Detach from computation graph for evaluation + +def compute_representations(net, dataset, device): + r"""Pre-computes the representations for the entire data. + Returns: + [torch.Tensor, torch.Tensor]: Representations and labels. 
+ """ + net.eval() + reps = [] + labels = [] + + if len(dataset) == 1: + g = dataset[0] + g = dgl.add_self_loop(g) + g = g.to(device) + with torch.no_grad(): + reps.append(net(g)) + labels.append(g.ndata["label"]) + else: + for g in dataset: + # forward + g = g.to(device) + with torch.no_grad(): + reps.append(net(g)) + labels.append(g.ndata["label"]) + + reps = torch.cat(reps, dim=0) + labels = torch.cat(labels, dim=0) + return [reps, labels] + +class CosineDecayScheduler: + def __init__(self, max_val, warmup_steps, total_steps): + self.max_val = max_val + self.warmup_steps = warmup_steps + self.total_steps = total_steps + + def get(self, step): + if step < self.warmup_steps: + return self.max_val * step / self.warmup_steps + elif self.warmup_steps <= step <= self.total_steps: + return ( + self.max_val + * ( + 1 + + np.cos( + (step - self.warmup_steps) + * np.pi + / (self.total_steps - self.warmup_steps) + ) + ) + / 2 + ) + else: + raise ValueError( + "Step ({}) > total number of steps ({}).".format( + step, self.total_steps + ) + ) + +def get_graph_drop_transform(drop_edge_p, feat_mask_p): + transforms = list() + + # make copy of graph + transforms.append(copy.deepcopy) + + # drop edges + if drop_edge_p > 0.0: + transforms.append(DropEdge(drop_edge_p)) + + # drop features + if feat_mask_p > 0.0: + transforms.append(FeatMask(feat_mask_p, node_feat_names=["feat"])) + + return Compose(transforms) \ No newline at end of file diff --git a/hugegraph-ml/src/hugegraph_ml/models/care_gnn.py b/hugegraph-ml/src/hugegraph_ml/models/care_gnn.py new file mode 100644 index 00000000..7046569a --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/models/care_gnn.py @@ -0,0 +1,232 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +""" +CAmouflage-REsistant GNN (CARE-GNN) + +References +---------- +Paper: https://arxiv.org/abs/2008.08692 +Author's code: https://github.com/YingtongDou/CARE-GNN +DGL code: https://github.com/dmlc/dgl/tree/master/examples/pytorch/caregnn +""" + +import dgl.function as fn +import numpy as np +import torch as th +import torch.nn as nn + + +class CAREConv(nn.Module): + """One layer of CARE-GNN.""" + + def __init__( + self, + in_dim, + out_dim, + num_classes, + edges, + activation=None, + step_size=0.02, + ): + super(CAREConv, self).__init__() + + self.activation = activation + self.step_size = step_size + self.in_dim = in_dim + self.out_dim = out_dim + self.num_classes = num_classes + self.edges = edges + self.dist = {} + + self.linear = nn.Linear(self.in_dim, self.out_dim) + self.MLP = nn.Linear(self.in_dim, self.num_classes) + + self.p = {} + self.last_avg_dist = {} + self.f = {} + self.cvg = {} + for etype in edges: + self.p[etype] = 0.5 + self.last_avg_dist[etype] = 0 + self.f[etype] = [] + self.cvg[etype] = False + + def _calc_distance(self, edges): + # formula 2 + d = th.norm( + th.tanh(self.MLP(edges.src["h"])) - th.tanh(self.MLP(edges.dst["h"])), + 1, + 1, + ) + return {"d": d} + + def _top_p_sampling(self, g, p): + # this implementation is low efficient + # optimization requires dgl.sampling.select_top_p requested in issue #3100 + dist = g.edata["d"] + neigh_list = [] + for node in g.nodes(): + edges = g.in_edges(node, form="eid") + num_neigh = th.ceil(g.in_degrees(node) * p).int().item() + neigh_dist = dist[edges] + if neigh_dist.shape[0] > num_neigh: + neigh_index = np.argpartition(neigh_dist.cpu().detach(), num_neigh)[ + :num_neigh + ] + else: + neigh_index = np.arange(num_neigh) + neigh_list.append(edges[neigh_index]) + return th.cat(neigh_list) + + def forward(self, g, feat): + with g.local_scope(): + g.ndata["h"] = feat + + hr = {} + for i, etype in enumerate(g.canonical_etypes): + g.apply_edges(self._calc_distance, etype=etype) + self.dist[etype] = g.edges[etype].data["d"] + sampled_edges = self._top_p_sampling(g[etype], self.p[etype]) + + # formula 8 + g.send_and_recv( + sampled_edges, + fn.copy_u("h", "m"), + fn.mean("m", "h_%s" % etype[1]), + etype=etype, + ) + hr[etype] = g.ndata["h_%s" % etype[1]] + if self.activation is not None: + hr[etype] = self.activation(hr[etype]) + + # formula 9 using mean as inter-relation aggregator + p_tensor = th.Tensor(list(self.p.values())).view(-1, 1, 1).to(g.device) + h_homo = th.sum(th.stack(list(hr.values())) * p_tensor, dim=0) + h_homo += feat + if self.activation is not None: + h_homo = self.activation(h_homo) + + return self.linear(h_homo) + + +class CAREGNN(nn.Module): + def __init__( + self, + in_dim, + num_classes, + hid_dim=64, + edges=None, + num_layers=2, + activation=None, + step_size=0.02, + ): + super(CAREGNN, self).__init__() + self.in_dim = in_dim + self.hid_dim = hid_dim + self.num_classes = num_classes + self.edges = edges + self.activation = activation + self.step_size = step_size + self.num_layers = num_layers + + self.layers = nn.ModuleList() + + if self.num_layers == 1: + # Single layer + self.layers.append( + CAREConv( + self.in_dim, + self.num_classes, + self.num_classes, + self.edges, + activation=self.activation, + step_size=self.step_size, + ) + ) + + else: + # Input layer + self.layers.append( + CAREConv( + self.in_dim, + self.hid_dim, + self.num_classes, + self.edges, + activation=self.activation, + step_size=self.step_size, + ) + ) + + # Hidden layers with n - 2 layers + for i in range(self.num_layers - 
2): + self.layers.append( + CAREConv( + self.hid_dim, + self.hid_dim, + self.num_classes, + self.edges, + activation=self.activation, + step_size=self.step_size, + ) + ) + + # Output layer + self.layers.append( + CAREConv( + self.hid_dim, + self.num_classes, + self.num_classes, + self.edges, + activation=self.activation, + step_size=self.step_size, + ) + ) + + def forward(self, graph, feat): + # For full graph training, directly use the graph + # formula 4 + sim = th.tanh(self.layers[0].MLP(feat)) + + # Forward of n layers of CARE-GNN + for layer in self.layers: + feat = layer(graph, feat) + + return feat, sim + + def RLModule(self, graph, epoch, idx): + for layer in self.layers: + for etype in self.edges: + if not layer.cvg[etype]: + # formula 5 + eid = graph.in_edges(idx, form="eid", etype=etype) + avg_dist = th.mean(layer.dist[etype][eid]) + + # formula 6 + if layer.last_avg_dist[etype] < avg_dist: + if layer.p[etype] - self.step_size > 0: + layer.p[etype] -= self.step_size + layer.f[etype].append(-1) + else: + if layer.p[etype] + self.step_size <= 1: + layer.p[etype] += self.step_size + layer.f[etype].append(+1) + layer.last_avg_dist[etype] = avg_dist + + # formula 7 + if epoch >= 9 and abs(sum(layer.f[etype][-10:])) <= 2: + layer.cvg[etype] = True diff --git a/hugegraph-ml/src/hugegraph_ml/models/cluster_gcn.py b/hugegraph-ml/src/hugegraph_ml/models/cluster_gcn.py new file mode 100644 index 00000000..ce4fb944 --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/models/cluster_gcn.py @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +""" +Cluster-GCN + +References +---------- +Paper: https://arxiv.org/abs/1905.07953 +Author's code: https://github.com/google-research/google-research/tree/master/cluster_gcn +DGL code: https://github.com/dmlc/dgl/tree/master/examples/pytorch/cluster_gcn +""" + +import dgl +import dgl.nn as dglnn +import torch +import torch.nn as nn +import torch.nn.functional as F +import torchmetrics.functional as MF + + +class SAGE(nn.Module): + def __init__(self, in_feats, n_hidden, n_classes): + super().__init__() + self.layers = nn.ModuleList() + self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, "mean")) + self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, "mean")) + self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, "mean")) + self.dropout = nn.Dropout(0.5) + + def forward(self, sg, x): + h = x + for l, layer in enumerate(self.layers): + h = layer(sg, h) + if l != len(self.layers) - 1: + h = F.relu(h) + h = self.dropout(h) + return h + + def loss(self, logits, labels): + return nn.CrossEntropyLoss()(logits, labels) + + def inference(self, sg, x): + return self.forward(sg, x) diff --git a/hugegraph-ml/src/hugegraph_ml/models/correct_and_smooth.py b/hugegraph-ml/src/hugegraph_ml/models/correct_and_smooth.py new file mode 100644 index 00000000..34f8ec75 --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/models/correct_and_smooth.py @@ -0,0 +1,262 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +""" + Correct and Smooth (C&S) + +References +---------- +Paper: https://arxiv.org/abs/2010.13993 +Author's code: https://github.com/CUAI/CorrectAndSmooth +DGL code: https://github.com/dmlc/dgl/tree/master/examples/pytorch/correct_and_smooth +""" + +import dgl.function as fn +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class MLPLinear(nn.Module): + def __init__(self, in_dim, out_dim): + super(MLPLinear, self).__init__() + self.linear = nn.Linear(in_dim, out_dim) + self.reset_parameters() + self.criterion = nn.CrossEntropyLoss() + + def reset_parameters(self): + self.linear.reset_parameters() + + def forward(self, graph, x): + return F.log_softmax(self.linear(x), dim=-1) + + def loss(self, logits, labels): + return self.criterion(logits, labels) + + def inference(self, graph, feats): + return self.forward(graph, feats) + + +class MLP(nn.Module): + def __init__(self, in_dim, hid_dim, out_dim, num_layers, dropout=0.0): + super(MLP, self).__init__() + assert num_layers >= 2 + + self.linears = nn.ModuleList() + self.bns = nn.ModuleList() + self.linears.append(nn.Linear(in_dim, hid_dim)) + self.bns.append(nn.BatchNorm1d(hid_dim)) + + for _ in range(num_layers - 2): + self.linears.append(nn.Linear(hid_dim, hid_dim)) + self.bns.append(nn.BatchNorm1d(hid_dim)) + + self.linears.append(nn.Linear(hid_dim, out_dim)) + self.dropout = dropout + self.reset_parameters() + + self.criterion = nn.CrossEntropyLoss() + + def reset_parameters(self): + for layer in self.linears: + layer.reset_parameters() + for layer in self.bns: + layer.reset_parameters() + + def forward(self, graph, x): + for linear, bn in zip(self.linears[:-1], self.bns): + x = linear(x) + x = F.relu(x, inplace=True) + x = bn(x) + x = F.dropout(x, p=self.dropout, training=self.training) + x = self.linears[-1](x) + return F.log_softmax(x, dim=-1) + + def loss(self, logits, labels): + return self.criterion(logits, labels) + + def inference(self, graph, feats): + return self.forward(graph, feats) + + +class LabelPropagation(nn.Module): + r""" + + Description + ----------- + Introduced in `Learning from Labeled and Unlabeled Data with Label Propagation `_ + + .. math:: + \mathbf{Y}^{\prime} = \alpha \cdot \mathbf{D}^{-1/2} \mathbf{A} + \mathbf{D}^{-1/2} \mathbf{Y} + (1 - \alpha) \mathbf{Y}, + + where unlabeled data is inferred by labeled data via propagation. + + Parameters + ---------- + num_layers: int + The number of propagations. + alpha: float + The :math:`\alpha` coefficient. 
+ adj: str + 'DAD': D^-0.5 * A * D^-0.5 + 'DA': D^-1 * A + 'AD': A * D^-1 + """ + + def __init__(self, num_layers, alpha, adj="DAD"): + super(LabelPropagation, self).__init__() + + self.num_layers = num_layers + self.alpha = alpha + self.adj = adj + + @torch.no_grad() + def forward(self, g, labels, mask=None, post_step=lambda y: y.clamp_(0.0, 1.0)): + with g.local_scope(): + if labels.dtype == torch.long: + labels = F.one_hot(labels.view(-1)).to(torch.float32) + + y = labels + if mask is not None: + y = torch.zeros_like(labels) + y[mask] = labels[mask] + + last = (1 - self.alpha) * y + degs = g.in_degrees().float().clamp(min=1) + norm = ( + torch.pow(degs, -0.5 if self.adj == "DAD" else -1) + .to(labels.device) + .unsqueeze(1) + ) + + for _ in range(self.num_layers): + # Assume the graphs to be undirected + if self.adj in ["DAD", "AD"]: + y = norm * y + + g.ndata["h"] = y + g.update_all(fn.copy_u("h", "m"), fn.sum("m", "h")) + y = self.alpha * g.ndata.pop("h") + + if self.adj in ["DAD", "DA"]: + y = y * norm + + y = post_step(last + y) + + return y + + +class CorrectAndSmooth(nn.Module): + r""" + + Description + ----------- + Introduced in `Combining Label Propagation and Simple Models Out-performs Graph Neural Networks <https://arxiv.org/abs/2010.13993>`_ + + Parameters + ---------- + num_correction_layers: int + The number of correction propagations. + correction_alpha: float + The coefficient of correction. + correction_adj: str + 'DAD': D^-0.5 * A * D^-0.5 + 'DA': D^-1 * A + 'AD': A * D^-1 + num_smoothing_layers: int + The number of smoothing propagations. + smoothing_alpha: float + The coefficient of smoothing. + smoothing_adj: str + 'DAD': D^-0.5 * A * D^-0.5 + 'DA': D^-1 * A + 'AD': A * D^-1 + autoscale: bool, optional + If set to True, will automatically determine the scaling factor :math:`\sigma`. Default is True. + scale: float, optional + The scaling factor :math:`\sigma`, in case :obj:`autoscale = False`. Default is 1.
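+ + Example + ------- + A minimal sketch (propagation depths and alpha values are illustrative; `y_soft` holds the + base predictor's class probabilities, e.g. the exponential of the MLP's log-softmax output): + >>> cs = CorrectAndSmooth(50, 0.979, "DAD", 50, 0.756, "DAD") + >>> y_soft = cs.correct(g, y_soft, labels[train_mask], train_mask) + >>> y_soft = cs.smooth(g, y_soft, labels[train_mask], train_mask)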
+ """ + + def __init__( + self, + num_correction_layers, + correction_alpha, + correction_adj, + num_smoothing_layers, + smoothing_alpha, + smoothing_adj, + autoscale=True, + scale=1.0, + ): + super(CorrectAndSmooth, self).__init__() + + self.autoscale = autoscale + self.scale = scale + + self.prop1 = LabelPropagation( + num_correction_layers, correction_alpha, correction_adj + ) + self.prop2 = LabelPropagation( + num_smoothing_layers, smoothing_alpha, smoothing_adj + ) + + def correct(self, g, y_soft, y_true, mask): + with g.local_scope(): + assert abs(float(y_soft.sum()) / y_soft.size(0) - 1.0) < 1e-2 + numel = int(mask.sum()) if mask.dtype == torch.bool else mask.size(0) + assert y_true.size(0) == numel + + if y_true.dtype == torch.long: + y_true = F.one_hot(y_true.view(-1), y_soft.size(-1)).to(y_soft.dtype) + + error = torch.zeros_like(y_soft) + error[mask] = y_true - y_soft[mask] + + if self.autoscale: + smoothed_error = self.prop1( + g, error, post_step=lambda x: x.clamp_(-1.0, 1.0) + ) + sigma = error[mask].abs().sum() / numel + scale = sigma / smoothed_error.abs().sum(dim=1, keepdim=True) + scale[scale.isinf() | (scale > 1000)] = 1.0 + + result = y_soft + scale * smoothed_error + result[result.isnan()] = y_soft[result.isnan()] + return result + else: + + def fix_input(x): + x[mask] = error[mask] + return x + + smoothed_error = self.prop1(g, error, post_step=fix_input) + + result = y_soft + self.scale * smoothed_error + result[result.isnan()] = y_soft[result.isnan()] + return result + + def smooth(self, g, y_soft, y_true, mask): + with g.local_scope(): + numel = int(mask.sum()) if mask.dtype == torch.bool else mask.size(0) + assert y_true.size(0) == numel + + if y_true.dtype == torch.long: + y_true = F.one_hot(y_true.view(-1), y_soft.size(-1)).to(y_soft.dtype) + + y_soft[mask] = y_true + return self.prop2(g, y_soft) diff --git a/hugegraph-ml/src/hugegraph_ml/models/dagnn.py b/hugegraph-ml/src/hugegraph_ml/models/dagnn.py new file mode 100644 index 00000000..2e9d260f --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/models/dagnn.py @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +""" +Deep Adaptive Graph Neural Network (DAGNN) + +References +---------- +Paper: https://arxiv.org/abs/2007.09296 +Author's code: https://github.com/divelab/DeeperGNN +DGL code: https://github.com/dmlc/dgl/tree/master/examples/pytorch/dagnn +""" + +import dgl.function as fn + +import numpy as np +import torch +from torch import nn +from torch.nn import functional as F, Parameter +import random + + +class DAGNNConv(nn.Module): + def __init__(self, in_dim, k): + super(DAGNNConv, self).__init__() + + self.s = Parameter(torch.FloatTensor(in_dim, 1)) + self.k = k + + self.reset_parameters() + + def reset_parameters(self): + gain = nn.init.calculate_gain("sigmoid") + nn.init.xavier_uniform_(self.s, gain=gain) + + def forward(self, graph, feats): + with graph.local_scope(): + results = [feats] + + degs = graph.in_degrees().float() + norm = torch.pow(degs, -0.5) + norm = norm.to(feats.device).unsqueeze(1) + + for _ in range(self.k): + feats = feats * norm + graph.ndata["h"] = feats + graph.update_all(fn.copy_u("h", "m"), fn.sum("m", "h")) + feats = graph.ndata["h"] + feats = feats * norm + results.append(feats) + + H = torch.stack(results, dim=1) + S = F.sigmoid(torch.matmul(H, self.s)) + S = S.permute(0, 2, 1) + H = torch.matmul(S, H).squeeze() + + return H + + +class MLPLayer(nn.Module): + def __init__(self, in_dim, out_dim, bias=True, activation=None, dropout=0): + super(MLPLayer, self).__init__() + + self.linear = nn.Linear(in_dim, out_dim, bias=bias) + self.activation = activation + self.dropout = nn.Dropout(dropout) + self.reset_parameters() + + def reset_parameters(self): + gain = 1.0 + if self.activation is F.relu: + gain = nn.init.calculate_gain("relu") + nn.init.xavier_uniform_(self.linear.weight, gain=gain) + if self.linear.bias is not None: + nn.init.zeros_(self.linear.bias) + + def forward(self, feats): + feats = self.dropout(feats) + feats = self.linear(feats) + if self.activation: + feats = self.activation(feats) + + return feats + + +class DAGNN(nn.Module): + def __init__( + self, + k, + in_dim, + hid_dim, + out_dim, + bias=True, + activation=F.relu, + dropout=0, + ): + super(DAGNN, self).__init__() + self.mlp = nn.ModuleList() + self.mlp.append( + MLPLayer( + in_dim=in_dim, + out_dim=hid_dim, + bias=bias, + activation=activation, + dropout=dropout, + ) + ) + self.mlp.append( + MLPLayer( + in_dim=hid_dim, + out_dim=out_dim, + bias=bias, + activation=None, + dropout=dropout, + ) + ) + self.dagnn = DAGNNConv(in_dim=out_dim, k=k) + + self.criterion = nn.CrossEntropyLoss() + + def forward(self, graph, feats): + for layer in self.mlp: + feats = layer(feats) + feats = self.dagnn(graph, feats) + return feats + + def loss(self, logits, labels): + return self.criterion(logits, labels) + + def inference(self, graph, feats): + return self.forward(graph, feats) diff --git a/hugegraph-ml/src/hugegraph_ml/models/deepergcn.py b/hugegraph-ml/src/hugegraph_ml/models/deepergcn.py new file mode 100644 index 00000000..03fc2927 --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/models/deepergcn.py @@ -0,0 +1,287 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +DeeperGCN + +References +---------- +Paper: https://arxiv.org/abs/2006.07739 +Author's code: https://github.com/lightaime/deep_gcns_torch +DGL code: https://github.com/dmlc/dgl/tree/master/examples/pytorch/deepergcn +""" + +import dgl.function as fn +import torch.nn as nn +import torch.nn.functional as F +from dgl.nn.functional import edge_softmax +from dgl.nn.pytorch.glob import AvgPooling +from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder +import torch + + +class DeeperGCN(nn.Module): + r""" + + Description + ----------- + Introduced in "DeeperGCN: All You Need to Train Deeper GCNs " + + Parameters + ---------- + node_feat_dim: int + Size of node feature. + edge_feat_dim: int + Size of edge feature. + hid_dim: int + Size of hidden representations. + out_dim: int + Size of output. + num_layers: int + Number of graph convolutional layers. + dropout: float + Dropout rate. Default is 0. + beta: float + A continuous variable called an inverse temperature. Default is 1.0. + learn_beta: bool + Whether beta is a learnable weight. Default is False. + aggr: str + Type of aggregation. Default is 'softmax'. + mlp_layers: int + Number of MLP layers in message normalization. Default is 1. + """ + + def __init__( + self, + node_feat_dim, + edge_feat_dim, + hid_dim, + out_dim, + num_layers, + dropout=0.0, + beta=1.0, + learn_beta=False, + aggr="softmax", + mlp_layers=1, + ): + super(DeeperGCN, self).__init__() + + self.num_layers = num_layers + self.dropout = dropout + self.gcns = nn.ModuleList() + self.norms = nn.ModuleList() + + for _ in range(self.num_layers): + conv = GENConv( + edge_feat_dim=edge_feat_dim, + in_dim=hid_dim, + out_dim=hid_dim, + aggregator=aggr, + beta=beta, + learn_beta=learn_beta, + mlp_layers=mlp_layers, + ) + + self.gcns.append(conv) + self.norms.append(nn.BatchNorm1d(hid_dim, affine=True)) + + # self.node_encoder = AtomEncoder(hid_dim) + self.node_encoder = torch.nn.Sequential( + torch.nn.Linear(node_feat_dim, 512), + torch.nn.ReLU(), + torch.nn.Linear(512, hid_dim), + ) + # self.pooling = AvgPooling() + self.output = nn.Linear(hid_dim, out_dim) + + self.criterion = nn.CrossEntropyLoss() + + def forward(self, g, edge_feats, node_feats=None): + with g.local_scope(): + hv = self.node_encoder(node_feats.float()) + he = edge_feats + + for layer in range(self.num_layers): + hv1 = self.norms[layer](hv) + hv1 = F.relu(hv1) + hv1 = F.dropout(hv1, p=self.dropout, training=self.training) + hv = self.gcns[layer](g, hv1, he) + hv + + # h_g = self.pooling(g, hv) + + return self.output(hv) + + def loss(self, logits, labels): + return self.criterion(logits, labels) + + def inference(self, g, edge_feats, node_feats): + return self.forward(g, edge_feats, node_feats) + + +class GENConv(nn.Module): + r""" + + Description + ----------- + Generalized Message Aggregator was introduced in "DeeperGCN: All You Need to Train Deeper GCNs " + + Parameters + ---------- + in_dim: int + Input size. + out_dim: int + Output size. + aggregator: str + Type of aggregation. Default is 'softmax'. + beta: float + A continuous variable called an inverse temperature. Default is 1.0. 
+ learn_beta: bool + Whether beta is a learnable variable or not. Default is False. + p: float + Initial power for power mean aggregation. Default is 1.0. + learn_p: bool + Whether p is a learnable variable or not. Default is False. + msg_norm: bool + Whether message normalization is used. Default is False. + learn_msg_scale: bool + Whether s is a learnable scaling factor or not in message normalization. Default is False. + mlp_layers: int + The number of MLP layers. Default is 1. + eps: float + A small positive constant in message construction function. Default is 1e-7. + """ + + def __init__( + self, + edge_feat_dim, + in_dim, + out_dim, + aggregator="softmax", + beta=1.0, + learn_beta=False, + p=1.0, + learn_p=False, + msg_norm=False, + learn_msg_scale=False, + mlp_layers=1, + eps=1e-7, + ): + super(GENConv, self).__init__() + + self.aggr = aggregator + self.eps = eps + + channels = [in_dim] + for _ in range(mlp_layers - 1): + channels.append(in_dim * 2) + channels.append(out_dim) + + self.mlp = MLP(channels) + self.msg_norm = MessageNorm(learn_msg_scale) if msg_norm else None + + self.beta = ( + nn.Parameter(torch.Tensor([beta]), requires_grad=True) + if learn_beta and self.aggr == "softmax" + else beta + ) + self.p = nn.Parameter(torch.Tensor([p]), requires_grad=True) if learn_p else p + + # self.edge_encoder = BondEncoder(in_dim) + self.edge_encoder = torch.nn.Sequential( + torch.nn.Linear(edge_feat_dim, 512), + torch.nn.ReLU(), + torch.nn.Linear(512, in_dim), + ) + + def forward(self, g, node_feats, edge_feats): + with g.local_scope(): + # Node and edge feature size need to match. + g.ndata["h"] = node_feats + g.edata["h"] = self.edge_encoder(edge_feats.float()) + g.apply_edges(fn.u_add_e("h", "h", "m")) + + if self.aggr == "softmax": + g.edata["m"] = F.relu(g.edata["m"]) + self.eps + g.edata["a"] = edge_softmax(g, g.edata["m"] * self.beta) + g.update_all( + lambda edge: {"x": edge.data["m"] * edge.data["a"]}, + fn.sum("x", "m"), + ) + + elif self.aggr == "power": + minv, maxv = 1e-7, 1e1 + torch.clamp_(g.edata["m"], minv, maxv) + g.update_all( + lambda edge: {"x": torch.pow(edge.data["m"], self.p)}, + fn.mean("x", "m"), + ) + torch.clamp_(g.ndata["m"], minv, maxv) + g.ndata["m"] = torch.pow(g.ndata["m"], self.p) + + else: + raise NotImplementedError(f"Aggregator {self.aggr} is not supported.") + + if self.msg_norm is not None: + g.ndata["m"] = self.msg_norm(node_feats, g.ndata["m"]) + + feats = node_feats + g.ndata["m"] + + return self.mlp(feats) + + +class MLP(nn.Sequential): + r""" + + Description + ----------- + From equation (5) in "DeeperGCN: All You Need to Train Deeper GCNs " + """ + + def __init__(self, channels, act="relu", dropout=0.0, bias=True): + layers = [] + + for i in range(1, len(channels)): + layers.append(nn.Linear(channels[i - 1], channels[i], bias)) + if i < len(channels) - 1: + layers.append(nn.BatchNorm1d(channels[i], affine=True)) + layers.append(nn.ReLU()) + layers.append(nn.Dropout(dropout)) + + super(MLP, self).__init__(*layers) + + +class MessageNorm(nn.Module): + r""" + + Description + ----------- + Message normalization was introduced in "DeeperGCN: All You Need to Train Deeper GCNs " + + Parameters + ---------- + learn_scale: bool + Whether s is a learnable scaling factor or not. Default is False. 
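+ + The forward pass below computes :math:`s \cdot \|\mathbf{h}\|_2 \cdot \mathbf{m} / \|\mathbf{m}\|_2`, + i.e. the :math:`\ell_2`-normalized message rescaled by the norm of the node features.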
+ """ + + def __init__(self, learn_scale=False): + super(MessageNorm, self).__init__() + self.scale = nn.Parameter(torch.FloatTensor([1.0]), requires_grad=learn_scale) + + def forward(self, feats, msg, p=2): + msg = F.normalize(msg, p=2, dim=-1) + feats_norm = feats.norm(p=p, dim=-1, keepdim=True) + return msg * feats_norm * self.scale diff --git a/hugegraph-ml/src/hugegraph_ml/models/gatne.py b/hugegraph-ml/src/hugegraph_ml/models/gatne.py new file mode 100644 index 00000000..794d1ccb --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/models/gatne.py @@ -0,0 +1,273 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.right (c) 2024 by jinsong, All Rights Reserved. + +""" +General Attributed Multiplex HeTerogeneous Network Embedding (GATNE) + +References +---------- +Paper: https://arxiv.org/abs/1905.01669 +Author's code: https://github.com/THUDM/GATNE +DGL code: https://github.com/dmlc/dgl/tree/master/examples/pytorch/GATNE-T +""" + +import math +import os +import sys +import time +from collections import defaultdict + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from numpy import random +from torch.nn.parameter import Parameter +from tqdm.auto import tqdm + +import dgl +import dgl.function as fn +import multiprocessing +from functools import partial, reduce, wraps + + +class NeighborSampler(object): + def __init__(self, g, num_fanouts): + self.g = g + self.num_fanouts = num_fanouts + + def sample(self, pairs): + heads, tails, types = zip(*pairs) + seeds, head_invmap = torch.unique(torch.LongTensor(heads), return_inverse=True) + blocks = [] + for fanout in reversed(self.num_fanouts): + sampled_graph = dgl.sampling.sample_neighbors(self.g, seeds, fanout) + sampled_block = dgl.to_block(sampled_graph, seeds) + seeds = sampled_block.srcdata[dgl.NID] + blocks.insert(0, sampled_block) + return ( + blocks, + torch.LongTensor(head_invmap), + torch.LongTensor(tails), + torch.LongTensor(types), + ) + + +class DGLGATNE(nn.Module): + def __init__( + self, + num_nodes, + embedding_size, + embedding_u_size, + edge_types, + edge_type_count, + dim_a, + ): + super(DGLGATNE, self).__init__() + self.num_nodes = num_nodes + self.embedding_size = embedding_size + self.embedding_u_size = embedding_u_size + self.edge_types = edge_types + self.edge_type_count = edge_type_count + self.dim_a = dim_a + + self.node_embeddings = Parameter(torch.FloatTensor(num_nodes, embedding_size)) + self.node_type_embeddings = Parameter( + torch.FloatTensor(num_nodes, edge_type_count, embedding_u_size) + ) + self.trans_weights = Parameter( + torch.FloatTensor(edge_type_count, embedding_u_size, embedding_size) + ) + self.trans_weights_s1 = Parameter( + torch.FloatTensor(edge_type_count, embedding_u_size, dim_a) + ) + self.trans_weights_s2 = 
Parameter(torch.FloatTensor(edge_type_count, dim_a, 1)) + + self.reset_parameters() + + def reset_parameters(self): + self.node_embeddings.data.uniform_(-1.0, 1.0) + self.node_type_embeddings.data.uniform_(-1.0, 1.0) + self.trans_weights.data.normal_(std=1.0 / math.sqrt(self.embedding_size)) + self.trans_weights_s1.data.normal_(std=1.0 / math.sqrt(self.embedding_size)) + self.trans_weights_s2.data.normal_(std=1.0 / math.sqrt(self.embedding_size)) + + # embs: [batch_size, embedding_size] + def forward(self, block): + input_nodes = block.srcdata[dgl.NID] + output_nodes = block.dstdata[dgl.NID] + batch_size = block.number_of_dst_nodes() + node_embed = self.node_embeddings + node_type_embed = [] + + with block.local_scope(): + for i in range(self.edge_type_count): + edge_type = self.edge_types[i] + block.srcdata[edge_type] = self.node_type_embeddings[input_nodes, i] + block.dstdata[edge_type] = self.node_type_embeddings[output_nodes, i] + block.update_all( + fn.copy_u(edge_type, "m"), + fn.sum("m", edge_type), + etype=edge_type, + ) + node_type_embed.append(block.dstdata[edge_type]) + + node_type_embed = torch.stack(node_type_embed, 1) + tmp_node_type_embed = node_type_embed.unsqueeze(2).view( + -1, 1, self.embedding_u_size + ) + trans_w = ( + self.trans_weights.unsqueeze(0) + .repeat(batch_size, 1, 1, 1) + .view(-1, self.embedding_u_size, self.embedding_size) + ) + trans_w_s1 = ( + self.trans_weights_s1.unsqueeze(0) + .repeat(batch_size, 1, 1, 1) + .view(-1, self.embedding_u_size, self.dim_a) + ) + trans_w_s2 = ( + self.trans_weights_s2.unsqueeze(0) + .repeat(batch_size, 1, 1, 1) + .view(-1, self.dim_a, 1) + ) + + attention = ( + F.softmax( + torch.matmul( + torch.tanh(torch.matmul(tmp_node_type_embed, trans_w_s1)), + trans_w_s2, + ) + .squeeze(2) + .view(-1, self.edge_type_count), + dim=1, + ) + .unsqueeze(1) + .repeat(1, self.edge_type_count, 1) + ) + + node_type_embed = torch.matmul(attention, node_type_embed).view( + -1, 1, self.embedding_u_size + ) + node_embed = node_embed[output_nodes].unsqueeze(1).repeat( + 1, self.edge_type_count, 1 + ) + torch.matmul(node_type_embed, trans_w).view( + -1, self.edge_type_count, self.embedding_size + ) + last_node_embed = F.normalize(node_embed, dim=2) + + return last_node_embed # [batch_size, edge_type_count, embedding_size] + + +class NSLoss(nn.Module): + def __init__(self, num_nodes, num_sampled, embedding_size): + super(NSLoss, self).__init__() + self.num_nodes = num_nodes + self.num_sampled = num_sampled + self.embedding_size = embedding_size + self.weights = Parameter(torch.FloatTensor(num_nodes, embedding_size)) + # [ (log(i+2) - log(i+1)) / log(num_nodes + 1)] + self.sample_weights = F.normalize( + torch.Tensor( + [ + (math.log(k + 2) - math.log(k + 1)) / math.log(num_nodes + 1) + for k in range(num_nodes) + ] + ), + dim=0, + ) + + self.reset_parameters() + + def reset_parameters(self): + self.weights.data.normal_(std=1.0 / math.sqrt(self.embedding_size)) + + def forward(self, input, embs, label): + n = input.shape[0] + log_target = torch.log( + torch.sigmoid(torch.sum(torch.mul(embs, self.weights[label]), 1)) + ) + negs = torch.multinomial( + self.sample_weights, self.num_sampled * n, replacement=True + ).view(n, self.num_sampled) + noise = torch.neg(self.weights[negs]) + sum_log_sampled = torch.sum( + torch.log(torch.sigmoid(torch.bmm(noise, embs.unsqueeze(2)))), 1 + ).squeeze() + + loss = log_target + sum_log_sampled + return -loss.sum() / n + + +def generate_pairs_parallel(walks, skip_window=None, layer_id=None): + pairs = [] + for walk in 
walks: + walk = walk.tolist() + for i in range(len(walk)): + for j in range(1, skip_window + 1): + if i - j >= 0: + pairs.append((walk[i], walk[i - j], layer_id)) + if i + j < len(walk): + pairs.append((walk[i], walk[i + j], layer_id)) + return pairs + + +def generate_pairs(all_walks, window_size, num_workers): + # for each node, choose the first neighbor and second neighbor of it to form pairs + # Get all worker processes + start_time = time.time() + print("We are generating pairs with {} cores.".format(num_workers)) + + # Start all worker processes + pool = multiprocessing.Pool(processes=num_workers) + pairs = [] + skip_window = window_size // 2 + for layer_id, walks in enumerate(all_walks): + block_num = len(walks) // num_workers + if block_num > 0: + walks_list = [ + walks[i * block_num : min((i + 1) * block_num, len(walks))] + for i in range(num_workers) + ] + else: + walks_list = [walks] + tmp_result = pool.map( + partial( + generate_pairs_parallel, + skip_window=skip_window, + layer_id=layer_id, + ), + walks_list, + ) + pairs += reduce(lambda x, y: x + y, tmp_result) + + pool.close() + end_time = time.time() + print("Generate pairs end, use {}s.".format(end_time - start_time)) + return np.array([list(pair) for pair in set(pairs)]) + + +def construct_typenodes_from_graph(graph): + nodes = [] + for etype in graph.etypes: + edges = graph.edges(etype=etype) + node1, node2 = edges + node1_list = node1.cpu().numpy().tolist() + node2_list = node2.cpu().numpy().tolist() + tmp_nodes = list(set(node1_list + node2_list)) + nodes.append(tmp_nodes) + return nodes diff --git a/hugegraph-ml/src/hugegraph_ml/models/pgnn.py b/hugegraph-ml/src/hugegraph_ml/models/pgnn.py new file mode 100644 index 00000000..de0f7408 --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/models/pgnn.py @@ -0,0 +1,462 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +""" +Position-aware Graph Neural Networks (P-GNN) + +References +---------- +Paper: http://proceedings.mlr.press/v97/you19b/you19b.pdf +Author's code: https://github.com/JiaxuanYou/P-GNN +DGL code: https://github.com/dmlc/dgl/tree/master/examples/pytorch/P-GNN +""" + +import dgl.function as fn +import torch +import torch.nn as nn +import torch.nn.functional as F +import multiprocessing as mp +import random +from multiprocessing import get_context + +import networkx as nx +import numpy as np +from tqdm.auto import tqdm +from sklearn.metrics import roc_auc_score + + +class PGNN_layer(nn.Module): + def __init__(self, input_dim, output_dim): + super(PGNN_layer, self).__init__() + self.input_dim = input_dim + + self.linear_hidden_u = nn.Linear(input_dim, output_dim) + self.linear_hidden_v = nn.Linear(input_dim, output_dim) + self.linear_out_position = nn.Linear(output_dim, 1) + self.act = nn.ReLU() + + def forward(self, graph, feature, anchor_eid, dists_max): + with graph.local_scope(): + u_feat = self.linear_hidden_u(feature) + v_feat = self.linear_hidden_v(feature) + graph.srcdata.update({"u_feat": u_feat}) + graph.dstdata.update({"v_feat": v_feat}) + + graph.apply_edges(fn.u_mul_e("u_feat", "sp_dist", "u_message")) + graph.apply_edges(fn.v_add_e("v_feat", "u_message", "message")) + + messages = torch.index_select( + graph.edata["message"], + 0, + torch.LongTensor(anchor_eid).to(feature.device), + ) + messages = messages.reshape( + dists_max.shape[0], dists_max.shape[1], messages.shape[-1] + ) + + messages = self.act(messages) # n*m*d + + out_position = self.linear_out_position(messages).squeeze(-1) # n*m_out + out_structure = torch.mean(messages, dim=1) # n*d + + return out_position, out_structure + + +class PGNN(nn.Module): + def __init__(self, input_dim, feature_dim=32, dropout=0.5): + super(PGNN, self).__init__() + self.dropout = nn.Dropout(dropout) + + self.linear_pre = nn.Linear(input_dim, feature_dim) + self.conv_first = PGNN_layer(feature_dim, feature_dim) + self.conv_out = PGNN_layer(feature_dim, feature_dim) + + def forward(self, data): + x = data["graph"].ndata["feat"] + graph = data["graph"] + x = self.linear_pre(x) + x_position, x = self.conv_first(graph, x, data["anchor_eid"], data["dists_max"]) + + x = self.dropout(x) + x_position, x = self.conv_out(graph, x, data["anchor_eid"], data["dists_max"]) + x_position = F.normalize(x_position, p=2, dim=-1) + return x_position + + +def get_communities(remove_feature, graph): + community_size = 20 + # Randomly rewire 1% edges + node_list = list(graph.nodes) + for u, v in graph.edges(): + if random.random() < 0.01: + x = random.choice(node_list) + if graph.has_edge(u, x): + continue + graph.remove_edge(u, v) + graph.add_edge(u, x) + + # remove self-loops + graph.remove_edges_from(nx.selfloop_edges(graph)) + edge_index = np.array(list(graph.edges)) + # Add (i, j) for an edge (j, i) + edge_index = np.concatenate((edge_index, edge_index[:, ::-1]), axis=0) + edge_index = torch.from_numpy(edge_index).long().permute(1, 0) + + n = graph.number_of_nodes() + label = np.zeros((n, n), dtype=int) + for u in node_list: + # the node IDs are simply consecutive integers from 0 + for v in range(u): + if u // community_size == v // community_size: + label[u, v] = 1 + + if remove_feature: + feature = torch.ones((n, 1)) + else: + rand_order = np.random.permutation(n) + feature = np.identity(n)[:, rand_order] + + data = { + "edge_index": edge_index, + "feature": feature, + "positive_edges": np.stack(np.nonzero(label)), + "num_nodes": feature.shape[0], + } 
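+    # With community_size = 20, nodes 0..19 form community 0, nodes 20..39 form
+    # community 1, and so on; e.g. (25, 21) is a positive pair while (25, 19) is
+    # not, since label[u, v] = 1 only for same-community pairs.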
+ + return data + + +def to_single_directed(edges): + edges_new = np.zeros((2, edges.shape[1] // 2), dtype=int) + j = 0 + for i in range(edges.shape[1]): + if edges[0, i] < edges[1, i]: + edges_new[:, j] = edges[:, i] + j += 1 + + return edges_new + + +# each node at least remain in the new graph +def split_edges(p, edges, data, non_train_ratio=0.2): + e = edges.shape[1] + edges = edges[:, np.random.permutation(e)] + split1 = int((1 - non_train_ratio) * e) + split2 = int((1 - non_train_ratio / 2) * e) + + data.update( + { + "{}_edges_train".format(p): edges[:, :split1], # 80% + "{}_edges_val".format(p): edges[:, split1:split2], # 10% + "{}_edges_test".format(p): edges[:, split2:], # 10% + } + ) + + +def to_bidirected(edges): + return np.concatenate((edges, edges[::-1, :]), axis=-1) + + +def get_negative_edges(positive_edges, num_nodes, num_negative_edges): + positive_edge_set = [] + positive_edges = to_bidirected(positive_edges) + for i in range(positive_edges.shape[1]): + positive_edge_set.append(tuple(positive_edges[:, i])) + positive_edge_set = set(positive_edge_set) + + negative_edges = np.zeros((2, num_negative_edges), dtype=positive_edges.dtype) + for i in range(num_negative_edges): + while True: + mask_temp = tuple(np.random.choice(num_nodes, size=(2,), replace=False)) + if mask_temp not in positive_edge_set: + negative_edges[:, i] = mask_temp + break + + return negative_edges + + +def get_pos_neg_edges(data, infer_link_positive=True): + if infer_link_positive: + data["positive_edges"] = to_single_directed(data["edge_index"].numpy()) + split_edges("positive", data["positive_edges"], data) + + # resample edge mask link negative + negative_edges = get_negative_edges( + data["positive_edges"], + data["num_nodes"], + num_negative_edges=data["positive_edges"].shape[1], + ) + split_edges("negative", negative_edges, data) + + return data + + +def shortest_path(graph, node_range, cutoff): + dists_dict = {} + for node in tqdm(node_range, leave=False): + dists_dict[node] = nx.single_source_shortest_path_length(graph, node, cutoff) + return dists_dict + + +def merge_dicts(dicts): + result = {} + for dictionary in dicts: + result.update(dictionary) + return result + + +def all_pairs_shortest_path(graph, cutoff=None, num_workers=4): + nodes = list(graph.nodes) + random.shuffle(nodes) + pool = mp.Pool(processes=num_workers) + interval_size = len(nodes) / num_workers + results = [ + pool.apply_async( + shortest_path, + args=( + graph, + nodes[int(interval_size * i) : int(interval_size * (i + 1))], + cutoff, + ), + ) + for i in range(num_workers) + ] + output = [p.get() for p in results] + dists_dict = merge_dicts(output) + pool.close() + pool.join() + return dists_dict + + +def precompute_dist_data(edge_index, num_nodes, approximate=0): + """ + Here dist is 1/real_dist, higher actually means closer, 0 means disconnected + :return: + """ + graph = nx.Graph() + edge_list = edge_index.transpose(1, 0).tolist() + graph.add_edges_from(edge_list) + + n = num_nodes + dists_array = np.zeros((n, n)) + dists_dict = all_pairs_shortest_path( + graph, cutoff=approximate if approximate > 0 else None + ) + node_list = graph.nodes() + for node_i in node_list: + shortest_dist = dists_dict[node_i] + for node_j in node_list: + dist = shortest_dist.get(node_j, -1) + if dist != -1: + dists_array[node_i, node_j] = 1 / (dist + 1) + return dists_array + + +def get_dataset(graph): + # Generate graph data + data_info = get_communities(False, graph) + # Get positive and negative edges + data = get_pos_neg_edges(data_info, 
infer_link_positive=True) + # Pre-compute shortest path length + dists_removed = precompute_dist_data( + data["positive_edges_train"], + data["num_nodes"], + approximate=-1, + ) + data["dists"] = torch.from_numpy(dists_removed).float() + data["edge_index"] = torch.from_numpy( + to_bidirected(data["positive_edges_train"]) + ).long() + + return data + + +def get_anchors(n): + """Get a list of NumPy arrays, each of them is an anchor node set""" + m = int(np.log2(n)) + anchor_set_id = [] + for i in range(m): + anchor_size = int(n / np.exp2(i + 1)) + for _ in range(m): + anchor_set_id.append(np.random.choice(n, size=anchor_size, replace=False)) + return anchor_set_id + + +def get_dist_max(anchor_set_id, dist): + # N x K, N is number of nodes, K is the number of anchor sets + dist_max = torch.zeros((dist.shape[0], len(anchor_set_id))) + dist_argmax = torch.zeros((dist.shape[0], len(anchor_set_id))).long() + for i in range(len(anchor_set_id)): + temp_id = torch.as_tensor(anchor_set_id[i], dtype=torch.long) + # Get reciprocal of shortest distance to each node in the i-th anchor set + dist_temp = torch.index_select(dist, 1, temp_id) + # For each node in the graph, find its closest anchor node in the set + # and the reciprocal of shortest distance + dist_max_temp, dist_argmax_temp = torch.max(dist_temp, dim=-1) + dist_max[:, i] = dist_max_temp + dist_argmax[:, i] = torch.index_select(temp_id, 0, dist_argmax_temp) + return dist_max, dist_argmax + + +def get_a_graph(dists_max, dists_argmax): + src = [] + dst = [] + real_src = [] + real_dst = [] + edge_weight = [] + dists_max = dists_max.numpy() + for i in range(dists_max.shape[0]): + # Get unique closest anchor nodes for node i across all anchor sets + tmp_dists_argmax, tmp_dists_argmax_idx = np.unique(dists_argmax[i, :], True) + src.extend([i] * tmp_dists_argmax.shape[0]) + real_src.extend([i] * dists_argmax[i, :].shape[0]) + real_dst.extend(list(dists_argmax[i, :].numpy())) + dst.extend(list(tmp_dists_argmax)) + edge_weight.extend(dists_max[i, tmp_dists_argmax_idx].tolist()) + eid_dict = {(u, v): i for i, (u, v) in enumerate(list(zip(dst, src)))} + anchor_eid = [eid_dict.get((u, v)) for u, v in zip(real_dst, real_src)] + g = (dst, src) + return g, anchor_eid, edge_weight + + +def get_graphs(data, anchor_sets): + graphs = [] + anchor_eids = [] + dists_max_list = [] + edge_weights = [] + for anchor_set in tqdm(anchor_sets, leave=False): + dists_max, dists_argmax = get_dist_max(anchor_set, data["dists"]) + g, anchor_eid, edge_weight = get_a_graph(dists_max, dists_argmax) + graphs.append(g) + anchor_eids.append(anchor_eid) + dists_max_list.append(dists_max) + edge_weights.append(edge_weight) + + return graphs, anchor_eids, dists_max_list, edge_weights + + +def merge_result(outputs): + graphs = [] + anchor_eids = [] + dists_max_list = [] + edge_weights = [] + + for g, anchor_eid, dists_max, edge_weight in outputs: + graphs.extend(g) + anchor_eids.extend(anchor_eid) + dists_max_list.extend(dists_max) + edge_weights.extend(edge_weight) + + return graphs, anchor_eids, dists_max_list, edge_weights + + +def preselect_anchor(data, num_workers=4): + pool = get_context("spawn").Pool(processes=num_workers) + # Pre-compute anchor sets, a collection of anchor sets per epoch + anchor_set_ids = [get_anchors(data["num_nodes"]) for _ in range(200)] + interval_size = len(anchor_set_ids) / num_workers + results = [ + pool.apply_async( + get_graphs, + args=( + data, + anchor_set_ids[int(interval_size * i) : int(interval_size * (i + 1))], + ), + ) + for i in 
range(num_workers) + ] + + output = [p.get() for p in results] + graphs, anchor_eids, dists_max_list, edge_weights = merge_result(output) + pool.close() + pool.join() + + return graphs, anchor_eids, dists_max_list, edge_weights + + +def get_loss(p, data, out, loss_func, device, get_auc=True): + edge_mask = np.concatenate( + ( + data["positive_edges_{}".format(p)], + data["negative_edges_{}".format(p)], + ), + axis=-1, + ) + + nodes_first = torch.index_select( + out, 0, torch.from_numpy(edge_mask[0, :]).long().to(out.device) + ) + nodes_second = torch.index_select( + out, 0, torch.from_numpy(edge_mask[1, :]).long().to(out.device) + ) + + pred = torch.sum(nodes_first * nodes_second, dim=-1) + + label_positive = torch.ones( + [ + data["positive_edges_{}".format(p)].shape[1], + ], + dtype=pred.dtype, + ) + label_negative = torch.zeros( + [ + data["negative_edges_{}".format(p)].shape[1], + ], + dtype=pred.dtype, + ) + label = torch.cat((label_positive, label_negative)).to(device) + loss = loss_func(pred, label) + + if get_auc: + auc = roc_auc_score( + label.flatten().cpu().numpy(), + torch.sigmoid(pred).flatten().data.cpu().numpy(), + ) + return loss, auc + else: + return loss + + +def train_model(data, model, loss_func, optimizer, device, g_data): + model.train() + out = model(g_data) + + loss = get_loss("train", data, out, loss_func, device, get_auc=False) + + optimizer.zero_grad() + loss.backward() + optimizer.step() + optimizer.zero_grad() + + return g_data + + +def eval_model(data, g_data, model, loss_func, device): + model.eval() + out = model(g_data) + + # train loss and auc + tmp_loss, auc_train = get_loss("train", data, out, loss_func, device) + loss_train = tmp_loss.cpu().data.numpy() + + # val loss and auc + _, auc_val = get_loss("val", data, out, loss_func, device) + + # test loss and auc + _, auc_test = get_loss("test", data, out, loss_func, device) + + return loss_train, auc_train, auc_val, auc_test diff --git a/hugegraph-ml/src/hugegraph_ml/models/seal.py b/hugegraph-ml/src/hugegraph_ml/models/seal.py new file mode 100644 index 00000000..09060ed8 --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/models/seal.py @@ -0,0 +1,826 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
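+
+# A minimal sketch of the intended flow (illustrative only; the
+# LinkPredictionSeal task later in this patch follows the same steps):
+#
+#   graph, split_edge = load_ogb_dataset("ogbl-collab")
+#   seal_data = SEALData(g=graph, split_edge=split_edge, hop=1, subsample_ratio=0.1)
+#   train_set = seal_data("train")   # label target pairs, extract 1-hop subgraphs
+#   model = DGCNN(num_layers=3, hidden_units=32,
+#                 node_attributes=seal_data.ndata["feat"],
+#                 edge_weights=seal_data.edata["weight"].float())
+#   logits = model(batched_g, batched_g.ndata["z"], batched_g.ndata[NID], batched_g.edata[EID])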
+ +""" +SEAL + +References +---------- +Paper: https://arxiv.org/abs/1802.09691 +Author's code: https://github.com/muhanzhang/SEAL +DGL code: https://github.com/dmlc/dgl/tree/master/examples/pytorch/seal +""" + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from dgl.nn.pytorch import GraphConv, SAGEConv, SortPooling, SumPooling +import argparse + +import dgl + +import numpy as np +import pandas as pd +from ogb.linkproppred import DglLinkPropPredDataset, Evaluator +from scipy.sparse.csgraph import shortest_path + +import os.path as osp +from copy import deepcopy + +from dgl import add_self_loop, DGLGraph, NID +from dgl.dataloading.negative_sampler import Uniform +from torch.utils.data import DataLoader, Dataset +from tqdm import tqdm + +import logging +import os +import time + + +class GCN(nn.Module): + """ + GCN Model + + Attributes: + num_layers(int): num of gcn layers + hidden_units(int): num of hidden units + gcn_type(str): type of gcn layer, 'gcn' for GraphConv and 'sage' for SAGEConv + pooling_type(str): type of graph pooling to get subgraph representation + 'sum' for sum pooling and 'center' for center pooling. + node_attributes(Tensor, optional): node attribute + edge_weights(Tensor, optional): edge weight + node_embedding(Tensor, optional): pre-trained node embedding + use_embedding(bool, optional): whether to use node embedding. Note that if 'use_embedding' is set True + and 'node_embedding' is None, will automatically randomly initialize node embedding. + num_nodes(int, optional): num of nodes + dropout(float, optional): dropout rate + max_z(int, optional): default max vocab size of node labeling, default 1000. + + """ + + def __init__( + self, + num_layers, + hidden_units, + gcn_type="gcn", + pooling_type="sum", + node_attributes=None, + edge_weights=None, + node_embedding=None, + use_embedding=False, + num_nodes=None, + dropout=0.5, + max_z=1000, + ): + super(GCN, self).__init__() + self.num_layers = num_layers + self.dropout = dropout + self.pooling_type = pooling_type + self.use_attribute = False if node_attributes is None else True + self.use_embedding = use_embedding + self.use_edge_weight = False if edge_weights is None else True + + self.z_embedding = nn.Embedding(max_z, hidden_units) + if node_attributes is not None: + self.node_attributes_lookup = nn.Embedding.from_pretrained(node_attributes) + self.node_attributes_lookup.weight.requires_grad = False + if edge_weights is not None: + self.edge_weights_lookup = nn.Embedding.from_pretrained(edge_weights) + self.edge_weights_lookup.weight.requires_grad = False + if node_embedding is not None: + self.node_embedding = nn.Embedding.from_pretrained(node_embedding) + self.node_embedding.weight.requires_grad = False + elif use_embedding: + self.node_embedding = nn.Embedding(num_nodes, hidden_units) + + initial_dim = hidden_units + if self.use_attribute: + initial_dim += self.node_attributes_lookup.embedding_dim + if self.use_embedding: + initial_dim += self.node_embedding.embedding_dim + + self.layers = nn.ModuleList() + if gcn_type == "gcn": + self.layers.append( + GraphConv(initial_dim, hidden_units, allow_zero_in_degree=True) + ) + for _ in range(num_layers - 1): + self.layers.append( + GraphConv(hidden_units, hidden_units, allow_zero_in_degree=True) + ) + elif gcn_type == "sage": + self.layers.append( + SAGEConv(initial_dim, hidden_units, aggregator_type="gcn") + ) + for _ in range(num_layers - 1): + self.layers.append( + SAGEConv(hidden_units, hidden_units, aggregator_type="gcn") + ) + else: + raise 
ValueError("Gcn type error.") + + self.linear_1 = nn.Linear(hidden_units, hidden_units) + self.linear_2 = nn.Linear(hidden_units, 1) + if pooling_type != "sum": + raise ValueError("Pooling type error.") + self.pooling = SumPooling() + + def reset_parameters(self): + for layer in self.layers: + layer.reset_parameters() + + def forward(self, g, z, node_id=None, edge_id=None): + """ + Args: + g(DGLGraph): the graph + z(Tensor): node labeling tensor, shape [N, 1] + node_id(Tensor, optional): node id tensor, shape [N, 1] + edge_id(Tensor, optional): edge id tensor, shape [E, 1] + Returns: + x(Tensor): output tensor + + """ + + z_emb = self.z_embedding(z) + + if self.use_attribute: + x = self.node_attributes_lookup(node_id) + x = torch.cat([z_emb, x], 1) + else: + x = z_emb + + if self.use_edge_weight: + edge_weight = self.edge_weights_lookup(edge_id) + else: + edge_weight = None + + if self.use_embedding: + n_emb = self.node_embedding(node_id) + x = torch.cat([x, n_emb], 1) + + for layer in self.layers[:-1]: + x = layer(g, x, edge_weight=edge_weight) + x = F.relu(x) + x = F.dropout(x, p=self.dropout, training=self.training) + x = self.layers[-1](g, x, edge_weight=edge_weight) + + x = self.pooling(g, x) + x = F.relu(self.linear_1(x)) + F.dropout(x, p=self.dropout, training=self.training) + x = self.linear_2(x) + + return x + + +class DGCNN(nn.Module): + """ + An end-to-end deep learning architecture for graph classification. + paper link: https://muhanzhang.github.io/papers/AAAI_2018_DGCNN.pdf + + Attributes: + num_layers(int): num of gcn layers + hidden_units(int): num of hidden units + k(int, optional): The number of nodes to hold for each graph in SortPooling. + gcn_type(str): type of gcn layer, 'gcn' for GraphConv and 'sage' for SAGEConv + node_attributes(Tensor, optional): node attribute + edge_weights(Tensor, optional): edge weight + node_embedding(Tensor, optional): pre-trained node embedding + use_embedding(bool, optional): whether to use node embedding. Note that if 'use_embedding' is set True + and 'node_embedding' is None, will automatically randomly initialize node embedding. + num_nodes(int, optional): num of nodes + dropout(float, optional): dropout rate + max_z(int, optional): default max vocab size of node labeling, default 1000. 
+ """ + + def __init__( + self, + num_layers, + hidden_units, + k=10, + gcn_type="gcn", + node_attributes=None, + edge_weights=None, + node_embedding=None, + use_embedding=False, + num_nodes=None, + dropout=0.5, + max_z=1000, + ): + super(DGCNN, self).__init__() + self.num_layers = num_layers + self.dropout = dropout + self.use_attribute = False if node_attributes is None else True + self.use_embedding = use_embedding + self.use_edge_weight = False if edge_weights is None else True + + self.z_embedding = nn.Embedding(max_z, hidden_units) + + if node_attributes is not None: + self.node_attributes_lookup = nn.Embedding.from_pretrained(node_attributes) + self.node_attributes_lookup.weight.requires_grad = False + if edge_weights is not None: + self.edge_weights_lookup = nn.Embedding.from_pretrained(edge_weights) + self.edge_weights_lookup.weight.requires_grad = False + if node_embedding is not None: + self.node_embedding = nn.Embedding.from_pretrained(node_embedding) + self.node_embedding.weight.requires_grad = False + elif use_embedding: + self.node_embedding = nn.Embedding(num_nodes, hidden_units) + + initial_dim = hidden_units + if self.use_attribute: + initial_dim += self.node_attributes_lookup.embedding_dim + if self.use_embedding: + initial_dim += self.node_embedding.embedding_dim + + self.layers = nn.ModuleList() + if gcn_type == "gcn": + self.layers.append( + GraphConv(initial_dim, hidden_units, allow_zero_in_degree=True) + ) + for _ in range(num_layers - 1): + self.layers.append( + GraphConv(hidden_units, hidden_units, allow_zero_in_degree=True) + ) + self.layers.append(GraphConv(hidden_units, 1, allow_zero_in_degree=True)) + elif gcn_type == "sage": + self.layers.append( + SAGEConv(initial_dim, hidden_units, aggregator_type="gcn") + ) + for _ in range(num_layers - 1): + self.layers.append( + SAGEConv(hidden_units, hidden_units, aggregator_type="gcn") + ) + self.layers.append(SAGEConv(hidden_units, 1, aggregator_type="gcn")) + else: + raise ValueError("Gcn type error.") + + self.pooling = SortPooling(k=k) + conv1d_channels = [16, 32] + total_latent_dim = hidden_units * num_layers + 1 + conv1d_kws = [total_latent_dim, 5] + self.conv_1 = nn.Conv1d(1, conv1d_channels[0], conv1d_kws[0], conv1d_kws[0]) + self.maxpool1d = nn.MaxPool1d(2, 2) + self.conv_2 = nn.Conv1d( + conv1d_channels[0], conv1d_channels[1], conv1d_kws[1], 1 + ) + dense_dim = int((k - 2) / 2 + 1) + dense_dim = (dense_dim - conv1d_kws[1] + 1) * conv1d_channels[1] + self.linear_1 = nn.Linear(dense_dim, 128) + self.linear_2 = nn.Linear(128, 1) + + def forward(self, g, z, node_id=None, edge_id=None): + """ + Args: + g(DGLGraph): the graph + z(Tensor): node labeling tensor, shape [N, 1] + node_id(Tensor, optional): node id tensor, shape [N, 1] + edge_id(Tensor, optional): edge id tensor, shape [E, 1] + Returns: + x(Tensor): output tensor + """ + z_emb = self.z_embedding(z) + if self.use_attribute: + x = self.node_attributes_lookup(node_id) + x = torch.cat([z_emb, x], 1) + else: + x = z_emb + if self.use_edge_weight: + edge_weight = self.edge_weights_lookup(edge_id) + else: + edge_weight = None + + if self.use_embedding: + n_emb = self.node_embedding(node_id) + x = torch.cat([x, n_emb], 1) + + xs = [x] + for layer in self.layers: + out = torch.tanh(layer(g, xs[-1], edge_weight=edge_weight)) + xs += [out] + + x = torch.cat(xs[1:], dim=-1) + + # SortPooling + x = self.pooling(g, x) + x = x.unsqueeze(1) + x = F.relu(self.conv_1(x)) + x = self.maxpool1d(x) + x = F.relu(self.conv_2(x)) + x = x.view(x.size(0), -1) + + x = 
F.relu(self.linear_1(x)) + x = F.dropout(x, p=self.dropout, training=self.training) + x = self.linear_2(x) + + return x + + +def parse_arguments(): + """ + Parse arguments + """ + parser = argparse.ArgumentParser(description="SEAL") + parser.add_argument("--dataset", type=str, default="ogbl-ddi") + parser.add_argument("--gpu_id", type=int, default=0) + parser.add_argument("--hop", type=int, default=1) + parser.add_argument("--model", type=str, default="dgcnn") + parser.add_argument("--gcn_type", type=str, default="gcn") + parser.add_argument("--num_layers", type=int, default=3) + parser.add_argument("--hidden_units", type=int, default=32) + parser.add_argument("--sort_k", type=int, default=30) + parser.add_argument("--pooling", type=str, default="sum") + parser.add_argument("--dropout", type=float, default=0.5) + parser.add_argument("--hits_k", type=int, default=50) + parser.add_argument("--lr", type=float, default=0.0001) + parser.add_argument("--neg_samples", type=int, default=1) + parser.add_argument("--subsample_ratio", type=float, default=0.1) + parser.add_argument("--epochs", type=int, default=60) + parser.add_argument("--batch_size", type=int, default=32) + parser.add_argument("--eval_steps", type=int, default=5) + parser.add_argument("--num_workers", type=int, default=32) + parser.add_argument("--random_seed", type=int, default=2021) + parser.add_argument("--save_dir", type=str, default="./processed") + args = parser.parse_args() + + return args + + +def load_ogb_dataset(dataset): + """ + Load OGB dataset + Args: + dataset(str): name of dataset (ogbl-collab, ogbl-ddi, ogbl-citation) + + Returns: + graph(DGLGraph): graph + split_edge(dict): split edge + + """ + dataset = DglLinkPropPredDataset(name=dataset) + split_edge = dataset.get_edge_split() + graph = dataset[0] + + return graph, split_edge + + +def drnl_node_labeling(subgraph, src, dst): + """ + Double Radius Node Labeling (DRNL): + d = r(i,u) + r(i,v) + label = 1 + min(r(i,u), r(i,v)) + (d // 2) * (d // 2 + d % 2 - 1) + Isolated nodes in the subgraph are labeled zero. + An extremely large graph may cause a memory error.
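+
+    For example, a node at distance 1 from src and distance 2 from dst gets
+    d = 3 and label = 1 + min(1, 2) + (3 // 2) * (3 // 2 + 3 % 2 - 1) = 3,
+    while the two target nodes src and dst are always labeled 1.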
+ + Args: + subgraph(DGLGraph): The graph + src(int): node id of one of src node in new subgraph + dst(int): node id of one of dst node in new subgraph + Returns: + z(Tensor): node labeling tensor + """ + adj = subgraph.adj_external().to_dense().numpy() + src, dst = (dst, src) if src > dst else (src, dst) + + idx = list(range(src)) + list(range(src + 1, adj.shape[0])) + adj_wo_src = adj[idx, :][:, idx] + + idx = list(range(dst)) + list(range(dst + 1, adj.shape[0])) + adj_wo_dst = adj[idx, :][:, idx] + + dist2src = shortest_path(adj_wo_dst, directed=False, unweighted=True, indices=src) + dist2src = np.insert(dist2src, dst, 0, axis=0) + dist2src = torch.from_numpy(dist2src) + + dist2dst = shortest_path( + adj_wo_src, directed=False, unweighted=True, indices=dst - 1 + ) + dist2dst = np.insert(dist2dst, src, 0, axis=0) + dist2dst = torch.from_numpy(dist2dst) + + dist = dist2src + dist2dst + dist_over_2, dist_mod_2 = dist // 2, dist % 2 + + z = 1 + torch.min(dist2src, dist2dst) + z += dist_over_2 * (dist_over_2 + dist_mod_2 - 1) + z[src] = 1.0 + z[dst] = 1.0 + z[torch.isnan(z)] = 0.0 + + return z.to(torch.long) + + +def evaluate_hits(name, pos_pred, neg_pred, K): + """ + Compute hits + Args: + name(str): name of dataset + pos_pred(Tensor): predict value of positive edges + neg_pred(Tensor): predict value of negative edges + K(int): num of hits + + Returns: + hits(float): score of hits + + + """ + evaluator = Evaluator(name) + evaluator.K = K + hits = evaluator.eval( + { + "y_pred_pos": pos_pred, + "y_pred_neg": neg_pred, + } + )[f"hits@{K}"] + + return hits + + +class GraphDataSet(Dataset): + """ + GraphDataset for torch DataLoader + """ + + def __init__(self, graph_list, tensor): + self.graph_list = graph_list + self.tensor = tensor + + def __len__(self): + return len(self.graph_list) + + def __getitem__(self, index): + return (self.graph_list[index], self.tensor[index]) + + +class PosNegEdgesGenerator(object): + """ + Generate positive and negative samples + Attributes: + g(dgl.DGLGraph): graph + split_edge(dict): split edge + neg_samples(int): num of negative samples per positive sample + subsample_ratio(float): ratio of subsample + shuffle(bool): if shuffle generated graph list + """ + + def __init__(self, g, split_edge, neg_samples=1, subsample_ratio=0.1, shuffle=True): + self.neg_sampler = Uniform(neg_samples) + self.subsample_ratio = subsample_ratio + self.split_edge = split_edge + self.g = g + self.shuffle = shuffle + + def __call__(self, split_type): + if split_type == "train": + subsample_ratio = self.subsample_ratio + else: + subsample_ratio = 1 + + pos_edges = self.split_edge[split_type]["edge"] + if split_type == "train": + # Adding self loop in train avoids sampling the source node itself. + g = add_self_loop(self.g) + eids = g.edge_ids(pos_edges[:, 0], pos_edges[:, 1]) + neg_edges = torch.stack(self.neg_sampler(g, eids), dim=1) + else: + neg_edges = self.split_edge[split_type]["edge_neg"] + pos_edges = self.subsample(pos_edges, subsample_ratio).long() + neg_edges = self.subsample(neg_edges, subsample_ratio).long() + + edges = torch.cat([pos_edges, neg_edges]) + labels = torch.cat( + [ + torch.ones(pos_edges.size(0), 1), + torch.zeros(neg_edges.size(0), 1), + ] + ) + if self.shuffle: + perm = torch.randperm(edges.size(0)) + edges = edges[perm] + labels = labels[perm] + return edges, labels + + def subsample(self, edges, subsample_ratio): + """ + Subsample generated edges. 
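+        With subsample_ratio = 0.1 a random 10% of the edges are kept; the
+        generator applies this only to the train split and keeps val/test whole.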
+ Args: + edges(Tensor): edges to subsample + subsample_ratio(float): ratio of subsample + + Returns: + edges(Tensor): edges + + """ + + num_edges = edges.size(0) + perm = torch.randperm(num_edges) + perm = perm[: int(subsample_ratio * num_edges)] + edges = edges[perm] + return edges + + +class EdgeDataSet(Dataset): + """ + Assistant Dataset for speeding up the SEALSampler + """ + + def __init__(self, edges, labels, transform): + self.edges = edges + self.transform = transform + self.labels = labels + + def __len__(self): + return len(self.edges) + + def __getitem__(self, index): + subgraph = self.transform(self.edges[index]) + return (subgraph, self.labels[index]) + + +class SEALSampler(object): + """ + Sampler for SEAL in paper(no-block version) + The strategy is to sample all the k-hop neighbors around the two target nodes. + Attributes: + graph(DGLGraph): The graph + hop(int): num of hop + num_workers(int): num of workers + + """ + + def __init__(self, graph, hop=1, num_workers=32, print_fn=print): + self.graph = graph + self.hop = hop + self.print_fn = print_fn + self.num_workers = num_workers + + def sample_subgraph(self, target_nodes): + """ + Args: + target_nodes(Tensor): Tensor of two target nodes + Returns: + subgraph(DGLGraph): subgraph + """ + sample_nodes = [target_nodes] + frontiers = target_nodes + + for i in range(self.hop): + frontiers = self.graph.out_edges(frontiers)[1] + frontiers = torch.unique(frontiers) + sample_nodes.append(frontiers) + + sample_nodes = torch.cat(sample_nodes) + sample_nodes = torch.unique(sample_nodes) + subgraph = dgl.node_subgraph(self.graph, sample_nodes) + + # Each node should have unique node id in the new subgraph + u_id = int( + torch.nonzero(subgraph.ndata[NID] == int(target_nodes[0]), as_tuple=False) + ) + v_id = int( + torch.nonzero(subgraph.ndata[NID] == int(target_nodes[1]), as_tuple=False) + ) + + # remove link between target nodes in positive subgraphs. + if subgraph.has_edges_between(u_id, v_id): + link_id = subgraph.edge_ids(u_id, v_id, return_uv=True)[2] + subgraph.remove_edges(link_id) + if subgraph.has_edges_between(v_id, u_id): + link_id = subgraph.edge_ids(v_id, u_id, return_uv=True)[2] + subgraph.remove_edges(link_id) + + z = drnl_node_labeling(subgraph, u_id, v_id) + subgraph.ndata["z"] = z + + return subgraph + + def _collate(self, batch): + batch_graphs, batch_labels = map(list, zip(*batch)) + + batch_graphs = dgl.batch(batch_graphs) + batch_labels = torch.stack(batch_labels) + return batch_graphs, batch_labels + + def __call__(self, edges, labels): + subgraph_list = [] + labels_list = [] + edge_dataset = EdgeDataSet(edges, labels, transform=self.sample_subgraph) + self.print_fn("Using {} workers in sampling job.".format(self.num_workers)) + sampler = DataLoader( + edge_dataset, + batch_size=32, + num_workers=self.num_workers, + shuffle=False, + collate_fn=self._collate, + ) + for subgraph, label in tqdm(sampler, ncols=100): + label_copy = deepcopy(label) + subgraph = dgl.unbatch(subgraph) + + del label + subgraph_list += subgraph + labels_list.append(label_copy) + + return subgraph_list, torch.cat(labels_list) + + +class SEALData(object): + """ + 1. Generate positive and negative samples + 2. Subgraph sampling + + Attributes: + g(dgl.DGLGraph): graph + split_edge(dict): split edge + hop(int): num of hop + neg_samples(int): num of negative samples per positive sample + subsample_ratio(float): ratio of subsample + use_coalesce(bool): True for coalesce graph. 
Graph with multi-edge need to coalesce + """ + + def __init__( + self, + g, + split_edge, + hop=1, + neg_samples=1, + subsample_ratio=1, + prefix=None, + save_dir=None, + num_workers=32, + shuffle=True, + use_coalesce=True, + print_fn=print, + ): + self.g = g + self.hop = hop + self.subsample_ratio = subsample_ratio + self.prefix = prefix + self.save_dir = save_dir + self.print_fn = print_fn + + self.generator = PosNegEdgesGenerator( + g=self.g, + split_edge=split_edge, + neg_samples=neg_samples, + subsample_ratio=subsample_ratio, + shuffle=shuffle, + ) + if use_coalesce: + for k, v in g.edata.items(): + g.edata[k] = v.float() # dgl.to_simple() requires data is float + self.g = dgl.to_simple( + g, copy_ndata=True, copy_edata=True, aggregator="sum" + ) + + self.ndata = {k: v for k, v in self.g.ndata.items()} + self.edata = {k: v for k, v in self.g.edata.items()} + self.g.ndata.clear() + self.g.edata.clear() + self.print_fn("Save ndata and edata in class.") + self.print_fn("Clear ndata and edata in graph.") + + self.sampler = SEALSampler( + graph=self.g, hop=hop, num_workers=num_workers, print_fn=print_fn + ) + + def __call__(self, split_type): + if split_type == "train": + subsample_ratio = self.subsample_ratio + else: + subsample_ratio = 1 + + path = osp.join( + self.save_dir or "", + "{}_{}_{}-hop_{}-subsample.bin".format( + self.prefix, split_type, self.hop, subsample_ratio + ), + ) + + if osp.exists(path): + self.print_fn("Load existing processed {} files".format(split_type)) + graph_list, data = dgl.load_graphs(path) + dataset = GraphDataSet(graph_list, data["labels"]) + + else: + self.print_fn("Processed {} files not exist.".format(split_type)) + + edges, labels = self.generator(split_type) + self.print_fn("Generate {} edges totally.".format(edges.size(0))) + + graph_list, labels = self.sampler(edges, labels) + dataset = GraphDataSet(graph_list, labels) + dgl.save_graphs(path, graph_list, {"labels": labels}) + self.print_fn("Save preprocessed subgraph to {}".format(path)) + return dataset + + +def _transform_log_level(str_level): + if str_level == "info": + return logging.INFO + elif str_level == "warning": + return logging.WARNING + elif str_level == "critical": + return logging.CRITICAL + elif str_level == "debug": + return logging.DEBUG + elif str_level == "error": + return logging.ERROR + else: + raise KeyError("Log level error") + + +class LightLogging(object): + def __init__(self, log_path=None, log_name="lightlog", log_level="debug"): + log_level = _transform_log_level(log_level) + + if log_path: + if not log_path.endswith("/"): + log_path += "/" + if not os.path.exists(log_path): + os.mkdir(log_path) + + if log_name.endswith("-") or log_name.endswith("_"): + log_name = ( + log_path + + log_name + + time.strftime("%Y-%m-%d-%H:%M", time.localtime(time.time())) + + ".log" + ) + else: + log_name = ( + log_path + + log_name + + "_" + + time.strftime("%Y-%m-%d-%H-%M", time.localtime(time.time())) + + ".log" + ) + + logging.basicConfig( + level=log_level, + format="%(asctime)s %(levelname)s: %(message)s", + datefmt="%Y-%m-%d-%H:%M", + handlers=[ + logging.FileHandler(log_name, mode="w"), + logging.StreamHandler(), + ], + ) + logging.info("Start Logging") + logging.info("Log file path: {}".format(log_name)) + + else: + logging.basicConfig( + level=log_level, + format="%(asctime)s %(levelname)s: %(message)s", + datefmt="%Y-%m-%d-%H:%M", + handlers=[logging.StreamHandler()], + ) + logging.info("Start Logging") + + def debug(self, msg): + logging.debug(msg) + + def info(self, msg): + 
logging.info(msg) + + def critical(self, msg): + logging.critical(msg) + + def warning(self, msg): + logging.warning(msg) + + def error(self, msg): + logging.error(msg) + + +def data_prepare(graph, split_edge): + seal_data = SEALData( + g=graph, + split_edge=split_edge, + hop=1, + neg_samples=1, + subsample_ratio=0.1, + use_coalesce=True, + prefix="ogbl-collab", + save_dir="./processed", + num_workers=32, + print_fn=print, + ) + node_attribute = seal_data.ndata["feat"] + edge_weight = seal_data.edata["weight"].float() + return node_attribute, edge_weight diff --git a/hugegraph-ml/src/hugegraph_ml/tasks/fraud_detector_caregnn.py b/hugegraph-ml/src/hugegraph_ml/tasks/fraud_detector_caregnn.py new file mode 100644 index 00000000..fe14d6db --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/tasks/fraud_detector_caregnn.py @@ -0,0 +1,122 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import torch +from dgl import DGLGraph +from torch import nn +from sklearn.metrics import recall_score, roc_auc_score +from torch.nn.functional import softmax + + +class DetectorCaregnn: + def __init__(self, graph: DGLGraph, model: nn.Module): + self.graph = graph + self._model = model + self._device = "" + + def train( + self, + lr: float = 1e-3, + weight_decay: float = 0, + n_epochs: int = 200, + gpu: int = -1, + ): + + self._device = ( + f"cuda:{gpu}" if gpu != -1 and torch.cuda.is_available() else "cpu" + ) + self._model.to(self._device) + self.graph = self.graph.to(self._device) + labels = self.graph.ndata["label"].to(self._device) + feat = self.graph.ndata["feature"].to(self._device) + train_mask = self.graph.ndata["train_mask"] + val_mask = self.graph.ndata["val_mask"] + test_mask = self.graph.ndata["test_mask"] + train_idx = ( + torch.nonzero(train_mask, as_tuple=False).squeeze(1).to(self._device) + ) + val_idx = torch.nonzero(val_mask, as_tuple=False).squeeze(1).to(self._device) + test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze(1).to(self._device) + rl_idx = torch.nonzero( + train_mask.to(self._device) & labels.bool(), as_tuple=False + ).squeeze(1) + _, cnt = torch.unique(labels, return_counts=True) + loss_fn = torch.nn.CrossEntropyLoss(weight=1 / cnt) + optimizer = torch.optim.Adam( + self._model.parameters(), lr=lr, weight_decay=weight_decay + ) + for epoch in range(n_epochs): + self._model.train() + logits_gnn, logits_sim = self._model(self.graph, feat) + tr_loss = loss_fn(logits_gnn[train_idx], labels[train_idx]) + 2 * loss_fn( + logits_sim[train_idx], labels[train_idx] + ) + + tr_recall = recall_score( + labels[train_idx].cpu(), + logits_gnn.data[train_idx].argmax(dim=1).cpu(), + ) + tr_auc = roc_auc_score( + labels[train_idx].cpu(), + softmax(logits_gnn, dim=1).data[train_idx][:, 1].cpu(), + ) + val_loss = loss_fn(logits_gnn[val_idx], labels[val_idx]) 
+ 2 * loss_fn( + logits_sim[val_idx], labels[val_idx] + ) + val_recall = recall_score( + labels[val_idx].cpu(), logits_gnn.data[val_idx].argmax(dim=1).cpu() + ) + val_auc = roc_auc_score( + labels[val_idx].cpu(), + softmax(logits_gnn, dim=1).data[val_idx][:, 1].cpu(), + ) + optimizer.zero_grad() + tr_loss.backward() + optimizer.step() + print( + "Epoch {}, Train: Recall: {:.4f} AUC: {:.4f} Loss: {:.4f} | Val: Recall: {:.4f} AUC: {:.4f} Loss: {:.4f}".format( + epoch, + tr_recall, + tr_auc, + tr_loss.item(), + val_recall, + val_auc, + val_loss.item(), + ) + ) + self._model.RLModule(self.graph, epoch, rl_idx) + + def evaluate(self): + labels = self.graph.ndata["label"].to(self._device) + feat = self.graph.ndata["feature"].to(self._device) + test_mask = self.graph.ndata["test_mask"] + test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze(1).to(self._device) + _, cnt = torch.unique(labels, return_counts=True) + loss_fn = torch.nn.CrossEntropyLoss(weight=1 / cnt) + self._model.eval() + logits_gnn, logits_sim = self._model.forward(self.graph, feat) + test_loss = loss_fn(logits_gnn[test_idx], labels[test_idx]) + 2 * loss_fn( + logits_sim[test_idx], labels[test_idx] + ) + test_recall = recall_score( + labels[test_idx].cpu(), logits_gnn[test_idx].argmax(dim=1).cpu() + ) + test_auc = roc_auc_score( + labels[test_idx].cpu(), + softmax(logits_gnn, dim=1).data[test_idx][:, 1].cpu(), + ) + return {"recall": test_recall, "accuracy": test_auc, "loss": test_loss.item()} diff --git a/hugegraph-ml/src/hugegraph_ml/tasks/hetero_sample_embed_gatne.py b/hugegraph-ml/src/hugegraph_ml/tasks/hetero_sample_embed_gatne.py new file mode 100644 index 00000000..62af5d2c --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/tasks/hetero_sample_embed_gatne.py @@ -0,0 +1,122 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
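+
+# A minimal sketch of driving this task, assuming `graph` is a DGL heterograph
+# exported from HugeGraph and `model` is the GATNE model defined in
+# hugegraph_ml.models.gatne:
+#
+#   task = HeteroSampleEmbedGATNE(graph, model)
+#   task.train_and_embed(lr=1e-3, n_epochs=200, gpu=-1)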
+ + +import dgl +import torch +from torch import nn +from tqdm.auto import tqdm +import numpy as np +from hugegraph_ml.models.gatne import ( + construct_typenodes_from_graph, + generate_pairs, + NSLoss, + NeighborSampler, +) + + +class HeteroSampleEmbedGATNE: + def __init__(self, graph, model: nn.Module): + self.graph = graph + self._model = model + self._device = "" + + def train_and_embed( + self, + lr: float = 1e-3, + n_epochs: int = 200, + gpu: int = -1, + ): + self._device = ( + f"cuda:{gpu}" if gpu != -1 and torch.cuda.is_available() else "cpu" + ) + self._model = self._model.to(self._device) + self.graph = self.graph.to(self._device) + type_nodes = construct_typenodes_from_graph(self.graph) + edge_type_count = len(self.graph.etypes) + neighbor_samples = 10 + num_walks = 20 + num_workers = 4 + window_size = 5 + batch_size = 64 + num_sampled = 5 + embedding_size = 200 + all_walks = [] + for i in range(edge_type_count): + nodes = torch.LongTensor(type_nodes[i] * num_walks).to(self._device) + traces, types = dgl.sampling.random_walk( + self.graph, + nodes, + metapath=[self.graph.etypes[i]] * (neighbor_samples - 1), + ) + all_walks.append(traces) + + train_pairs = generate_pairs(all_walks, window_size, num_workers) + neighbor_sampler = NeighborSampler(self.graph, [neighbor_samples]) + train_dataloader = torch.utils.data.DataLoader( + train_pairs, + batch_size=batch_size, + collate_fn=neighbor_sampler.sample, + shuffle=True, + num_workers=num_workers, + pin_memory=True, + ) + nsloss = NSLoss(self.graph.number_of_nodes(), num_sampled, embedding_size) + self._model.to(self._device) + nsloss.to(self._device) + + optimizer = torch.optim.Adam( + [{"params": self._model.parameters()}, {"params": nsloss.parameters()}], + lr=lr, + ) + + for epoch in range(n_epochs): + self._model.train() + # train_pairs is a 2-D NumPy array, so shuffle rows with NumPy + np.random.shuffle(train_pairs) + + data_iter = tqdm( + train_dataloader, + desc="epoch %d" % (epoch), + total=(len(train_pairs) + (batch_size - 1)) // batch_size, + ) + avg_loss = 0.0 + for i, (block, head_invmap, tails, block_types) in enumerate(data_iter): + optimizer.zero_grad() + # embs: [batch_size, edge_type_count, embedding_size] + block_types = block_types.to(self._device) + embs = self._model(block[0].to(self._device))[head_invmap] + embs = embs.gather( + 1, + block_types.view(-1, 1, 1).expand(embs.shape[0], 1, embs.shape[2]), + )[:, 0] + loss = nsloss( + block[0].dstdata[dgl.NID][head_invmap].to(self._device), + embs, + tails.to(self._device), + ) + loss.backward() + optimizer.step() + avg_loss += loss.item() + + post_fix = { + "epoch": epoch, + "iter": i, + "avg_loss": avg_loss / (i + 1), + "loss": loss.item(), + } + data_iter.set_postfix(post_fix)
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import torch +import dgl +from torch import nn +from tqdm import trange +import numpy as np +from hugegraph_ml.models.pgnn import ( + get_dataset, + preselect_anchor, + train_model, + eval_model, +) + + +class LinkPredictionPGNN: + def __init__(self, graph, model: nn.Module): + self.graph = graph + self._model = model + self._device = "" + + def train( + self, + lr: float = 1e-3, + weight_decay: float = 0, + n_epochs: int = 200, + gpu: int = -1, + ): + self._device = ( + f"cuda:{gpu}" if gpu != -1 and torch.cuda.is_available() else "cpu" + ) + self._model.to(self._device) + data = get_dataset(self.graph) + # pre-sample anchor nodes and compute shortest distance values for all epochs + ( + g_list, + anchor_eid_list, + dist_max_list, + edge_weight_list, + ) = preselect_anchor(data) + optimizer = torch.optim.Adam( + self._model.parameters(), lr=lr, weight_decay=weight_decay + ) + loss_func = nn.BCEWithLogitsLoss() + best_auc_val = -1 + best_auc_test = -1 + for epoch in range(n_epochs): + if epoch == 200: + for param_group in optimizer.param_groups: + param_group["lr"] /= 10 + + g = dgl.graph(g_list[epoch]) + g.ndata["feat"] = torch.FloatTensor(data["feature"]) + g.edata["sp_dist"] = torch.FloatTensor(edge_weight_list[epoch]) + g_data = { + "graph": g.to(self._device), + "anchor_eid": anchor_eid_list[epoch], + "dists_max": dist_max_list[epoch], + } + + train_model(data, self._model, loss_func, optimizer, self._device, g_data) + + loss_train, auc_train, auc_val, auc_test = eval_model( + data, g_data, self._model, loss_func, self._device + ) + if auc_val > best_auc_val: + best_auc_val = auc_val + best_auc_test = auc_test + + if epoch % 100 == 0: + print( + epoch, + "Loss {:.4f}".format(loss_train), + "Train AUC: {:.4f}".format(auc_train), + "Val AUC: {:.4f}".format(auc_val), + "Test AUC: {:.4f}".format(auc_test), + "Best Val AUC: {:.4f}".format(best_auc_val), + "Best Test AUC: {:.4f}".format(best_auc_test), + ) diff --git a/hugegraph-ml/src/hugegraph_ml/tasks/link_prediction_seal.py b/hugegraph-ml/src/hugegraph_ml/tasks/link_prediction_seal.py new file mode 100644 index 00000000..c307cc10 --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/tasks/link_prediction_seal.py @@ -0,0 +1,172 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
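+
+# A minimal sketch, assuming `graph` and `split_edge` come from an OGB-style
+# loader such as load_ogb_dataset in models/seal.py:
+#
+#   model = DGCNN(num_layers=3, hidden_units=32)
+#   task = LinkPredictionSeal(graph, split_edge, model)
+#   task.train(lr=1e-3, n_epochs=60, gpu=-1)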
+ + +from typing import Literal + +import torch +from dgl import DGLGraph, NID, EID +from torch import nn +from tqdm import tqdm +from dgl.dataloading import GraphDataLoader +from torch.nn import BCEWithLogitsLoss +import time +import numpy as np +from hugegraph_ml.models.seal import SEALData, DGCNN, evaluate_hits +from hugegraph_ml.utils.early_stopping import EarlyStopping + + +class LinkPredictionSeal: + def __init__(self, graph: DGLGraph, split_edge, model): + self.graph = graph + self._model = model + self.split_edge = split_edge + self._device = "" + self.train_loader = None + self.val_loader = None + self.test_loader = None + self.train_graphs = None + self.data_prepare() + + def data_prepare(self): + seal_data = SEALData( + g=self.graph, + split_edge=self.split_edge, + hop=1, + neg_samples=1, + subsample_ratio=0.1, + use_coalesce=True, + prefix="ogbl-collab", + save_dir="./processed", + num_workers=32, + print_fn=print, + ) + node_attribute = seal_data.ndata["feat"] + edge_weight = seal_data.edata["weight"].float() + train_data = seal_data("train") + val_data = seal_data("valid") + test_data = seal_data("test") + self.train_graphs = len(train_data.graph_list) + self.train_loader = GraphDataLoader(train_data, batch_size=32, num_workers=32) + self.val_loader = GraphDataLoader(val_data, batch_size=32, num_workers=32) + self.test_loader = GraphDataLoader(test_data, batch_size=32, num_workers=32) + + def _train( + self, + dataloader, + loss_fn, + optimizer, + num_graphs=32, + total_graphs=None, + ): + self._model.train() + + total_loss = 0 + for g, labels in tqdm(dataloader, ncols=100): + g = g.to(self._device) + labels = labels.to(self._device) + optimizer.zero_grad() + logits = self._model(g, g.ndata["z"], g.ndata[NID], g.edata[EID]) + loss = loss_fn(logits, labels) + loss.backward() + optimizer.step() + total_loss += loss.item() * num_graphs + return total_loss / total_graphs + + def train( + self, + lr: float = 1e-3, + n_epochs: int = 200, + gpu: int = -1, + ): + torch.manual_seed(2021) + self._device = ( + f"cuda:{gpu}" if gpu != -1 and torch.cuda.is_available() else "cpu" + ) + self._model.to(self._device) + self.graph = self.graph.to(self._device) + parameters = self._model.parameters() + optimizer = torch.optim.Adam(parameters, lr=lr) + loss_fn = BCEWithLogitsLoss() + print( + "Total parameters: {}".format( + sum([p.numel() for p in self._model.parameters()]) + ) + ) + + # train and evaluate loop + summary_val = [] + summary_test = [] + for epoch in range(n_epochs): + start_time = time.time() + loss = self._train( + dataloader=self.train_loader, + loss_fn=loss_fn, + optimizer=optimizer, + num_graphs=32, + total_graphs=self.train_graphs, + ) + train_time = time.time() + if epoch % 5 == 0: + val_pos_pred, val_neg_pred = self.evaluate(dataloader=self.val_loader) + test_pos_pred, test_neg_pred = self.evaluate( + dataloader=self.test_loader + ) + + val_metric = evaluate_hits( + "ogbl-collab", val_pos_pred, val_neg_pred, 50 + ) + test_metric = evaluate_hits( + "ogbl-collab", test_pos_pred, test_neg_pred, 50 + ) + evaluate_time = time.time() + print( + "Epoch-{}, train loss: {:.4f}, hits@{}: val-{:.4f}, test-{:.4f}, " + "cost time: train-{:.1f}s, total-{:.1f}s".format( + epoch, + loss, + 50, + val_metric, + test_metric, + train_time - start_time, + evaluate_time - start_time, + ) + ) + summary_val.append(val_metric) + summary_test.append(test_metric) + summary_test = np.array(summary_test) + + print("Experiment Results:") + print( + "Best hits@{}: {:.4f}, epoch: {}".format( + 50, 
np.max(summary_test), np.argmax(summary_test) + ) + ) + + @torch.no_grad() + def evaluate(self, dataloader): + self._model.eval() + y_pred, y_true = [], [] + for g, labels in tqdm(dataloader, ncols=100): + g = g.to(self._device) + logits = self._model(g, g.ndata["z"], g.ndata[NID], g.edata[EID]) + y_pred.append(logits.view(-1).cpu()) + y_true.append(labels.view(-1).cpu().to(torch.float)) + y_pred, y_true = torch.cat(y_pred), torch.cat(y_true) + pos_pred = y_pred[y_true == 1] + neg_pred = y_pred[y_true == 0] + return pos_pred, neg_pred diff --git a/hugegraph-ml/src/hugegraph_ml/tasks/node_classify_with_edge.py b/hugegraph-ml/src/hugegraph_ml/tasks/node_classify_with_edge.py new file mode 100644 index 00000000..57276ff6 --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/tasks/node_classify_with_edge.py @@ -0,0 +1,123 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +from typing import Literal + +import torch +from dgl import DGLGraph +from torch import nn +from tqdm import trange + +from hugegraph_ml.utils.early_stopping import EarlyStopping + + +class NodeClassifyWithEdge: + def __init__(self, graph: DGLGraph, model: nn.Module): + self.graph = graph + self._model = model + self._device = "" + self._early_stopping = None + self._is_trained = False + self._check_graph() + + def _check_graph(self): + required_node_attrs = ["feat", "label", "train_mask", "val_mask", "test_mask"] + for attr in required_node_attrs: + if attr not in self.graph.ndata: + raise ValueError( + f"Graph is missing required node attribute '{attr}' in ndata." + ) + required_edge_attrs = ["feat"] + for attr in required_edge_attrs: + if attr not in self.graph.edata: + raise ValueError( + f"Graph is missing required edge attribute '{attr}' in edata." 
+ ) + + def _evaluate(self, edge_feats, node_feats, labels, mask): + self._model.eval() + labels = labels[mask] + with torch.no_grad(): + logits = self._model.inference(self.graph, edge_feats, node_feats)[mask] + loss = self._model.loss(logits, labels) + _, predicted = torch.max(logits, dim=1) + accuracy = (predicted == labels).sum().item() / len(labels) + return {"accuracy": accuracy, "loss": loss.item()} + + def train( + self, + lr: float = 1e-3, + weight_decay: float = 0, + n_epochs: int = 200, + patience: int = float("inf"), + early_stopping_monitor: Literal["loss", "accuracy"] = "loss", + gpu: int = -1, + ): + # Set device for training + self._device = ( + f"cuda:{gpu}" if gpu != -1 and torch.cuda.is_available() else "cpu" + ) + self._early_stopping = EarlyStopping( + patience=patience, monitor=early_stopping_monitor + ) + self._model.to(self._device) + self.graph = self.graph.to(self._device) + # Get node features, labels, masks and move to device + edge_feats = self.graph.edata["feat"].to(self._device) + node_feats = self.graph.ndata["feat"].to(self._device) + labels = self.graph.ndata["label"].to(self._device) + train_mask = self.graph.ndata["train_mask"].to(self._device) + val_mask = self.graph.ndata["val_mask"].to(self._device) + optimizer = torch.optim.Adam( + self._model.parameters(), lr=lr, weight_decay=weight_decay + ) + # Training model + epochs = trange(n_epochs) + for epoch in epochs: + # train + self._model.train() + optimizer.zero_grad() + # forward pass, get logits, compute loss + logits = self._model(self.graph, edge_feats, node_feats) + logits_train_masked = logits[train_mask] + loss = self._model.loss(logits_train_masked, labels[train_mask]) + loss.backward() + optimizer.step() + # validation + valid_metrics = self._evaluate(edge_feats, node_feats, labels, val_mask) + # logs + epochs.set_description( + f"epoch {epoch} | train loss {loss.item():.4f} | val loss {valid_metrics['loss']:.4f}" + ) + # early stopping + self._early_stopping( + valid_metrics[self._early_stopping.monitor], self._model + ) + torch.cuda.empty_cache() + if self._early_stopping.early_stop: + break + self._early_stopping.load_best_model(self._model) + self._is_trained = True + + def evaluate(self): + test_mask = self.graph.ndata["test_mask"].to(self._device) + edge_feats = self.graph.edata["feat"].to(self._device) + node_feats = self.graph.ndata["feat"].to(self._device) + labels = self.graph.ndata["label"].to(self._device) + metrics = self._evaluate(edge_feats, node_feats, labels, test_mask) + return metrics diff --git a/hugegraph-ml/src/hugegraph_ml/tasks/node_classify_with_sample.py b/hugegraph-ml/src/hugegraph_ml/tasks/node_classify_with_sample.py new file mode 100644 index 00000000..393cd09c --- /dev/null +++ b/hugegraph-ml/src/hugegraph_ml/tasks/node_classify_with_sample.py @@ -0,0 +1,156 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+from typing import Literal
+
+import torch
+from dgl import DGLGraph
+from torch import nn
+from tqdm import trange
+import dgl
+import numpy as np
+
+from hugegraph_ml.utils.early_stopping import EarlyStopping
+
+
+class NodeClassifyWithSample:
+    def __init__(self, graph: DGLGraph, model: nn.Module):
+        self.graph = graph
+        self._model = model
+        self.gpu = -1
+        self._device = (
+            f"cuda:{self.gpu}"
+            if self.gpu != -1 and torch.cuda.is_available()
+            else "cpu"
+        )
+        self._early_stopping = None
+        self._is_trained = False
+        self.num_partitions = 100
+        self.batch_size = 100
+        self.sampler = dgl.dataloading.ClusterGCNSampler(
+            graph,
+            self.num_partitions,
+        )
+        self.dataloader = dgl.dataloading.DataLoader(
+            self.graph,
+            torch.arange(self.num_partitions).to(self._device),
+            self.sampler,
+            device=self._device,
+            batch_size=self.batch_size,
+            shuffle=True,
+            drop_last=False,
+            num_workers=0,
+            # UVA sampling is only valid when sampling onto a GPU;
+            # fall back to normal sampling on CPU
+            use_uva=(self._device != "cpu"),
+        )
+        self._check_graph()
+
+    def _check_graph(self):
+        required_node_attrs = ["feat", "label", "train_mask", "val_mask", "test_mask"]
+        for attr in required_node_attrs:
+            if attr not in self.graph.ndata:
+                raise ValueError(
+                    f"Graph is missing required node attribute '{attr}' in ndata."
+                )
+
+    def train(
+        self,
+        lr: float = 1e-3,
+        weight_decay: float = 0,
+        n_epochs: int = 200,
+        patience: int = float("inf"),
+        early_stopping_monitor: Literal["loss", "accuracy"] = "loss",
+    ):
+        # Set up early stopping
+        early_stopping = EarlyStopping(
+            patience=patience, monitor=early_stopping_monitor
+        )
+        self._model.to(self._device)
+        # Get node features, labels and masks, then move them to the device
+        feats = self.graph.ndata["feat"].to(self._device)
+        labels = self.graph.ndata["label"].to(self._device)
+        train_mask = self.graph.ndata["train_mask"].to(self._device)
+        val_mask = self.graph.ndata["val_mask"].to(self._device)
+        optimizer = torch.optim.Adam(
+            self._model.parameters(), lr=lr, weight_decay=weight_decay
+        )
+        # Train the model
+        loss_fn = nn.CrossEntropyLoss()
+        epochs = trange(n_epochs)
+        for epoch in epochs:
+            # train
+            self._model.train()
+            for it, sg in enumerate(self.dataloader):
+                sg_feats = feats[sg.ndata["_ID"]]
+                sg_labels = labels[sg.ndata["_ID"]]
+                sg_train_mask = train_mask[sg.ndata["_ID"]].bool()
+                logits = self._model(sg, sg_feats)
+                train_loss = loss_fn(logits[sg_train_mask], sg_labels[sg_train_mask])
+                optimizer.zero_grad()
+                train_loss.backward()
+                optimizer.step()
+                # validation
+                valid_metrics = self.evaluate_sg(
+                    sg=sg,
+                    sg_feats=sg_feats,
+                    labels=labels,
+                    val_mask=val_mask,
+                )
+                # logs
+                epochs.set_description(
+                    f"epoch {epoch} | it {it} | train loss {train_loss.item():.4f} | val loss {valid_metrics['loss']:.4f}"
+                )
+                # early stopping
+                early_stopping(valid_metrics[early_stopping.monitor], self._model)
+                torch.cuda.empty_cache()
+                if early_stopping.early_stop:
+                    break
+            # propagate the early stop out of the epoch loop as well
+            if early_stopping.early_stop:
+                break
+        early_stopping.load_best_model(self._model)
+        self._is_trained = True
+
+    def evaluate_sg(self, sg, sg_feats, labels, val_mask):
+        self._model.eval()
+        sg_ids = sg.ndata["_ID"]
+        sg_val_mask = val_mask[sg_ids].bool()
+        # restrict the full label tensor to this subgraph before masking
+        sg_val_labels = labels[sg_ids][sg_val_mask]
+        with torch.no_grad():
+            sg_val_logits = self._model.inference(sg, sg_feats)[sg_val_mask]
+            val_loss = self._model.loss(sg_val_logits, sg_val_labels)
+        _, predicted = torch.max(sg_val_logits, dim=1)
+        accuracy = (predicted == sg_val_labels).sum().item() / len(sg_val_labels)
+        return {"accuracy": accuracy, "loss": val_loss.item()}
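+    # NOTE: ``evaluate_sg`` scores whichever batch the Cluster-GCN loader
+    # produced. With the defaults above (num_partitions == batch_size == 100)
+    # a single batch contains every partition, i.e. the full graph; use a
+    # smaller batch_size to validate on genuine partition subgraphs.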
self.graph.ndata["test_mask"] + feats = self.graph.ndata["feat"] + labels = self.graph.ndata["label"] + test_logits = [] + test_labels = [] + total_loss = 0 + with torch.no_grad(): + for it, sg in enumerate(self.dataloader): + sg_feats = feats[sg.ndata["_ID"]] + sg_labels = labels[sg.ndata["_ID"]] + sg_test_msak = test_mask[sg.ndata["_ID"]].bool() + sg_test_labels = sg_labels[sg_test_msak] + sg_test_logits = self._model.inference(sg, sg_feats)[sg_test_msak] + loss = self._model.loss(sg_test_logits, sg_test_labels) + total_loss += loss + test_logits.append(sg_test_logits) + test_labels.append(sg_test_labels) + test_logits = torch.tensor(np.vstack(test_logits)) + _, predicted = torch.max(test_logits, dim=1) + accuracy = (predicted == test_labels[0]).sum().item() / len(test_labels[0]) + return {"accuracy": accuracy, "total_loss": total_loss.item()} diff --git a/hugegraph-ml/src/hugegraph_ml/utils/dgl2hugegraph_utils.py b/hugegraph-ml/src/hugegraph_ml/utils/dgl2hugegraph_utils.py index c1900422..cdc4ea3f 100644 --- a/hugegraph-ml/src/hugegraph_ml/utils/dgl2hugegraph_utils.py +++ b/hugegraph-ml/src/hugegraph_ml/utils/dgl2hugegraph_utils.py @@ -27,7 +27,11 @@ import torch from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset, LegacyTUDataset, GINDataset, \ get_download_dir -from dgl.data.utils import _get_dgl_url, download +from dgl.data.utils import _get_dgl_url, download, load_graphs +import networkx as nx +from ogb.linkproppred import DglLinkPropPredDataset +import pandas as pd +import json from pyhugegraph.api.graph import GraphManager from pyhugegraph.api.schema import SchemaManager from pyhugegraph.client import PyHugeClient @@ -280,6 +284,647 @@ def import_hetero_graph_from_dgl( if len(edatas) > 0: _add_batch_edges(client_graph, edatas) +def import_hetero_graph_from_dgl_no_feat( + dataset_name, + ip: str = "127.0.0.1", + port: str = "8080", + graph: str = "hugegraph", + user: str = "", + pwd: str = "", + graphspace: Optional[str] = None, +): + # dataset download from: + # https://s3.us-west-2.amazonaws.com/dgl-data/dataset/recsys/GATNE/amazon.zip + dataset_name = dataset_name.upper() + if dataset_name == "AMAZONGATNE": + hetero_graph = load_training_data_gatne() + else: + raise ValueError("dataset not supported") + client: PyHugeClient = PyHugeClient( + ip=ip, port=port, graph=graph, user=user, pwd=pwd, graphspace=graphspace + ) + client_schema: SchemaManager = client.schema() + client_graph: GraphManager = client.graph() + + ntype_to_vertex_label = {} + ntype_idx_to_vertex_id = {} + for ntype in hetero_graph.ntypes: + # create vertex schema + vertex_label = f"{dataset_name}_{ntype}_v" + ntype_to_vertex_label[ntype] = vertex_label + client_schema.vertexLabel(vertex_label).useAutomaticId().ifNotExist().create() + # add vertices for batch of ntype + idx_to_vertex_id = {} + vdatas = [] + idxs = [] + for idx in range(hetero_graph.number_of_nodes(ntype=ntype)): + properties = {} + vdata = [vertex_label, properties] + vdatas.append(vdata) + idxs.append(idx) + if len(vdatas) == MAX_BATCH_NUM: + idx_to_vertex_id.update(_add_batch_vertices(client_graph, vdatas, idxs)) + vdatas.clear() + idxs.clear() + if len(vdatas) > 0: + idx_to_vertex_id.update(_add_batch_vertices(client_graph, vdatas, idxs)) + ntype_idx_to_vertex_id[ntype] = idx_to_vertex_id + + # add edges + edatas = [] + for canonical_etype in hetero_graph.canonical_etypes: + # create edge schema + src_type, etype, dst_type = canonical_etype + edge_label = f"{dataset_name}_{etype}_e" + 
client_schema.edgeLabel(edge_label).sourceLabel( + ntype_to_vertex_label[src_type] + ).targetLabel(ntype_to_vertex_label[dst_type]).ifNotExist().create() + # add edges for batch of canonical_etype + srcs, dsts = hetero_graph.edges(etype=canonical_etype) + for src, dst in zip(srcs.numpy(), dsts.numpy()): + edata = [ + edge_label, + ntype_idx_to_vertex_id[src_type][src], + ntype_idx_to_vertex_id[dst_type][dst], + ntype_to_vertex_label[src_type], + ntype_to_vertex_label[dst_type], + {}, + ] + edatas.append(edata) + if len(edatas) == MAX_BATCH_NUM: + _add_batch_edges(client_graph, edatas) + edatas.clear() + if len(edatas) > 0: + _add_batch_edges(client_graph, edatas) + + +def import_graph_from_nx( + dataset_name, + ip: str = "127.0.0.1", + port: str = "8080", + graph: str = "hugegraph", + user: str = "", + pwd: str = "", + graphspace: Optional[str] = None, +): + dataset_name = dataset_name.upper() + if dataset_name == "CAVEMAN": + dataset = nx.connected_caveman_graph(20, 20) + else: + raise ValueError("dataset not supported") + + client: PyHugeClient = PyHugeClient( + ip=ip, port=port, graph=graph, user=user, pwd=pwd, graphspace=graphspace + ) + client_schema: SchemaManager = client.schema() + client_graph: GraphManager = client.graph() + # create property schema + # check props and create vertex label + vertex_label = f"{dataset_name}_vertex" + props_value = {} + client_schema.vertexLabel(vertex_label).useAutomaticId().ifNotExist().create() + # add vertices for batch (note MAX_BATCH_NUM) + idx_to_vertex_id = {} + vdatas = [] + vidxs = [] + for idx in dataset.nodes: + vdata = [vertex_label, {}] + vdatas.append(vdata) + vidxs.append(idx) + if len(vdatas) == MAX_BATCH_NUM: + idx_to_vertex_id.update(_add_batch_vertices(client_graph, vdatas, vidxs)) + vdatas.clear() + vidxs.clear() + # add rest vertices + if len(vdatas) > 0: + idx_to_vertex_id.update(_add_batch_vertices(client_graph, vdatas, vidxs)) + + # add edges for batch + edge_label = f"{dataset_name}_edge" + client_schema.edgeLabel(edge_label).sourceLabel(vertex_label).targetLabel( + vertex_label + ).ifNotExist().create() + edatas = [] + for edge in dataset.edges: + edata = [ + edge_label, + idx_to_vertex_id[edge[0]], + idx_to_vertex_id[edge[1]], + vertex_label, + vertex_label, + {}, + ] + edatas.append(edata) + if len(edatas) == MAX_BATCH_NUM: + _add_batch_edges(client_graph, edatas) + edatas.clear() + if len(edatas) > 0: + _add_batch_edges(client_graph, edatas) + + +def import_graph_from_dgl_with_edge_feat( + dataset_name, + ip: str = "127.0.0.1", + port: str = "8080", + graph: str = "hugegraph", + user: str = "", + pwd: str = "", + graphspace: Optional[str] = None, +): + dataset_name = dataset_name.upper() + if dataset_name == "CORA": + dataset_dgl = CoraGraphDataset(verbose=False) + elif dataset_name == "CITESEER": + dataset_dgl = CiteseerGraphDataset(verbose=False) + elif dataset_name == "PUBMED": + dataset_dgl = PubmedGraphDataset(verbose=False) + else: + raise ValueError("dataset not supported") + graph_dgl = dataset_dgl[0] + + client: PyHugeClient = PyHugeClient( + ip=ip, port=port, graph=graph, user=user, pwd=pwd, graphspace=graphspace + ) + client_schema: SchemaManager = client.schema() + client_graph: GraphManager = client.graph() + # create property schema + client_schema.propertyKey( + "feat" + ).asDouble().valueList().ifNotExist().create() # node features + client_schema.propertyKey("edge_feat").asDouble().valueList().ifNotExist().create() + client_schema.propertyKey("label").asLong().ifNotExist().create() + 
client_schema.propertyKey("train_mask").asInt().ifNotExist().create() + client_schema.propertyKey("val_mask").asInt().ifNotExist().create() + client_schema.propertyKey("test_mask").asInt().ifNotExist().create() + # check props and create vertex label + vertex_label = f"{dataset_name}_edge_feat_vertex" + node_all_props = ["feat", "label", "train_mask", "val_mask", "test_mask"] + node_props = [p for p in node_all_props if p in graph_dgl.ndata] + node_props_value = {} + for p in node_props: + node_props_value[p] = graph_dgl.ndata[p].tolist() + client_schema.vertexLabel(vertex_label).useAutomaticId().properties( + *node_props + ).ifNotExist().create() + # add vertices for batch (note MAX_BATCH_NUM) + idx_to_vertex_id = {} + vdatas = [] + vidxs = [] + for idx in range(graph_dgl.number_of_nodes()): + # extract props + properties = { + p: ( + int(node_props_value[p][idx]) + if isinstance(node_props_value[p][idx], bool) + else node_props_value[p][idx] + ) + for p in node_props + } + vdata = [vertex_label, properties] + vdatas.append(vdata) + vidxs.append(idx) + if len(vdatas) == MAX_BATCH_NUM: + idx_to_vertex_id.update(_add_batch_vertices(client_graph, vdatas, vidxs)) + vdatas.clear() + vidxs.clear() + # add rest vertices + if len(vdatas) > 0: + idx_to_vertex_id.update(_add_batch_vertices(client_graph, vdatas, vidxs)) + + # add edges for batch + edge_label = f"{dataset_name}_edge_feat_edge" + edge_all_props = ["edge_feat"] + + client_schema.edgeLabel(edge_label).sourceLabel(vertex_label).targetLabel( + vertex_label + ).properties(*edge_all_props).ifNotExist().create() + edges_src, edges_dst = graph_dgl.edges() + edatas = [] + for src, dst in zip(edges_src.numpy(), edges_dst.numpy()): + properties = {p: (torch.rand(8).tolist()) for p in edge_all_props} + edata = [ + edge_label, + idx_to_vertex_id[src], + idx_to_vertex_id[dst], + vertex_label, + vertex_label, + properties, + ] + edatas.append(edata) + if len(edatas) == MAX_BATCH_NUM: + _add_batch_edges(client_graph, edatas) + edatas.clear() + if len(edatas) > 0: + _add_batch_edges(client_graph, edatas) + + +def import_graph_from_ogb( + dataset_name, + ip: str = "127.0.0.1", + port: str = "8080", + graph: str = "hugegraph", + user: str = "", + pwd: str = "", + graphspace: Optional[str] = None, +): + if dataset_name == "ogbl-collab": + dataset_dgl = DglLinkPropPredDataset(name=dataset_name) + else: + raise ValueError("dataset not supported") + graph_dgl = dataset_dgl[0] + split_edges = dataset_dgl.get_edge_split() + + client: PyHugeClient = PyHugeClient( + ip=ip, port=port, graph=graph, user=user, pwd=pwd, graphspace=graphspace + ) + client_schema: SchemaManager = client.schema() + client_graph: GraphManager = client.graph() + # create property schema + client_schema.propertyKey( + "feat" + ).asDouble().valueList().ifNotExist().create() # node features + client_schema.propertyKey("year").asDouble().valueList().ifNotExist().create() + client_schema.propertyKey("weight").asDouble().valueList().ifNotExist().create() + + # check props and create vertex label + vertex_label = f"{dataset_name}_vertex" + node_all_props = ["feat"] + node_props = [p for p in node_all_props if p in graph_dgl.ndata] + node_props_value = {} + for p in node_props: + node_props_value[p] = graph_dgl.ndata[p].tolist() + client_schema.vertexLabel(vertex_label).useAutomaticId().properties( + *node_props + ).ifNotExist().create() + + # add vertices for batch (note MAX_BATCH_NUM) + idx_to_vertex_id = {} + vdatas = [] + vidxs = [] + max_nodes = 10000 + for idx in 
range(graph_dgl.number_of_nodes()):
+        if idx <= max_nodes:
+            # extract props
+            properties = {
+                p: (
+                    int(node_props_value[p][idx])
+                    if isinstance(node_props_value[p][idx], bool)
+                    else node_props_value[p][idx]
+                )
+                for p in node_props
+            }
+            vdata = [vertex_label, properties]
+            vdatas.append(vdata)
+            vidxs.append(idx)
+            if len(vdatas) == MAX_BATCH_NUM:
+                idx_to_vertex_id.update(
+                    _add_batch_vertices(client_graph, vdatas, vidxs)
+                )
+                vdatas.clear()
+                vidxs.clear()
+    # add the remaining vertices
+    if len(vdatas) > 0:
+        idx_to_vertex_id.update(_add_batch_vertices(client_graph, vdatas, vidxs))
+
+    # add edges in batches
+    edge_label = f"{dataset_name}_edge"
+    edge_all_props = ["year", "weight"]
+    edge_props_value = {}
+    for p in edge_all_props:
+        edge_props_value[p] = graph_dgl.edata[p].tolist()
+    client_schema.edgeLabel(edge_label).sourceLabel(vertex_label).targetLabel(
+        vertex_label
+    ).properties(*edge_all_props).ifNotExist().create()
+    edges_src, edges_dst = graph_dgl.edges()
+    edatas = []
+    # look up edge properties by the edge index (eid), not by the stale
+    # node-loop variable `idx`
+    for eid, (src, dst) in enumerate(zip(edges_src.numpy(), edges_dst.numpy())):
+        if src <= max_nodes and dst <= max_nodes:
+            properties = {
+                p: (
+                    int(edge_props_value[p][eid])
+                    if isinstance(edge_props_value[p][eid], bool)
+                    else edge_props_value[p][eid]
+                )
+                for p in edge_all_props
+            }
+            edata = [
+                edge_label,
+                idx_to_vertex_id[src],
+                idx_to_vertex_id[dst],
+                vertex_label,
+                vertex_label,
+                properties,
+            ]
+            edatas.append(edata)
+            if len(edatas) == MAX_BATCH_NUM:
+                _add_batch_edges(client_graph, edatas)
+                edatas.clear()
+    if len(edatas) > 0:
+        _add_batch_edges(client_graph, edatas)
+    print("begin edge split")
+    import_split_edge_from_ogb(
+        dataset_name=dataset_name,
+        idx_to_vertex_id=idx_to_vertex_id,
+        max_nodes=max_nodes,
+    )
+
+
+def import_split_edge_from_ogb(
+    dataset_name,
+    idx_to_vertex_id,
+    max_nodes: int,
+    ip: str = "127.0.0.1",
+    port: str = "8080",
+    graph: str = "hugegraph",
+    user: str = "",
+    pwd: str = "",
+    graphspace: Optional[str] = None,
+):
+    if dataset_name == "ogbl-collab":
+        dataset_dgl = DglLinkPropPredDataset(name=dataset_name)
+    else:
+        raise ValueError("dataset not supported")
+    split_edges = dataset_dgl.get_edge_split()
+
+    client: PyHugeClient = PyHugeClient(
+        ip=ip, port=port, graph=graph, user=user, pwd=pwd, graphspace=graphspace
+    )
+    client_schema: SchemaManager = client.schema()
+    client_graph: GraphManager = client.graph()
+    # create property schema
+    client_schema.propertyKey("train_edge_mask").asInt().ifNotExist().create()
+    client_schema.propertyKey("train_year_mask").asInt().ifNotExist().create()
+    client_schema.propertyKey("train_weight_mask").asInt().ifNotExist().create()
+    client_schema.propertyKey("valid_edge_mask").asInt().ifNotExist().create()
+    client_schema.propertyKey("valid_weight_mask").asInt().ifNotExist().create()
+    client_schema.propertyKey("valid_year_mask").asInt().ifNotExist().create()
+    client_schema.propertyKey("valid_edge_neg_mask").asInt().ifNotExist().create()
+    client_schema.propertyKey("test_edge_mask").asInt().ifNotExist().create()
+    client_schema.propertyKey("test_weight_mask").asInt().ifNotExist().create()
+    client_schema.propertyKey("test_year_mask").asInt().ifNotExist().create()
+    client_schema.propertyKey("test_edge_neg_mask").asInt().ifNotExist().create()
+    edge_all_props = [
+        "train_edge_mask",
+        "train_year_mask",
+        "train_weight_mask",
+        "valid_edge_mask",
+        "valid_weight_mask",
+        "valid_year_mask",
+        "valid_edge_neg_mask",
+        "test_edge_mask",
+        "test_weight_mask",
+        "test_year_mask",
+        "test_edge_neg_mask",
+    ]
+    edge_props = [
+
"train_edge_mask", + "valid_edge_mask", + "valid_edge_neg_mask", + "test_edge_mask", + "test_edge_neg_mask", + ] + # add edges for batch + vertex_label = f"{dataset_name}_vertex" + edge_label = f"{dataset_name}_split_edge" + client_schema.edgeLabel(edge_label).sourceLabel(vertex_label).targetLabel( + vertex_label + ).properties(*edge_all_props).ifNotExist().create() + edges = {} + edges["train_edge_mask"] = split_edges["train"]["edge"] + edges["train_year_mask"] = split_edges["train"]["year"] + edges["train_weight_mask"] = split_edges["train"]["weight"] + edges["valid_edge_mask"] = split_edges["valid"]["edge"] + edges["valid_weight_mask"] = split_edges["valid"]["weight"] + edges["valid_year_mask"] = split_edges["valid"]["year"] + edges["valid_edge_neg_mask"] = split_edges["valid"]["edge_neg"] + edges["test_edge_mask"] = split_edges["test"]["edge"] + edges["test_weight_mask"] = split_edges["test"]["weight"] + edges["test_year_mask"] = split_edges["test"]["year"] + edges["test_edge_neg_mask"] = split_edges["test"]["edge_neg"] + init_ogb_split_edge( + "train", + "valid", + "test", + "", + edges, + max_nodes, + edge_props, + vertex_label, + edge_label, + idx_to_vertex_id, + client_graph, + ) + init_ogb_split_edge( + "valid", + "train", + "test", + "", + edges, + max_nodes, + edge_props, + vertex_label, + edge_label, + idx_to_vertex_id, + client_graph, + ) + init_ogb_split_edge( + "valid", + "train", + "test", + "neg_", + edges, + max_nodes, + edge_props, + vertex_label, + edge_label, + idx_to_vertex_id, + client_graph, + ) + init_ogb_split_edge( + "test", + "train", + "valid", + "", + edges, + max_nodes, + edge_props, + vertex_label, + edge_label, + idx_to_vertex_id, + client_graph, + ) + init_ogb_split_edge( + "test", + "train", + "valid", + "neg_", + edges, + max_nodes, + edge_props, + vertex_label, + edge_label, + idx_to_vertex_id, + client_graph, + ) + + +def import_hetero_graph_from_dgl_bgnn( + dataset_name, + ip: str = "127.0.0.1", + port: str = "8080", + graph: str = "hugegraph", + user: str = "", + pwd: str = "", + graphspace: Optional[str] = None, +): + # dataset download from : https://www.dropbox.com/s/verx1evkykzli88/datasets.zip + # Extract zip folder in this directory + dataset_name = dataset_name.upper() + if dataset_name == "AVAZU": + hetero_graph = read_input() + else: + raise ValueError("dataset not supported") + client: PyHugeClient = PyHugeClient( + ip=ip, port=port, graph=graph, user=user, pwd=pwd, graphspace=graphspace + ) + client_schema: SchemaManager = client.schema() + client_graph: GraphManager = client.graph() + + client_schema.propertyKey("feat").asInt().valueList().ifNotExist().create() + client_schema.propertyKey("class").asDouble().valueList().ifNotExist().create() + client_schema.propertyKey("cat_features").asInt().valueList().ifNotExist().create() + client_schema.propertyKey("train_mask").asInt().ifNotExist().create() + client_schema.propertyKey("val_mask").asInt().ifNotExist().create() + client_schema.propertyKey("test_mask").asInt().ifNotExist().create() + + ntype_to_vertex_label = {} + ntype_idx_to_vertex_id = {} + for ntype in hetero_graph.ntypes: + # create vertex schema + vertex_label = f"{dataset_name}_{ntype}_v" + ntype_to_vertex_label[ntype] = vertex_label + all_props = [ + "feat", + "class", + "cat_features", + "train_mask", + "val_mask", + "test_mask", + ] + # check properties + props = [p for p in all_props if p in hetero_graph.nodes[ntype].data] + client_schema.vertexLabel(vertex_label).useAutomaticId().properties( + *props + 
).ifNotExist().create() + props_value = {} + for p in props: + props_value[p] = hetero_graph.nodes[ntype].data[p].tolist() + # add vertices for batch of ntype + idx_to_vertex_id = {} + vdatas = [] + idxs = [] + for idx in range(hetero_graph.number_of_nodes(ntype=ntype)): + properties = { + p: ( + int(props_value[p][idx]) + if isinstance(props_value[p][idx], bool) + else props_value[p][idx] + ) + for p in props + } + vdata = [vertex_label, properties] + vdatas.append(vdata) + idxs.append(idx) + if len(vdatas) == MAX_BATCH_NUM: + idx_to_vertex_id.update(_add_batch_vertices(client_graph, vdatas, idxs)) + vdatas.clear() + idxs.clear() + if len(vdatas) > 0: + idx_to_vertex_id.update(_add_batch_vertices(client_graph, vdatas, idxs)) + ntype_idx_to_vertex_id[ntype] = idx_to_vertex_id + + # add edges + edatas = [] + for canonical_etype in hetero_graph.canonical_etypes: + # create edge schema + src_type, etype, dst_type = canonical_etype + edge_label = f"{dataset_name}_{etype}_e" + client_schema.edgeLabel(edge_label).sourceLabel( + ntype_to_vertex_label[src_type] + ).targetLabel(ntype_to_vertex_label[dst_type]).ifNotExist().create() + # add edges for batch of canonical_etype + srcs, dsts = hetero_graph.edges(etype=canonical_etype) + for src, dst in zip(srcs.numpy(), dsts.numpy()): + edata = [ + edge_label, + ntype_idx_to_vertex_id[src_type][src], + ntype_idx_to_vertex_id[dst_type][dst], + ntype_to_vertex_label[src_type], + ntype_to_vertex_label[dst_type], + {}, + ] + edatas.append(edata) + if len(edatas) == MAX_BATCH_NUM: + _add_batch_edges(client_graph, edatas) + edatas.clear() + if len(edatas) > 0: + _add_batch_edges(client_graph, edatas) + + +def init_ogb_split_edge( + a, + b, + c, + d, + edges, + max_nodes, + edge_props, + vertex_label, + edge_label, + idx_to_vertex_id, + client_graph, +): + edatas = [] + for idx, edge in enumerate(edges[f"{a}_edge_{d}mask"]): + if int(edge[0]) <= max_nodes and int(edge[1]) <= max_nodes: + properties = {q: (int(q == f"{a}_edge_{d}mask")) for q in edge_props} + if d != "neg_": + properties2 = { + f"{a}_year_mask": int(edges[f"{a}_year_mask"][idx]), + f"{a}_weight_mask": int(edges[f"{a}_weight_mask"][idx]), + } + properties3 = { + f"{b}_year_mask": -1, + f"{b}_weight_mask": -1, + f"{c}_year_mask": -1, + f"{c}_weight_mask": -1, + } + properties.update(properties2) + properties.update(properties3) + else: + properties2 = { + f"{a}_year_mask": -1, + f"{a}_weight_mask": -1, + f"{b}_year_mask": -1, + f"{b}_weight_mask": -1, + f"{c}_year_mask": -1, + f"{c}_weight_mask": -1, + } + properties.update(properties2) + edata = [ + edge_label, + idx_to_vertex_id[int(edge[0])], + idx_to_vertex_id[int(edge[1])], + vertex_label, + vertex_label, + properties, + ] + edatas.append(edata) + if len(edatas) == MAX_BATCH_NUM: + _add_batch_edges(client_graph, edatas) + edatas.clear() + if len(edatas) > 0: + _add_batch_edges(client_graph, edatas) def _add_batch_vertices(client_graph, vdatas, vidxs): vertices = client_graph.addVertices(vdatas) @@ -361,6 +1006,99 @@ def load_acm_raw(): return hgraph +def read_input(): + # reference: https://github.com/dmlc/dgl/blob/master/examples/pytorch/bgnn/run.py + # I added X, y, cat_features and masks into graph + input_folder = "dataset/avazu" + X = pd.read_csv(f"{input_folder}/X.csv") + y = pd.read_csv(f"{input_folder}/y.csv") + + categorical_columns = [] + if os.path.exists(f"{input_folder}/cat_features.txt"): + with open(f"{input_folder}/cat_features.txt") as f: + for line in f: + if line.strip(): + categorical_columns.append(line.strip()) + + 
cat_features = None + if categorical_columns: + columns = X.columns + cat_features = np.where(columns.isin(categorical_columns))[0] + + for col in list(columns[cat_features]): + X[col] = X[col].astype(str) + + gs, _ = load_graphs(f"{input_folder}/graph.dgl") + graph = gs[0] + + with open(f"{input_folder}/masks.json") as f: + masks = json.load(f) + + # add X + features = [[int(x) for x in row] for row in X.values] + features_tensor = torch.tensor(features, dtype=torch.int32) + graph.ndata["feat"] = features_tensor + + # add y + y_tensor = torch.tensor(y.values, dtype=torch.float64) + graph.ndata["class"] = y_tensor + + # add masks + for mask_name, node_ids in masks["0"].items(): + mask_tensor = torch.zeros(graph.number_of_nodes(), dtype=torch.int32) + mask_tensor[node_ids] = 1 + graph.ndata[f"{mask_name}_mask"] = mask_tensor + + # add cat_features + cat_features_tensor = torch.tensor(cat_features, dtype=torch.int32) + graph.ndata["cat_features"] = torch.repeat_interleave( + cat_features_tensor[None, :], repeats=graph.number_of_nodes(), dim=0 + ) + + return graph + + +def load_training_data_gatne(): + # reference: https://github.com/dmlc/dgl/blob/master/examples/pytorch/GATNE-T/src/utils.py + # reference: https://github.com/dmlc/dgl/blob/master/examples/pytorch/GATNE-T/src/main.py + f_name = "dataset/amazon/train.txt" + print("We are loading data from:", f_name) + edge_data_by_type = dict() + with open(f_name, "r") as f: + for line in f: + words = line[:-1].split(" ") # line[-1] == '\n' + if words[0] not in edge_data_by_type: + edge_data_by_type[words[0]] = list() + x, y = words[1], words[2] + edge_data_by_type[words[0]].append((x, y)) + nodes, index2word = [], [] + for edge_type in edge_data_by_type: + node1, node2 = zip(*edge_data_by_type[edge_type]) + index2word = index2word + list(node1) + list(node2) + index2word = list(set(index2word)) + vocab = {} + i = 0 + for word in index2word: + vocab[word] = i + i = i + 1 + for edge_type in edge_data_by_type: + node1, node2 = zip(*edge_data_by_type[edge_type]) + tmp_nodes = list(set(list(node1) + list(node2))) + tmp_nodes = [vocab[word] for word in tmp_nodes] + nodes.append(tmp_nodes) + node_type = "_N" # '_N' can be replaced by an arbitrary name + data_dict = dict() + num_nodes_dict = {node_type: len(vocab)} + for edge_type in edge_data_by_type: + tmp_data = edge_data_by_type[edge_type] + src = [] + dst = [] + for edge in tmp_data: + src.extend([vocab[edge[0]], vocab[edge[1]]]) + dst.extend([vocab[edge[1]], vocab[edge[0]]]) + data_dict[(node_type, edge_type, node_type)] = (src, dst) + graph = dgl.heterograph(data_dict, num_nodes_dict) + return graph def _get_mask(size, indices): mask = torch.zeros(size) @@ -373,3 +1111,8 @@ def _get_mask(size, indices): import_graph_from_dgl("CORA") import_graphs_from_dgl("MUTAG") import_hetero_graph_from_dgl("ACM") + import_graph_from_nx("CAVEMAN") + import_graph_from_dgl_with_edge_feat("CORA") + import_graph_from_ogb("ogbl-collab") + import_hetero_graph_from_dgl_bgnn("AVAZU") + import_hetero_graph_from_dgl_no_feat("amazongatne") diff --git a/hugegraph-ml/src/tests/test_examples/test_examples.py b/hugegraph-ml/src/tests/test_examples/test_examples.py index 6ed8b812..861b240c 100644 --- a/hugegraph-ml/src/tests/test_examples/test_examples.py +++ b/hugegraph-ml/src/tests/test_examples/test_examples.py @@ -23,7 +23,18 @@ from hugegraph_ml.examples.grace_example import grace_example from hugegraph_ml.examples.grand_example import grand_example from hugegraph_ml.examples.jknet_example import jknet_example - +from 
hugegraph_ml.examples.agnn_example import agnn_example +from hugegraph_ml.examples.appnp_example import appnp_example +from hugegraph_ml.examples.arma_example import arma_example +from hugegraph_ml.examples.bgnn_example import bgnn_example +from hugegraph_ml.examples.bgrl_example import bgrl_example +from hugegraph_ml.examples.care_gnn_example import care_gnn_example +from hugegraph_ml.examples.cluster_gcn_example import cluster_gcn_example +from hugegraph_ml.examples.correct_and_smooth_example import cs_example +from hugegraph_ml.examples.dagnn_example import dagnn_example +from hugegraph_ml.examples.deepergcn_example import deepergcn_example +from hugegraph_ml.examples.pgnn_example import pgnn_example +from hugegraph_ml.examples.seal_example import seal_example class TestHugegraph2DGL(unittest.TestCase): def setUp(self): @@ -64,3 +75,75 @@ def test_gin_example(self): gin_example(n_epochs=self.test_n_epochs) except ValueError: self.fail("model gin example failed") + + def test_agnn_example(self): + try: + agnn_example(n_epochs=self.test_n_epochs) + except ValueError: + self.fail("model agnn example failed") + + def test_appnp_example(self): + try: + appnp_example(n_epochs=self.test_n_epochs) + except ValueError: + self.fail("model appnp example failed") + + def test_arma_example(self): + try: + arma_example(n_epochs=self.test_n_epochs) + except ValueError: + self.fail("model arma example failed") + + def test_bgnn_example(self): + try: + bgnn_example(n_epochs=self.test_n_epochs) + except ValueError: + self.fail("model bgnn example failed") + + def test_bgrl_example(self): + try: + bgrl_example(n_epochs=self.test_n_epochs) + except ValueError: + self.fail("model bgrl example failed") + + def test_cluster_gcn_example(self): + try: + cluster_gcn_example(n_epochs=self.test_n_epochs) + except ValueError: + self.fail("model cluster-gcn example failed") + + def test_correct_and_smooth_example(self): + try: + cs_example(n_epochs=self.test_n_epochs) + except ValueError: + self.fail("model correct and smooth example failed") + + def test_dagnn_example(self): + try: + dagnn_example(n_epochs=self.test_n_epochs) + except ValueError: + self.fail("model dagnn example failed") + + def test_deepergcn_example(self): + try: + deepergcn_example(n_epochs=self.test_n_epochs) + except ValueError: + self.fail("model deepergcn example failed") + + def test_pgnn_example(self): + try: + pgnn_example(n_epochs=self.test_n_epochs) + except ValueError: + self.fail("model p-gnn example failed") + + def test_seal_example(self): + try: + seal_example(n_epochs=self.test_n_epochs) + except ValueError: + self.fail("model seal example failed") + + def test_care_gnn_example(self): + try: + care_gnn_example(n_epochs=self.test_n_epochs) + except ValueError: + self.fail("model care-gnn example failed")
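As a usage sketch for reviewers (not part of the patch itself): the new importer utilities and example entry points compose end-to-end. Assuming a HugeGraph server on the default `127.0.0.1:8080`, something like the following exercises one of the added models; the epoch budget is an arbitrary choice for a quick smoke run.

```python
# Hypothetical smoke run; both functions are added by this patch.
from hugegraph_ml.utils.dgl2hugegraph_utils import import_graph_from_dgl
from hugegraph_ml.examples.appnp_example import appnp_example

import_graph_from_dgl("CORA")  # write the CORA citation graph into HugeGraph
appnp_example(n_epochs=50)     # read it back, then train and evaluate APPNP
```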