Merge branch 'coface_cc_lift' of github.com:martin-carrasco/challenge-icml-2024

gbg141 · gbg141 · commit a836af20e8ac · 2025-02-19T19:04:00.000-08:00
diff --git a/configs/datasets/KarateClub.yaml b/configs/datasets/KarateClub.yaml
@@ -0,0 +1,7 @@
+data_name: KarateClub
+data_domain: simplex
+data_dir: datasets/${data_domain}/${data_type}
+data_type: simplex
+
+num_features: 2
+num_classes:  2
diff --git a/configs/transforms/liftings/simplicial2combinatorial/coface_cc_lifting.yaml b/configs/transforms/liftings/simplicial2combinatorial/coface_cc_lifting.yaml
@@ -0,0 +1,4 @@
+transform_type: 'lifting'
+transform_name: "CofaceCCLifting"
+feature_lifting: ProjectionSum
+keep_features: False
diff --git a/modules/data/load/loaders.py b/modules/data/load/loaders.py
@@ -223,7 +223,14 @@ def load(
         torch_geometric.data.Dataset
             torch_geometric.data.Dataset object containing the loaded data.
         """
-        return load_simplicial_dataset(self.parameters)
+        root_folder = rootutils.find_root()
+        root_data_dir = os.path.join(root_folder, self.parameters["data_dir"])
+
+        self.data_dir = os.path.join(
+            root_data_dir, self.parameters["data_name"]
+        )
+        data = load_simplicial_dataset(self.parameters)
+        return CustomDataset([data], self.data_dir)
 
 
 class HypergraphLoader(AbstractLoader):
diff --git a/modules/data/utils/utils.py b/modules/data/utils/utils.py
@@ -25,6 +25,7 @@
     fetch_spiral_2d,
 )
 from topomodelx.utils.sparse import from_sparse
+from toponetx.classes.combinatorial_complex import CombinatorialComplex
 from torch_geometric.data import Data
 from torch_geometric.datasets import GeometricShapes
 from torch_sparse import SparseTensor, coalesce
@@ -71,6 +72,90 @@ def get_ccc_connectivity(complex, max_rank):
     return connectivity
 
 
+def get_combinatorial_complex_connectivity_2(
+    complex: CombinatorialComplex, max_rank, signed=False
+):
+    r"""Get the connectivity matrices of a combinatorial complex.
+
+    For every rank ``r`` in ``0..max_rank`` the entries ``incidence_r``,
+    ``laplacian_r`` and ``adjacency_r`` are computed. Whenever computing one
+    of them raises ``ValueError`` or ``AttributeError``, an all-zero sparse
+    matrix is stored under that key instead.
+
+    Parameters
+    ----------
+    complex : toponetx.CombinatorialComplex
+        Combinatorial complex.
+    max_rank : int
+        Maximum rank of the complex.
+    signed : bool
+        Currently not read by this function (the signature matches
+        ``get_complex_connectivity``).
+
+    Returns
+    -------
+    dict
+        Dictionary containing the connectivity matrices and, under the key
+        ``"shape"``, the number of cells per rank zero-padded up to
+        ``max_rank``.
+    """
+    practical_shape = list(
+        np.pad(list(complex.shape), (0, max_rank + 1 - len(complex.shape)))
+    )
+    connectivity = {}
+    for rank_idx in range(max_rank + 1):
+        for connectivity_info in ("incidence", "laplacian", "adjacency"):
+            key = f"{connectivity_info}_{rank_idx}"
+            try:
+                if connectivity_info == "laplacian":
+                    matrix = complex.laplacian_matrix(rank=rank_idx)
+                elif connectivity_info == "adjacency":
+                    matrix = complex.adjacency_matrix(rank_idx, rank_idx + 1)
+                else:  # incidence
+                    matrix = complex.incidence_matrix(rank_idx - 1, rank_idx)
+                connectivity[key] = from_sparse(matrix)
+            except (ValueError, AttributeError):  # noqa: PERF203
+                # The matrix is not available for this rank: store an empty
+                # placeholder. Incidence maps rank r-1 to rank r, the other
+                # two matrices are square over rank r.
+                # NOTE: for rank 0 the index ``rank_idx - 1`` is -1, so the
+                # row count is taken from the last entry of practical_shape.
+                if connectivity_info == "incidence":
+                    n_rows = practical_shape[rank_idx - 1]
+                else:
+                    n_rows = practical_shape[rank_idx]
+                connectivity[key] = generate_zero_sparse_connectivity(
+                    m=n_rows,
+                    n=practical_shape[rank_idx],
+                )
+    connectivity["shape"] = practical_shape
+    return connectivity
+
+
 def get_complex_connectivity(complex, max_rank, signed=False):
     r"""Gets the connectivity matrices for the complex.
 
@@ -474,6 +559,46 @@ def load_point_cloud(
     return torch_geometric.data.Data(x=features, y=classes, pos=points)
 
 
+def load_manual_simplicial_complex():
+    """Create a manual simplicial complex for testing purposes.
+
+    The complex has 5 vertices, 7 edges and 3 triangles. Features of every
+    rank are random, the incidence matrices are unsigned, and ``incidence_0``
+    and all adjacency matrices are all-zero placeholders.
+
+    Returns
+    -------
+    torch_geometric.data.Data
+        Data object holding ``x_r``, ``incidence_r`` and ``adjacency_r`` for
+        ranks 0, 1 and 2, plus ``num_nodes`` and the label ``y``.
+    """
+    num_feats = 2
+    # Cells listed by rank: vertices (rank 0), edges (rank 1), triangles (rank 2).
+    vertices = list(range(5))
+    edges = [[0, 1], [0, 2], [1, 2], [1, 3], [2, 3], [0, 4], [2, 4]]
+    triangles = [[0, 1, 2], [1, 2, 3], [0, 2, 4]]
+    # Rows are vertices, columns are edges.
+    incidence_1 = [
+        [1, 1, 0, 0, 0, 1, 0],
+        [1, 0, 1, 1, 0, 0, 0],
+        [0, 1, 1, 0, 1, 0, 1],
+        [0, 0, 0, 1, 1, 0, 0],
+        [0, 0, 0, 0, 0, 1, 1],
+    ]
+    # Rows are edges, columns are triangles.
+    incidence_2 = [
+        [1, 0, 0],
+        [1, 0, 1],
+        [1, 1, 0],
+        [0, 1, 0],
+        [0, 1, 0],
+        [0, 0, 1],
+        [0, 0, 1],
+    ]
+    n_vertices, n_edges, n_triangles = len(vertices), len(edges), len(triangles)
+
+    y = [1]
+
+    # Each adjacency placeholder is square over the cells of its own rank so
+    # that its size agrees with the number of rows of the matching x_r.
+    return torch_geometric.data.Data(
+        x_0=torch.rand(n_vertices, num_feats),
+        x_1=torch.rand(n_edges, num_feats),
+        x_2=torch.rand(n_triangles, num_feats),
+        incidence_0=torch.zeros((1, n_vertices)).to_sparse(),
+        adjacency_1=torch.zeros((n_edges, n_edges)).to_sparse(),
+        adjacency_2=torch.zeros((n_triangles, n_triangles)).to_sparse(),
+        adjacency_0=torch.zeros((n_vertices, n_vertices)).to_sparse(),
+        incidence_1=torch.tensor(incidence_1).to_sparse(),
+        incidence_2=torch.tensor(incidence_2).to_sparse(),
+        num_nodes=n_vertices,
+        y=torch.tensor(y),
+    )
+
+
 def load_manual_graph():
     """Create a manual graph for testing purposes."""
     # Define the vertices (just 8 vertices)
diff --git a/modules/models/combinatorial/hmc.py b/modules/models/combinatorial/hmc.py
@@ -20,7 +20,6 @@ def __init__(self, model_config, dataset_config):
             if isinstance(dataset_config["num_features"], int)
             else dataset_config["num_features"][0]
         )
-
         negative_slope = model_config["negative_slope"]
         hidden_channels = model_config["hidden_channels"]
         out_channels = dataset_config["num_classes"]
diff --git a/modules/transforms/data_transform.py b/modules/transforms/data_transform.py
@@ -83,6 +83,9 @@
 from modules.transforms.liftings.pointcloud2simplicial.random_flag_complex import (
     RandomFlagComplexLifting,
 )
+from modules.transforms.liftings.simplicial2combinatorial.coface_cc_lifting import (
+    CofaceCCLifting,
+)
 
 TRANSFORMS = {
     # Graph -> Hypergraph
@@ -119,6 +122,8 @@
     "UniversalStrictLifting": UniversalStrictLifting,
     # Hypergraph -> Simplicial Complex
     "HypergraphHeatLifting": HypergraphHeatLifting,
+    # Simplicial Complex -> Combinatorial Complex
+    "CofaceCCLifting": CofaceCCLifting,
     # Feature Liftings
     "ProjectionSum": ProjectionSum,
     # Data Manipulations
diff --git a/modules/transforms/liftings/simplicial2combinatorial/base.py b/modules/transforms/liftings/simplicial2combinatorial/base.py
@@ -0,0 +1,16 @@
+from modules.transforms.liftings.lifting import SimplicialLifting
+
+
+class Simplicial2CombinatorialLifting(SimplicialLifting):
+    r"""Abstract class for lifting simplicial complexes to combinatorial complexes.
+
+    Parameters
+    ----------
+    **kwargs : optional
+        Additional arguments for the class.
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        # Tag naming the source and target domains of this lifting family.
+        self.type = "simplicial2combinatorial"
+
diff --git a/modules/transforms/liftings/simplicial2combinatorial/coface_cc_lifting.py b/modules/transforms/liftings/simplicial2combinatorial/coface_cc_lifting.py
@@ -0,0 +1,113 @@
+from toponetx.classes.combinatorial_complex import CombinatorialComplex
+from toponetx.classes.hyperedge import HyperEdge
+from torch_geometric.data import Data
+
+from modules.data.utils.utils import get_combinatorial_complex_connectivity
+from modules.transforms.liftings.simplicial2combinatorial.base import (
+    Simplicial2CombinatorialLifting,
+)
+
+
+class CofaceCCLifting(Simplicial2CombinatorialLifting):
+    r"""Lift a simplicial complex to a combinatorial complex through cofaces.
+
+    The 0-, 1- and 2-cells are rebuilt from the incidence matrices of the
+    input. Then, for every 2-cell, a rank-3 cell is added whose vertices are
+    the union of that 2-cell and of the 2-cells reported for it by
+    ``coadjacency_matrix(2, 1)``.
+
+    Parameters
+    ----------
+    **kwargs : optional
+        Additional arguments for the class. ``keep_features`` (default
+        ``False``) selects whether ``x_1`` and ``x_2`` of the input are
+        carried over to the lifted data.
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.keep_features = kwargs.get("keep_features", False)
+
+    def get_lower_cells(self, data: Data) -> list[HyperEdge]:
+        """Get the lower cells (ranks 0, 1 and 2) of the complex.
+
+        Parameters
+        ----------
+        data : torch_geometric.data.Data
+            The input data; ``x_0``, ``incidence_1`` and ``incidence_2`` are
+            read.
+
+        Returns
+        -------
+        list[HyperEdge]
+            The 0-cells, followed by the 1-cells, followed by the 2-cells,
+            each expressed through its 0-cells.
+        """
+        ## Add 0-cells
+        cells: list[HyperEdge] = [
+            HyperEdge([cell], rank=0) for cell in range(data["x_0"].size(0))
+        ]
+
+        # One row per 1-cell, one column per 0-cell. Densified once and
+        # reused for both the 1-cells and the 2-cells below.
+        one_cell_rows = data["incidence_1"].to_dense().T
+
+        ## Add 1-cells
+        for inc_c_1 in one_cell_rows:
+            # Get the 0-cells that are incident to the 1-cell
+            cell_0_bound = inc_c_1.nonzero().flatten().tolist()
+            assert len(cell_0_bound) == 2
+            cells.append(HyperEdge(cell_0_bound, rank=1))
+
+        ## Add 2-cells
+        for inc_c_2 in data["incidence_2"].to_dense().T:
+            # Get the 1-cells that are incident to the 2-cell
+            cell_1_bound = inc_c_2.nonzero().flatten()
+            # nonzero() on the selected rows yields (row, column) pairs;
+            # the column is the index of the 0-cell.
+            cell_0_bound = one_cell_rows[cell_1_bound].nonzero()[:, 1]
+            # Remove redundant entries and convert to tuple
+            two_cell = HyperEdge(tuple(set(cell_0_bound.tolist())), rank=2)
+            cells.append(two_cell)
+
+        return cells
+
+    def lift_topology(self, data: Data) -> dict:
+        """Lift the simplicial topology to a combinatorial complex.
+
+        Parameters
+        ----------
+        data : torch_geometric.data.Data
+            The input data; must contain ``incidence_1`` and ``incidence_2``.
+
+        Returns
+        -------
+        dict
+            ``x_0`` (plus ``x_1`` and ``x_2`` when ``keep_features`` is set)
+            together with the connectivity computed up to rank 3.
+        """
+
+        # Check that the dataset has the required fields
+        # assume that it's a simplicial dataset
+        assert "incidence_1" in data
+        assert "incidence_2" in data
+
+        cells = self.get_lower_cells(data)
+
+        ccc = CombinatorialComplex(cells, graph_based=False)
+
+        # Iterate over the 2-cells and add the 3-cells
+        for r_cell in ccc.skeleton(rank=2):
+            # Recomputed on every iteration on purpose: add_cell below
+            # modifies the complex, so the matrix may change between rounds.
+            indices, coface = ccc.coadjacency_matrix(2, 1, index=True)
+
+            # Get the indices of the 2-cells that are co-adjacent
+            coface_indices = coface.todense()[indices[r_cell]].nonzero()[1].tolist()
+
+            # The complex is not modified until add_cell, so the current
+            # 2-skeleton can be looked up once for all indices.
+            current_two_cells = ccc.skeleton(rank=2)
+
+            # Collect the 0-cells of the 2-cell and of its co-adjacent
+            # 2-cells; together they form the new 3-cell.
+            cell_3 = set(r_cell)
+            for idx in coface_indices:
+                cell_3 |= set(current_two_cells[idx])
+
+            # Adding a rank 3 cell with less than 4 vertices
+            # will take this cell from the skeleton of 2-cells if it exists
+            # so in the interest of keeping features the user
+            # can choose to recompute all feature embeddings
+            if len(cell_3) < 4 and self.keep_features:
+                continue
+            ccc.add_cell(cell_3, rank=3)
+
+        # Create the incidence, adjacency and laplacian matrices
+        lifted_data = get_combinatorial_complex_connectivity(ccc, 3)
+
+        # If the user wants to keep the features
+        # from the r-cells aside from the first x_0
+        if self.keep_features:
+            lifted_data = {
+                "x_0": data["x_0"],
+                "x_1": data["x_1"],
+                "x_2": data["x_2"],
+                **lifted_data,
+            }
+        else:
+            lifted_data = {"x_0": data["x_0"], **lifted_data}
+
+        return lifted_data
+
+    def forward(self, data: Data) -> Data:
+        """Apply the lifting and merge the result with the input data.
+
+        Parameters
+        ----------
+        data : torch_geometric.data.Data
+            The input data.
+
+        Returns
+        -------
+        torch_geometric.data.Data
+            The input fields, with every key produced by the lifting replaced
+            by the lifted value.
+        """
+        initial_data = data.to_dict()
+        lifted_topology = self.lift_topology(data)
+        lifted_topology = self.feature_lifting(lifted_topology)
+
+        # Drop the input entries that the lifting overrides so that the
+        # constructor of Data does not receive the same keyword twice.
+        untouched = {
+            k: v for k, v in initial_data.items() if k not in lifted_topology
+        }
+        return Data(**untouched, **lifted_topology)
diff --git a/test/transforms/liftings/simplicial2combinatorial/test_coface_cc_lifting.py b/test/transforms/liftings/simplicial2combinatorial/test_coface_cc_lifting.py
diff --git a/tutorials/simplicial2combinatorial/coface_cc_lifting.ipynb b/tutorials/simplicial2combinatorial/coface_cc_lifting.ipynb

Original file line number	Diff line number	Diff line change
`@@ -20,7 +20,6 @@ def __init__(self, model_config, dataset_config):`
`20`	`20`	`if isinstance(dataset_config["num_features"], int)`
`21`	`21`	`else dataset_config["num_features"][0]`
`22`	`22`	`)`
`23`		`-`
`24`	`23`	`negative_slope = model_config["negative_slope"]`
`25`	`24`	`hidden_channels = model_config["hidden_channels"]`
`26`	`25`	`out_channels = dataset_config["num_classes"]`