From 3a3262b36b0cd61076df9f2d7621943ab221a4a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=80lex=20Sol=C3=A9?= Date: Sun, 6 Oct 2024 11:46:16 +0200 Subject: [PATCH] debugged icomformer/ecomformer --- dataset/utils.py | 19 ++++++++++++------- loader/loader.py | 2 +- main.py | 1 + models/comformer.py | 8 +++++--- models/utils.py | 1 + train/train.py | 3 +-- train_scripts/train_ecomformer_adp.sh | 11 +++++++++++ train_scripts/train_icomformer_adp.sh | 20 ++++++++------------ 8 files changed, 40 insertions(+), 25 deletions(-) create mode 100644 train_scripts/train_ecomformer_adp.sh diff --git a/dataset/utils.py b/dataset/utils.py index d9af0d7..4d6e710 100644 --- a/dataset/utils.py +++ b/dataset/utils.py @@ -1,11 +1,11 @@ import logging import os +import os.path as osp from tqdm import tqdm import numpy as np import torch from torch_geometric.data import Data, Batch from torch_scatter import segment_coo, segment_csr -import roma @@ -454,30 +454,35 @@ def optmize_lattice(lattice_vectors): def compute_knn(max_neigh, radius, path, refcodes): - final_root = os.path.join(path, "data_"+str(max_neigh)+"/") - + print(max_neigh) + + final_root = os.path.join(path, "data_"+str(max_neigh)+"_"+str(radius)+"/") + print(final_root) + if os.path.exists(final_root) and os.path.isdir(final_root): - logging.info("Already computed PBC for knn "+str(max_neigh)) + logging.info("Already computed PBC for knn "+str(max_neigh) + " and radius "+str(radius)) return final_root else: os.makedirs(final_root) + os.makedirs(osp.join(final_root,"data/")) for split in refcodes: with open(split, 'r') as file: file_names = [line.strip() for line in file.readlines()] - for file_name in tqdm(file_names, ncols=50, desc="Computing PBC"): - data = torch.load(osp.join(original_root,file_name+".pt")) + for file_name in tqdm(file_names, ncols=100, desc="Computing PBC"): + data = torch.load(osp.join(path,"data/"+file_name+".pt")) data.pbc = torch.tensor([[True, True, True]]) batch = Batch.from_data_list([data]) edge_index, _, _, cart_vector = radius_graph_pbc(batch, radius, max_neigh) + data.edge_index = edge_index data.cart_dist = torch.norm(cart_vector, p=2, dim=-1).unsqueeze(-1) data.cart_dir = torch.nn.functional.normalize(cart_vector, p=2, dim=-1) - torch.save(data, osp.join(final_root,file_name+".pt")) + torch.save(data, osp.join(final_root,"data/"+file_name+".pt")) return final_root diff --git a/loader/loader.py b/loader/loader.py index 941527c..f5e96ed 100644 --- a/loader/loader.py +++ b/loader/loader.py @@ -18,7 +18,7 @@ def create_loader(): refcodes = [osp.join(cfg.dataset_path,"train_files.csv"), osp.join(cfg.dataset_path,"val_files.csv"), osp.join(cfg.dataset_path,"test_files.csv")] if cfg.model in ["icomformer", "ecomformer"]: assert cfg.max_neighbours is not None, "max_neighbours are needed for e/iComformer" - cfg.dataset_path = compute_knn(cfg.max_neighbours, cfg.radius, cfg.path, refcodes) + cfg.dataset_path = compute_knn(cfg.max_neighbours, cfg.radius, cfg.dataset_path, refcodes) optimize_cell = True if cfg.model == "icomformer" else False dataset_train, dataset_val, dataset_test = (DatasetADP(root=osp.join(cfg.dataset_path, "data/"), file_names=refcodes[0], hydrogens=cfg.use_H, standarize_temp = cfg.standarize_temp, augment=cfg.augment, optimize_cell=optimize_cell), diff --git a/main.py b/main.py index ecaa429..e094fe9 100644 --- a/main.py +++ b/main.py @@ -134,6 +134,7 @@ def montecarlo(model, loader): cfg.use_H = args.disable_H cfg.workers = args.workers + torch.set_num_threads(args.threads) set_printing() diff --git a/models/comformer.py b/models/comformer.py index 94971c3..3b5b4d1 100644 --- a/models/comformer.py +++ b/models/comformer.py @@ -9,8 +9,10 @@ import torch from torch import nn -from network.transformer import ComformerConv, ComformerConv_edge, ComformerConvEqui +from models.comformer_conv import ComformerConv, ComformerConv_edge, ComformerConvEqui from models.cartnet import Cholesky_head +from models.utils import RBFExpansion + def bond_cosine(r1, r2): @@ -51,7 +53,7 @@ def __init__(self, dim_in): self.equi_update = ComformerConvEqui(in_channels=self.dim_in, out_channels=self.dim_in, edge_dim=self.dim_in, use_second_order_repr=True) - self.cholesky = Cholesky_head(self.dim_in, 6) + self.cholesky = Cholesky_head(self.dim_in) def forward(self, data) -> torch.Tensor: node_features = self.embedding(data.x) + self.temperature_proj_atom(data.temperature.unsqueeze(-1))[data.batch] @@ -108,7 +110,7 @@ def __init__(self, dim_in): self.edge_update_layer = ComformerConv_edge(in_channels=self.dim_in, out_channels=self.dim_in, heads=1, edge_dim=self.dim_in) - self.cholesky = Cholesky_head(self.dim_in, 6) + self.cholesky = Cholesky_head(self.dim_in) def forward(self, data) -> torch.Tensor: node_features = self.embedding(data.x) + self.temperature_proj_atom(data.temperature.unsqueeze(-1))[data.batch] diff --git a/models/utils.py b/models/utils.py index 340e652..c697c3d 100644 --- a/models/utils.py +++ b/models/utils.py @@ -1,5 +1,6 @@ import torch import math +import numpy as np from torch import nn, Tensor import torch.nn.functional as F from typing import Optional diff --git a/train/train.py b/train/train.py index 35ba80c..8e39987 100644 --- a/train/train.py +++ b/train/train.py @@ -44,8 +44,7 @@ def train(model, loaders, optimizer, loggers): run = wandb.init(entity=cfg.wandb_entity, project=cfg.wandb_project, - name=cfg.name) - + name=cfg.name, config=cfg) num_splits = len(loggers) full_epoch_times = [] diff --git a/train_scripts/train_ecomformer_adp.sh b/train_scripts/train_ecomformer_adp.sh new file mode 100644 index 0000000..4355655 --- /dev/null +++ b/train_scripts/train_ecomformer_adp.sh @@ -0,0 +1,11 @@ +CUDA_VISIBLE_DEVICES=2 python main.py --seed 0 --name "ecomformer" --model "ecomformer" --dataset "ADP" --dataset_path "/scratch/g1alexs/ADP_DATASET" \ + --wandb_project "CartNet Paper" --batch 4 --batch_accumulation 16 --lr 0.001 --epochs 50 & + +CUDA_VISIBLE_DEVICES=4 python main.py --seed 1 --name "ecomformer" --model "ecomformer" --dataset "ADP" --dataset_path "/scratch/g1alexs/ADP_DATASET" \ + --wandb_project "CartNet Paper" --batch 4 --batch_accumulation 16 --lr 0.001 --epochs 50 & + +CUDA_VISIBLE_DEVICES=2 python main.py --seed 2 --name "ecomformer" --model "ecomformer" --dataset "ADP" --dataset_path "/scratch/g1alexs/ADP_DATASET" \ + --wandb_project "CartNet Paper" --batch 4 --batch_accumulation 16 --lr 0.001 --epochs 50 & + +CUDA_VISIBLE_DEVICES=3 python main.py --seed 3 --name "ecomformer" --model "ecomformer" --dataset "ADP" --dataset_path "/scratch/g1alexs/ADP_DATASET" \ + --wandb_project "CartNet Paper" --batch 4 --batch_accumulation 16 --lr 0.001 --epochs 50 & \ No newline at end of file diff --git a/train_scripts/train_icomformer_adp.sh b/train_scripts/train_icomformer_adp.sh index 4ae4d91..481b43b 100644 --- a/train_scripts/train_icomformer_adp.sh +++ b/train_scripts/train_icomformer_adp.sh @@ -1,14 +1,10 @@ -CUDA_VISIBLE_DEVICES=0 python main.py --seed 0 --name "CartNet" --model "CartNet" --dataset "ADP" --dataset_path "/scratch/g1alexs/ADP_DATASET" \ - --wandb_project "CartNet Paper" --batch_size 64 --lr 0.001 --epochs 50 \ - --augment & -CUDA_VISIBLE_DEVICES=4 python main.py --seed 1 --name "CartNet" --model "CartNet" --dataset "ADP" --dataset_path "/scratch/g1alexs/ADP_DATASET" \ - --wandb_project "CartNet Paper" --batch_size 64 --lr 0.001 --epochs 50 \ - --augment & +CUDA_VISIBLE_DEVICES=0 python main.py --seed 0 --name "icomformer" --model "icomformer" --dataset "ADP" --dataset_path "/scratch/g1alexs/ADP_DATASET" \ + --wandb_project "CartNet Paper" --batch_size 64 --lr 0.001 --epochs 50 & +CUDA_VISIBLE_DEVICES=4 python main.py --seed 1 --name "icomformer" --model "icomformer" --dataset "ADP" --dataset_path "/scratch/g1alexs/ADP_DATASET" \ + --wandb_project "CartNet Paper" --batch_size 64 --lr 0.001 --epochs 50 & -CUDA_VISIBLE_DEVICES=2 python main.py --seed 2 --name "CartNet" --model "CartNet" --dataset "ADP" --dataset_path "/scratch/g1alexs/ADP_DATASET" \ - --wandb_project "CartNet Paper" --batch_size 64 --lr 0.001 --epochs 50 \ - --augment & +CUDA_VISIBLE_DEVICES=2 python main.py --seed 2 --name "icomformer" --model "icomformer" --dataset "ADP" --dataset_path "/scratch/g1alexs/ADP_DATASET" \ + --wandb_project "CartNet Paper" --batch_size 64 --lr 0.001 --epochs 50 & -CUDA_VISIBLE_DEVICES=3 python main.py --seed 3 --name "CartNet" --model "CartNet" --dataset "ADP" --dataset_path "/scratch/g1alexs/ADP_DATASET" \ - --wandb_project "CartNet Paper" --batch_size 64 --lr 0.001 --epochs 50 \ - --augment & \ No newline at end of file +CUDA_VISIBLE_DEVICES=3 python main.py --seed 3 --name "icomformer" --model "icomformer" --dataset "ADP" --dataset_path "/scratch/g1alexs/ADP_DATASET" \ + --wandb_project "CartNet Paper" --batch_size 64 --lr 0.001 --epochs 50 & \ No newline at end of file