From c649385c6cb85494d73139ff9c463e98b0fc2b12 Mon Sep 17 00:00:00 2001 From: essharom Date: Thu, 22 Aug 2024 19:48:51 +0100 Subject: [PATCH] clean hparams, add readme --- README.md | 11 ++++++++--- bin/hyperopt.py | 16 ++++++++++++++-- bin/hyperopt_all_2.py | 40 ++++++++++++++++++++-------------------- conf/string.config | 2 +- main.nf | 1 + 5 files changed, 44 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 1d587e7..6babf6f 100644 --- a/README.md +++ b/README.md @@ -127,13 +127,18 @@ docker pull ghcr.io/stracquadaniolab/gnn-suite:latest models = ["gcn", "gat", ..., "new_model"] ``` -## Running the Hyperparameter Optimization Workflow +## Hyperparameter Optimization with Optuna + +To run the `optuna`-based hyperparameter optimization workflow defined in `hyperopt.py`, use the `hyperopt` entry point: -To run the hyperparameter optimization workflow using Nextflow: ```bash nextflow run main.nf -profile docker, -entry hyperopt ``` -The results of the search will be in `results/hyperparameters`. + +The results of the search will be stored in the `results/hyperparameters//` directory. You can find the best trial information in the `best_trial_<model>_<dataset>.txt` file. + +For more information on `optuna`, you can visit the official documentation at [https://optuna.readthedocs.io](https://optuna.readthedocs.io). 
+ ## FAQ In case: diff --git a/bin/hyperopt.py b/bin/hyperopt.py index 4290c0d..ebea114 100755 --- a/bin/hyperopt.py +++ b/bin/hyperopt.py @@ -7,6 +7,15 @@ from gnn import run +import os +import sys +import contextlib + +def run_silently(func, *args, **kwargs): + with open(os.devnull, 'w') as fnull: + with contextlib.redirect_stdout(fnull): + return func(*args, **kwargs) + def objective_gnn(trial, model_name, gene_filename, network_filename, num_epochs=300): # Define hyperparameters to optimize @@ -27,6 +36,9 @@ def objective_gnn(trial, model_name, gene_filename, network_filename, num_epochs verbose_interval= 10, dropout= dropout ) + + + return bacc @@ -64,8 +76,8 @@ def run_optuna(data_pair, model): model_name = model #testing num_epochs - num_epochs = 4 - #num_epochs = 250 + #num_epochs = 5 + num_epochs = 250 study = optuna.create_study(study_name=model_name+"_hp_search", direction="maximize") diff --git a/bin/hyperopt_all_2.py b/bin/hyperopt_all_2.py index 2ed13ee..a42bccf 100644 --- a/bin/hyperopt_all_2.py +++ b/bin/hyperopt_all_2.py @@ -62,7 +62,7 @@ def run_optuna(data_pair, model): data_name = data_pair['name'] model_name = model - num_epochs = 250 + num_epochs = 25 # RENAME FOR DIFFERENT MODELS AND IN OBJECTIVE FUNCTION above #model_name = "GCN2" @@ -85,7 +85,7 @@ def run_optuna(data_pair, model): network_filename, num_epochs), n_jobs=-1, - n_trials=300) + n_trials=10) # Print the best trial @@ -115,26 +115,26 @@ def run_optuna(data_pair, model): 'name': 'string', 'networkFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_stringhc.tsv', 'geneFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_fpancanall_nstringhc_lbailey.csv' - }, - { - 'name': 'string_cosmic', - 'networkFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_stringhc.tsv', - 'geneFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_fpancanall_nstringhc_lcosmic.csv' - }, - { - 'name': 'biogrid', - 'networkFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_biogridhc.tsv', - 
'geneFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_fpancanall_nbiogridhc_lbailey.csv' - }, - { - 'name': 'biogrid_cosmic', - 'networkFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_biogridhc.tsv', - 'geneFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_fpancanall_nbiogridhc_lcosmic.csv' - } + }#, + # { + # 'name': 'string_cosmic', + # 'networkFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_stringhc.tsv', + # 'geneFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_fpancanall_nstringhc_lcosmic.csv' + # }, + # { + # 'name': 'biogrid', + # 'networkFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_biogridhc.tsv', + # 'geneFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_fpancanall_nbiogridhc_lbailey.csv' + # }, + # { + # 'name': 'biogrid_cosmic', + # 'networkFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_biogridhc.tsv', + # 'geneFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_fpancanall_nbiogridhc_lcosmic.csv' + # } ] - models = ["sage", "gin", "gtn", "gcn2", "gcn", "gat", "gat3h", "hgcn", "phgcn"] - + #models = ["sage", "gin", "gtn", "gcn2", "gcn", "gat", "gat3h", "hgcn", "phgcn"] + models = ["gcn"] for data_pair in data_pairs: for model in models: diff --git a/conf/string.config b/conf/string.config index 96da112..e7b196a 100644 --- a/conf/string.config +++ b/conf/string.config @@ -4,7 +4,7 @@ params { networkFile = "${baseDir}/data/entrez_stringhc.tsv" geneFile = "${baseDir}/data/entrez_fpancanall_nstringhc_lbailey.csv" epochs = [100] - models = ["gcn",] + models = ["gcn", "gat"] replicates = 3 verbose_interval = 1 dropout = 0.2 diff --git a/main.nf b/main.nf index 882fed5..8936920 100644 --- a/main.nf +++ b/main.nf @@ -131,6 +131,7 @@ process HyperparameterOptimization { hyperopt.py ${geneFile} ${networkFile}\ ${model} \ ${dataSet} > best_trial_${model}_${dataSet}.txt + clean_hparams.py best_trial_${model}_${dataSet}.txt """ }