Skip to content

Commit

Permalink
clean hparams, add readme
Browse files Browse the repository at this point in the history
  • Loading branch information
essharom committed Aug 22, 2024
1 parent 65b0515 commit c649385
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 26 deletions.
11 changes: 8 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,18 @@ docker pull ghcr.io/stracquadaniolab/gnn-suite:latest
models = ["gcn", "gat", ..., "new_model"]
```
## Running the Hyperparameter Optimization Workflow
## Hyperparameter Optimization with Optuna
To run the `optuna`-based hyperparameter optimization workflow defined in `hyperopt.py`:
To run the hyperparameter optimization workflow using Nextflow:
```bash
nextflow run main.nf -profile docker,<experiment_file> -entry hyperopt
```
The results of the search will be in `results/hyperparameters`.

The results of the search will be stored in the `results/hyperparameters/<experiment_file>/` directory. You can find the best trial information in the `best_trial_<model>_<experiment>.txt` file.

For more information on `optuna`, you can visit the official documentation at [https://optuna.readthedocs.io](https://optuna.readthedocs.io).


## FAQ
In case:
Expand Down
16 changes: 14 additions & 2 deletions bin/hyperopt.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@

from gnn import run

import os
import sys
import contextlib

def run_silently(func, *args, **kwargs):
    """Invoke ``func`` while discarding anything it prints to stdout.

    ``sys.stdout`` is temporarily pointed at the null device
    (``os.devnull``) for the duration of the call, so ``print`` output
    produced by ``func`` is dropped. Only stdout is redirected; stderr
    is left untouched.

    Args:
        func: Callable to execute.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func(*args, **kwargs)`` returns.
    """
    # Both context managers are released (stdout restored, sink closed)
    # before the result is handed back to the caller.
    with open(os.devnull, 'w') as sink, contextlib.redirect_stdout(sink):
        result = func(*args, **kwargs)
    return result

def objective_gnn(trial, model_name, gene_filename, network_filename, num_epochs=300):
# Define hyperparameters to optimize

Expand All @@ -27,6 +36,9 @@ def objective_gnn(trial, model_name, gene_filename, network_filename, num_epochs
verbose_interval= 10,
dropout= dropout
)




return bacc

Expand Down Expand Up @@ -64,8 +76,8 @@ def run_optuna(data_pair, model):
model_name = model

#testing num_epochs
num_epochs = 4
#num_epochs = 250
#num_epochs = 5
num_epochs = 250

study = optuna.create_study(study_name=model_name+"_hp_search",
direction="maximize")
Expand Down
40 changes: 20 additions & 20 deletions bin/hyperopt_all_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def run_optuna(data_pair, model):
data_name = data_pair['name']
model_name = model

num_epochs = 250
num_epochs = 25
# RENAME FOR DIFFERENT MODELS AND IN OBJECTIVE FUNCTION above
#model_name = "GCN2"

Expand All @@ -85,7 +85,7 @@ def run_optuna(data_pair, model):
network_filename,
num_epochs),
n_jobs=-1,
n_trials=300)
n_trials=10)


# Print the best trial
Expand Down Expand Up @@ -115,26 +115,26 @@ def run_optuna(data_pair, model):
'name': 'string',
'networkFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_stringhc.tsv',
'geneFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_fpancanall_nstringhc_lbailey.csv'
},
{
'name': 'string_cosmic',
'networkFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_stringhc.tsv',
'geneFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_fpancanall_nstringhc_lcosmic.csv'
},
{
'name': 'biogrid',
'networkFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_biogridhc.tsv',
'geneFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_fpancanall_nbiogridhc_lbailey.csv'
},
{
'name': 'biogrid_cosmic',
'networkFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_biogridhc.tsv',
'geneFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_fpancanall_nbiogridhc_lcosmic.csv'
}
}#,
# {
# 'name': 'string_cosmic',
# 'networkFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_stringhc.tsv',
# 'geneFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_fpancanall_nstringhc_lcosmic.csv'
# },
# {
# 'name': 'biogrid',
# 'networkFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_biogridhc.tsv',
# 'geneFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_fpancanall_nbiogridhc_lbailey.csv'
# },
# {
# 'name': 'biogrid_cosmic',
# 'networkFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_biogridhc.tsv',
# 'geneFile': '/home/essharom/code/cancer-gnn-nf/data/entrez_fpancanall_nbiogridhc_lcosmic.csv'
# }
]

models = ["sage", "gin", "gtn", "gcn2", "gcn", "gat", "gat3h", "hgcn", "phgcn"]

#models = ["sage", "gin", "gtn", "gcn2", "gcn", "gat", "gat3h", "hgcn", "phgcn"]
models = ["gcn"]

for data_pair in data_pairs:
for model in models:
Expand Down
2 changes: 1 addition & 1 deletion conf/string.config
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ params {
networkFile = "${baseDir}/data/entrez_stringhc.tsv"
geneFile = "${baseDir}/data/entrez_fpancanall_nstringhc_lbailey.csv"
epochs = [100]
models = ["gcn",]
models = ["gcn", "gat"]
replicates = 3
verbose_interval = 1
dropout = 0.2
Expand Down
1 change: 1 addition & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ process HyperparameterOptimization {
hyperopt.py ${geneFile} ${networkFile}\
${model} \
${dataSet} > best_trial_${model}_${dataSet}.txt
clean_hparams.py best_trial_${model}_${dataSet}.txt
"""
}

Expand Down

0 comments on commit c649385

Please sign in to comment.