diff --git a/.github/linters/.flake8 b/.github/linters/.flake8 index 6deafc2..470bea4 100644 --- a/.github/linters/.flake8 +++ b/.github/linters/.flake8 @@ -1,2 +1,3 @@ [flake8] max-line-length = 120 +extend-ignore = E402 diff --git a/.gitignore b/.gitignore index 7d2fafe..b8d56c9 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,4 @@ dist super-linter.log .eggs build +debug diff --git a/src/rpfbagr/__main__.py b/src/rpfbagr/__main__.py index a751325..752a4f9 100644 --- a/src/rpfbagr/__main__.py +++ b/src/rpfbagr/__main__.py @@ -2,7 +2,9 @@ import logging import os import sys +import tempfile +os.environ["XDG_CACHE_HOME"] = tempfile.TemporaryDirectory().name from rpfbagr.medium import associate_flux_env, load_medium from rpfbagr.metabolic import gene_ko, gene_ou from rpfbagr.preprocess import build_model, genes_annotate, save_results @@ -95,6 +97,18 @@ def main(): default=1, help="Number of threads to use", ) + parser_helper.add_argument( + "--seed", + type=int, + default=0, + help="Seed", + ) + parser_helper.add_argument( + "--max-time", + type=int, + help="Max time to search the best combination (minutes)", + ) + parser_helper.add_argument( "--log-level", choices=["ERROR", "WARNING", "INFO", "DEBUG"], @@ -173,16 +187,23 @@ def main(): biomass_id=args.biomass_rxn_id, target_id=args.target_rxn_id, substrate_id=args.substrate_rxn_id, + max_time=args.max_time, logger=logger, + seed=args.seed, thread=args.thread, ) elif args.strategy == "ou": logger.info("Run OptKnock") + if args.substrate_rxn_id: + logger.warning("Substrate reaction will be ignored with OptKnock") + if args.seed: + logger.warning("Seed will be ignored with OptKnock") res = gene_ou( model=model, max_knockouts=args.max_knockouts, biomass_id=args.biomass_rxn_id, target_id=args.target_rxn_id, + max_time=args.max_time, logger=logger, thread=args.thread, ) diff --git a/src/rpfbagr/medium.py b/src/rpfbagr/medium.py index 1a6907e..98c4c93 100644 --- a/src/rpfbagr/medium.py +++ b/src/rpfbagr/medium.py @@ 
-3,7 +3,7 @@ from collections import OrderedDict import pandas as pd -from cobra import Model +from cobra.core.model import Model def load_medium(path: str) -> dict: diff --git a/src/rpfbagr/metabolic.py b/src/rpfbagr/metabolic.py index 5f63aaa..8d56019 100644 --- a/src/rpfbagr/metabolic.py +++ b/src/rpfbagr/metabolic.py @@ -1,10 +1,11 @@ import logging +from typing import Optional import pandas as pd from cameo.flux_analysis.simulation import lmoma from cameo.strain_design.deterministic.linear_programming import OptKnock from cameo.strain_design.heuristic.evolutionary_based import OptGene -from cobra import Model +from cobra.core.model import Model def gene_ko( @@ -13,17 +14,26 @@ def gene_ko( biomass_id: str, target_id: str, substrate_id: str, + max_time: Optional[int], logger: logging.Logger, + seed: int, thread: int = 1, ) -> pd.DataFrame: optgene = OptGene(model) - results = optgene.run( + # Init. + args = dict( target=target_id, biomass=biomass_id, substrate=substrate_id, max_knockouts=max_knockouts, simulation_method=lmoma, + seed=seed, ) + if max_time: + args.update(dict(max_time=(max_time, 0))) + # Run. + results = optgene.run(**args) + df = pd.DataFrame( columns=[ "reactions", @@ -40,7 +50,7 @@ def gene_ko( try: df = results.data_frame except Exception: - logging.warning("An error occurred, maybe there is no solution") + logger.warning("An error occurred, maybe there is no solution") return df @@ -49,15 +59,22 @@ def gene_ou( max_knockouts: int, biomass_id: str, target_id: str, + max_time: Optional[int], logger: logging.Logger, thread: int = 1, ) -> pd.DataFrame: optknock = OptKnock(model, fraction_of_optimum=0.1) - results = optknock.run( + # Init. + args = dict( target=target_id, biomass=biomass_id, max_knockouts=max_knockouts, ) + if max_time: + args.update(dict(max_time=(max_time, 0))) + # Run. 
+ results = optknock.run(**args) + df = pd.DataFrame( columns=[ "reactions", @@ -71,5 +88,5 @@ def gene_ou( try: df = results.data_frame except Exception: - logging.warning("An error occurred, maybe there is no solution") + logger.warning("An error occurred, maybe there is no solution") return df diff --git a/src/rpfbagr/preprocess.py b/src/rpfbagr/preprocess.py index 554638e..f85c1ea 100644 --- a/src/rpfbagr/preprocess.py +++ b/src/rpfbagr/preprocess.py @@ -2,10 +2,10 @@ import logging from typing import Dict -import cobra import pandas as pd from Bio import Entrez from cameo import load_model +from cobra.core.model import Model def build_model( @@ -35,7 +35,7 @@ def build_model( logger.error("Reaction not found in the model: %s" % (target_id,)) return None - logging.info("Set objective") + logger.info("Set objective") model.objective = { model.reactions.get_by_id(biomass_id): 1.0, model.reactions.get_by_id(target_id): 0.5, @@ -44,7 +44,7 @@ def build_model( return model -def genes_annotate(model: cobra.Model, df: pd.DataFrame, email: str) -> pd.DataFrame: +def genes_annotate(model: Model, df: pd.DataFrame, email: str) -> pd.DataFrame: if df.empty: return df diff --git a/tests/test_software.py b/tests/test_software.py index a911028..cf68942 100644 --- a/tests/test_software.py +++ b/tests/test_software.py @@ -56,6 +56,7 @@ def test_software_butanol_light(self): args += ["--output-file-csv", fd.name] args += ["--strategy", "ko"] args += ["--max-knockouts", "3"] + args += ["--max-time", "2"] args += ["--input-medium-file", self.medium_butanol_tsv] args += ["--thread", "1"] @@ -88,6 +89,7 @@ def test_software_butanol_iml1515(self): args += ["--output-file-tsv", fd.name] args += ["--strategy", "ko"] args += ["--max-knockouts", "3"] + args += ["--max-time", "2"] args += ["--input-medium-file", self.medium_butanol_csv] args += ["--thread", "1"] @@ -106,3 +108,68 @@ def test_software_butanol_iml1515(self): assert dialect.delimiter == "\t" os.remove(fd.name) + + def 
test_software_galaxy(self): + # Be careful: can not test gene annotation into + # workflows running simultaneously + with tempfile.NamedTemporaryFile(delete=False) as fd: + args = ["python", "-m", __app_name__] + args += ["--input-model-file", self.model_ecoli_gz] + args += ["--input-pathway-file", self.pathway_butanol] + args += ["--biomass-rxn-id", "BIOMASS_Ec_iAF1260_core_59p81M"] + args += ["--target-rxn-id", "EX_1btol_e"] + args += ["--substrate-rxn-id", "EX_glc__D_e"] + args += ["--output-file-tsv", fd.name] + args += ["--strategy", "ko"] + args += ["--max-knockouts", "3"] + args += ["--max-time", "2"] + args += ["--input-medium-file", self.medium_butanol_tsv] + args += ["--thread", "1"] + + ret = Test_software.launch(args) + if ret.returncode > 0: + print(ret.stderr) + print(ret.stdout) + sys.exit(1) + lines = [] + with open(fd.name) as fid: + lines = fid.read().splitlines() + self.assertGreater(len(lines), 0) + + # Check delimiter + with open(fd.name) as fid: + dialect = csv.Sniffer().sniff(fid.readline()) + assert dialect.delimiter == "\t" + os.remove(fd.name) + + def test_software_butanol_light_ou(self): + # Be careful: can not test gene annotation into + # workflows running simultaneously + with tempfile.NamedTemporaryFile(delete=False) as fd: + args = ["python", "-m", __app_name__] + args += ["--input-model-file", self.model_ecoli_gz] + args += ["--input-pathway-file", self.pathway_butanol] + args += ["--biomass-rxn-id", "BIOMASS_Ec_iAF1260_core_59p81M"] + args += ["--target-rxn-id", "EX_1btol_e"] + args += ["--substrate-rxn-id", "EX_glc__D_e"] + args += ["--output-file-csv", fd.name] + args += ["--strategy", "ou"] + args += ["--max-knockouts", "3"] + args += ["--input-medium-file", self.medium_butanol_tsv] + args += ["--thread", "1"] + + ret = Test_software.launch(args) + if ret.returncode > 0: + print(ret.stderr) + print(ret.stdout) + sys.exit(1) + lines = [] + with open(fd.name) as fid: + lines = fid.read().splitlines() + self.assertGreater(len(lines), 
0) + + # Check delimiter + with open(fd.name) as fid: + dialect = csv.Sniffer().sniff(fid.readline()) + assert dialect.delimiter == "," + os.remove(fd.name)