From ada8228f3d2e64f70de84e4b521a35ff1029ca2b Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Thu, 22 Sep 2022 08:13:26 +0000 Subject: [PATCH 01/20] refactor(straindesign): add subcommands --- src/straindesign/__main__.py | 227 ++------------------------------- src/straindesign/commands.py | 234 +++++++++++++++++++++++++++++++++++ 2 files changed, 246 insertions(+), 215 deletions(-) create mode 100644 src/straindesign/commands.py diff --git a/src/straindesign/__main__.py b/src/straindesign/__main__.py index 9de087f..449d2c7 100644 --- a/src/straindesign/__main__.py +++ b/src/straindesign/__main__.py @@ -1,224 +1,21 @@ -import argparse import logging -import os import sys -from straindesign._version import __app_name__ -from straindesign.medium import associate_flux_env, load_medium -from straindesign.metabolic import gene_ko, gene_ou -from straindesign.preprocess import build_model, genes_annotate, save_results +from straindesign import commands def main(): - """CLI for StrainDesign""" - - desc = ( - __app_name__ - + " provides a cli interface to run OptGene with an heterologous pathway." - ) - parser = argparse.ArgumentParser(description=desc, prog="python -m " + __app_name__) - # Input - parser_input = parser.add_argument_group("Input") - parser_input.add_argument( - "--input-model-file", type=str, required=True, help="GEM model file (SBML)" - ) - parser_input.add_argument( - "--input-pathway-file", - type=str, - required=False, - help="SBML file that contains an heterologous pathway", - ) - parser_input.add_argument( - "--biomass-rxn-id", - type=str, - required=True, - help="Biomass reaction ID", - ) - parser_input.add_argument( - "--target-rxn-id", - type=str, - required=False, - help="Target reaction ID", - ) - parser_input.add_argument( - "--substrate-rxn-id", - type=str, - required=False, - help="Substracte reaction ID (eg. 
carbon source)", - ) - - # Output - parser_output = parser.add_argument_group("Output") - parser_output.add_argument( - "--output-file-csv", - type=str, - help="output file (csv)", - ) - parser_output.add_argument( - "--output-file-tsv", - type=str, - help="output file (tsv)", - ) - - # Simulation - parser_sim = parser.add_argument_group("Simulation") - parser_sim.add_argument( - "--strategy", - type=str, - choices=["ko", "ou"], - default="ko", - help="Strategy to use : ko (knocking out) or ou " - "(over-under expressing), (default: ko)", - ) - parser_input.add_argument( - "--max-knockouts", - type=int, - default=3, - required=False, - help="Number of maximum knockouts genes allowed", - ) - - # Medium - parser_medium = parser.add_argument_group("Medium") - parser_medium.add_argument( - "--input-medium-file", - type=str, - help="Provide a csv or tsv file with an header as ," - ", . This file " - "provides information about metabolites (Metanetx Id) " - "to add or remove.", - ) - # Others. - parser_helper = parser.add_argument_group("Technical") - parser_helper.add_argument( - "--thread", - type=int, - default=1, - help="Number of threads to use", - ) - parser_helper.add_argument( - "--seed", - type=int, - default=0, - help="Seed", - ) - parser_helper.add_argument( - "--max-time", - type=int, - help="Max time to search the best combination (minutes)", - ) - - parser_helper.add_argument( - "--log-level", - choices=["ERROR", "WARNING", "INFO", "DEBUG"], - default="INFO", - type=str, - help="Log level", - ) - parser_input.add_argument( - "--email", - type=str, - required=False, - help="Provide your email to annotate genes id with the NCBI website", - ) - - # Compute - args = parser.parse_args() - - # Logging. 
- logger = logging.getLogger(name="main") - formatter = logging.Formatter( - "%(asctime)s - %(levelname)s - %(message)s", datefmt="%d-%m-%Y %H:%M" - ) - st_handler = logging.StreamHandler() - st_handler.setFormatter(formatter) - logger.addHandler(st_handler) - logger.setLevel(args.log_level) - - # Check arguments. - if not os.path.isfile(args.input_model_file): - logger.error('Input model file doesn"t exist: %s' % (args.input_model_file,)) - parser.exit(1) - if args.input_pathway_file is not None and not os.path.isfile( - args.input_pathway_file - ): - logger.error('Input pathway file doesn"t exist') - parser.exit(1) - - if args.output_file_csv and not os.path.isdir( - os.path.dirname(args.output_file_csv) - ): - logger.debug("Create out directory: %s") - os.makedirs(os.path.dirname(args.output_file_csv)) - if args.output_file_tsv and not os.path.isdir( - os.path.dirname(args.output_file_tsv) - ): - logger.debug("Create out directory: %s") - os.makedirs(os.path.dirname(args.output_file_tsv)) - - # Load model - logger.info("Build model") - model = build_model( - model_path=args.input_model_file, - pathway_path=args.input_pathway_file, - biomass_id=args.biomass_rxn_id, - target_id=args.target_rxn_id, - logger=logger, - ) - if model is None: - parser.exit(1) - - # Medium - logger.info("Build medium") - envcond = load_medium(path=args.input_medium_file) - model = associate_flux_env(model=model, envcond=envcond, logger=logger) - if model is None: - parser.exit(1) - - # Simulation - logger.info("Build gene ko") - res = None - if args.strategy == "ko": - logger.info("Run OptGene") - res = gene_ko( - model=model, - max_knockouts=args.max_knockouts, - biomass_id=args.biomass_rxn_id, - target_id=args.target_rxn_id, - substrate_id=args.substrate_rxn_id, - max_time=args.max_time, - logger=logger, - seed=args.seed, - thread=args.thread, - ) - elif args.strategy == "ou": - logger.info("Run OptKnock") - if args.substrate_rxn_id: - logger.warning("Substrate reaction will be 
ignored with OptKnock") - if args.seed: - logger.warning("Seed will be ignored with OptKnock") - res = gene_ou( - model=model, - max_knockouts=args.max_knockouts, - biomass_id=args.biomass_rxn_id, - target_id=args.target_rxn_id, - max_time=args.max_time, - logger=logger, - thread=args.thread, - ) - - # Processing Results - if res is not None: - if args.email and args.strategy == "ko": - logger.info("Perform gene annotation") - res = genes_annotate(model=model, df=res, email=args.email, logger=logger) - logger.info("Save results") - if args.output_file_csv: - save_results(res, path=args.output_file_csv, sep=",") - if args.output_file_tsv: - save_results(res, path=args.output_file_tsv, sep="\t") - - return 0 + """Entrypoint to commandline""" + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", + datefmt="%d-%m-%Y %H:%M", + ) + args = commands.parse_args() + # No arguments or subcommands were given. + if len(args.__dict__) < 1: + commands.print_help() + args.func(args) if __name__ == "__main__": diff --git a/src/straindesign/commands.py b/src/straindesign/commands.py new file mode 100644 index 0000000..5236078 --- /dev/null +++ b/src/straindesign/commands.py @@ -0,0 +1,234 @@ +import argparse +import logging +import os +import sys + +from straindesign._version import __app_name__, __version__ +from straindesign.medium import associate_flux_env, load_medium +from straindesign.metabolic import gene_ko, gene_ou +from straindesign.preprocess import build_model, genes_annotate, save_results + +AP = argparse.ArgumentParser( + description=__app_name__ + " provides a cli interface to predict gene knockout " + "targets with an heterologous pathway", + epilog="See online documentation: https://github.com/brsynth/straindesign", +) +AP_subparsers = AP.add_subparsers(help="Sub-commnands (use with -h for more info)") + + +def _cmd_sim_del(args): + """Build plan of experiment for BASIC protocol""" + logging.info("Start - 
simulate-deletion") + # Check arguments. + if not os.path.isfile(args.input_model_file): + logging.error('Input model file doesn"t exist: %s' % (args.input_model_file,)) + parser.exit(1) + if args.input_pathway_file is not None and not os.path.isfile( + args.input_pathway_file + ): + logging.error('Input pathway file doesn"t exist') + parser.exit(1) + + if args.output_file_csv and not os.path.isdir( + os.path.dirname(args.output_file_csv) + ): + logging.debug("Create out directory: %s") + os.makedirs(os.path.dirname(args.output_file_csv)) + if args.output_file_tsv and not os.path.isdir( + os.path.dirname(args.output_file_tsv) + ): + logging.debug("Create out directory: %s") + os.makedirs(os.path.dirname(args.output_file_tsv)) + + # Load model + logging.info("Build model") + model = build_model( + model_path=args.input_model_file, + pathway_path=args.input_pathway_file, + biomass_id=args.biomass_rxn_id, + target_id=args.target_rxn_id, + logging=logger, + ) + if model is None: + parser.exit(1) + + # Medium + logging.info("Build medium") + envcond = load_medium(path=args.input_medium_file) + model = associate_flux_env(model=model, envcond=envcond, logging=logger) + if model is None: + parser.exit(1) + + # Simulation + logging.info("Build gene ko") + res = None + if args.strategy == "ko": + logging.info("Run OptGene") + res = gene_ko( + model=model, + max_knockouts=args.max_knockouts, + biomass_id=args.biomass_rxn_id, + target_id=args.target_rxn_id, + substrate_id=args.substrate_rxn_id, + max_time=args.max_time, + logging=logger, + seed=args.seed, + thread=args.thread, + ) + elif args.strategy == "ou": + logging.info("Run OptKnock") + if args.substrate_rxn_id: + logging.warning("Substrate reaction will be ignored with OptKnock") + if args.seed: + logging.warning("Seed will be ignored with OptKnock") + res = gene_ou( + model=model, + max_knockouts=args.max_knockouts, + biomass_id=args.biomass_rxn_id, + target_id=args.target_rxn_id, + max_time=args.max_time, + 
logging=logger, + thread=args.thread, + ) + + # Processing Results + if res is not None: + if args.email and args.strategy == "ko": + logging.info("Perform gene annotation") + res = genes_annotate(model=model, df=res, email=args.email, logging=logger) + logging.info("Save results") + if args.output_file_csv: + save_results(res, path=args.output_file_csv, sep=",") + if args.output_file_tsv: + save_results(res, path=args.output_file_tsv, sep="\t") + + logging.info("End - simulate-deletion") + + +P_sim_del = AP_subparsers.add_parser("simulate-deletion", help=_cmd_sim_del.__doc__) +# Input +P_sim_del_input = P_sim_del.add_argument_group("Input") +P_sim_del_input.add_argument( + "--input-model-file", type=str, required=True, help="GEM model file (SBML)" +) +P_sim_del_input.add_argument( + "--input-pathway-file", + type=str, + required=False, + help="SBML file that contains an heterologous pathway", +) +P_sim_del_input.add_argument( + "--biomass-rxn-id", + type=str, + required=True, + help="Biomass reaction ID", +) +P_sim_del_input.add_argument( + "--target-rxn-id", + type=str, + help="Target reaction ID", +) +P_sim_del_input.add_argument( + "--substrate-rxn-id", + type=str, + help="Substracte reaction ID (eg. 
carbon source)", +) +# Output +P_sim_del_output = P_sim_del.add_argument_group("Output") +P_sim_del_output.add_argument( + "--output-file-csv", + type=str, + help="output file (csv)", +) +P_sim_del_output.add_argument( + "--output-file-tsv", + type=str, + help="output file (tsv)", +) +# Parameters - Simulation +P_sim_del_sim = P_sim_del.add_argument_group("Simulation") +P_sim_del_sim.add_argument( + "--strategy", + type=str, + choices=["ko", "ou"], + default="ko", + help="Strategy to use : ko (knocking out) or ou " + "(over-under expressing), (default: ko)", +) +P_sim_del_sim.add_argument( + "--max-knockouts", + type=int, + default=3, + required=False, + help="Number of maximum knockouts genes allowed", +) +# Parameters - Medium +P_sim_del_medium = P_sim_del.add_argument_group("Medium") +P_sim_del_medium.add_argument( + "--input-medium-file", + type=str, + help="Provide a csv or tsv file with an header as ," + ", . This file " + "provides information about metabolites (Metanetx Id) " + "to add or remove.", +) +# Parameters - Others. +P_sim_del_helper = P_sim_del.add_argument_group("Technical") +P_sim_del_helper.add_argument( + "--thread", + type=int, + default=1, + help="Number of threads to use", +) +P_sim_del_helper.add_argument( + "--seed", + type=int, + default=0, + help="Seed", +) +P_sim_del_helper.add_argument( + "--max-time", + type=int, + help="Max time to search the best combination (minutes)", +) +P_sim_del_helper.add_argument( + "--log-level", + choices=["ERROR", "WARNING", "INFO", "DEBUG"], + default="INFO", + type=str, + help="Log level", +) +P_sim_del_helper.add_argument( + "--email", + type=str, + required=False, + help="Provide your email to annotate genes id with the NCBI website", +) +P_sim_del.set_defaults(func=_cmd_sim_del) + + +# Version. 
+def print_version(_args): + """Display this program"s version""" + print(__version__) + + +P_version = AP_subparsers.add_parser("version", help=print_version.__doc__) +P_version.set_defaults(func=print_version) + + +# Help. +def print_help(): + """Display this program"s help""" + print(AP_subparsers.help) + AP.exit() + + +# Main. +def parse_args(args=None): + """Parse the command line""" + return AP.parse_args(args=args) + + +if __name__ == "__main__": + sys.exit(main()) From ba5994de9b9128dcd9fad2318213a0b6e1bd628a Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Thu, 22 Sep 2022 08:29:32 +0000 Subject: [PATCH 02/20] tests(straindesign): update test_software to subcommand --- tests/test_software.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_software.py b/tests/test_software.py index c056cf2..a08c0d7 100644 --- a/tests/test_software.py +++ b/tests/test_software.py @@ -20,7 +20,7 @@ def test_software_butanol(self): # Be careful: can not test gene annotation into # worflows running simultaneously with tempfile.NamedTemporaryFile(delete=False) as fd: - args = ["python", "-m", __app_name__] + args = ["python", "-m", __app_name__, "simulate-deletion"] args += ["--input-model-file", self.model_ecoli_gz] args += ["--input-pathway-file", self.pathway_butanol] args += ["--biomass-rxn-id", "BIOMASS_Ec_iAF1260_core_59p81M"] @@ -47,7 +47,7 @@ def test_software_butanol_light(self): # Be careful: can not test gene annotation into # worflows running simultaneously with tempfile.NamedTemporaryFile(delete=False) as fd: - args = ["python", "-m", __app_name__] + args = ["python", "-m", __app_name__, "simulate-deletion"] args += ["--input-model-file", self.model_ecoli_core] args += ["--input-pathway-file", self.pathway_butanol] args += ["--biomass-rxn-id", "BIOMASS_Ecoli_core_w_GAM"] @@ -80,7 +80,7 @@ def test_software_butanol_iml1515(self): # Be careful: can not test gene annotation into # worflows running simultaneously with 
tempfile.NamedTemporaryFile(delete=False) as fd: - args = ["python", "-m", __app_name__] + args = ["python", "-m", __app_name__, "simulate-deletion"] args += ["--input-model-file", self.model_ecoli_iml1515] args += ["--input-pathway-file", self.pathway_butanol] args += ["--biomass-rxn-id", "biomass"] @@ -113,7 +113,7 @@ def test_software_galaxy(self): # Be careful: can not test gene annotation into # worflows running simultaneously with tempfile.NamedTemporaryFile(delete=False) as fd: - args = ["python", "-m", __app_name__] + args = ["python", "-m", __app_name__, "simulate-deletion"] args += ["--input-model-file", self.model_ecoli_gz] args += ["--input-pathway-file", self.pathway_butanol] args += ["--biomass-rxn-id", "BIOMASS_Ec_iAF1260_core_59p81M"] @@ -146,7 +146,7 @@ def test_software_butanol_light_ou(self): # Be careful: can not test gene annotation into # worflows running simultaneously with tempfile.NamedTemporaryFile(delete=False) as fd: - args = ["python", "-m", __app_name__] + args = ["python", "-m", __app_name__, "simulate-deletion"] args += ["--input-model-file", self.model_ecoli_gz] args += ["--input-pathway-file", self.pathway_butanol] args += ["--biomass-rxn-id", "BIOMASS_Ec_iAF1260_core_59p81M"] From b47ca647f8dba9132be69d4e2862e6a4aba97402 Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Thu, 22 Sep 2022 08:30:15 +0000 Subject: [PATCH 03/20] refactor(straindesign): rm logger configuration --- src/straindesign/commands.py | 15 +++------------ src/straindesign/medium.py | 4 ++-- src/straindesign/metabolic.py | 6 ++---- src/straindesign/preprocess.py | 20 +++++++++----------- 4 files changed, 16 insertions(+), 29 deletions(-) diff --git a/src/straindesign/commands.py b/src/straindesign/commands.py index 5236078..02e1780 100644 --- a/src/straindesign/commands.py +++ b/src/straindesign/commands.py @@ -7,6 +7,7 @@ from straindesign.medium import associate_flux_env, load_medium from straindesign.metabolic import gene_ko, gene_ou from 
straindesign.preprocess import build_model, genes_annotate, save_results +from straindesign.utils import cmdline, log AP = argparse.ArgumentParser( description=__app_name__ + " provides a cli interface to predict gene knockout " @@ -47,7 +48,6 @@ def _cmd_sim_del(args): pathway_path=args.input_pathway_file, biomass_id=args.biomass_rxn_id, target_id=args.target_rxn_id, - logging=logger, ) if model is None: parser.exit(1) @@ -55,7 +55,7 @@ def _cmd_sim_del(args): # Medium logging.info("Build medium") envcond = load_medium(path=args.input_medium_file) - model = associate_flux_env(model=model, envcond=envcond, logging=logger) + model = associate_flux_env(model=model, envcond=envcond) if model is None: parser.exit(1) @@ -71,7 +71,6 @@ def _cmd_sim_del(args): target_id=args.target_rxn_id, substrate_id=args.substrate_rxn_id, max_time=args.max_time, - logging=logger, seed=args.seed, thread=args.thread, ) @@ -87,7 +86,6 @@ def _cmd_sim_del(args): biomass_id=args.biomass_rxn_id, target_id=args.target_rxn_id, max_time=args.max_time, - logging=logger, thread=args.thread, ) @@ -95,7 +93,7 @@ def _cmd_sim_del(args): if res is not None: if args.email and args.strategy == "ko": logging.info("Perform gene annotation") - res = genes_annotate(model=model, df=res, email=args.email, logging=logger) + res = genes_annotate(model=model, df=res, email=args.email) logging.info("Save results") if args.output_file_csv: save_results(res, path=args.output_file_csv, sep=",") @@ -191,13 +189,6 @@ def _cmd_sim_del(args): type=int, help="Max time to search the best combination (minutes)", ) -P_sim_del_helper.add_argument( - "--log-level", - choices=["ERROR", "WARNING", "INFO", "DEBUG"], - default="INFO", - type=str, - help="Log level", -) P_sim_del_helper.add_argument( "--email", type=str, diff --git a/src/straindesign/medium.py b/src/straindesign/medium.py index 98c4c93..26c74bd 100644 --- a/src/straindesign/medium.py +++ b/src/straindesign/medium.py @@ -25,11 +25,11 @@ def load_medium(path: str) 
-> dict: return envcond -def associate_flux_env(model: Model, envcond: dict, logger: logging.Logger) -> Model: +def associate_flux_env(model: Model, envcond: dict) -> Model: for reaction_id, bounds in envcond.items(): reaction = model.reactions.get_by_id(reaction_id) if reaction is None: - logger.error("Reaction: %s not found in the model" % (reaction_id,)) + logging.error("Reaction: %s not found in the model" % (reaction_id,)) return None reaction.bounds = bounds return model diff --git a/src/straindesign/metabolic.py b/src/straindesign/metabolic.py index 8d56019..6e539d7 100644 --- a/src/straindesign/metabolic.py +++ b/src/straindesign/metabolic.py @@ -15,7 +15,6 @@ def gene_ko( target_id: str, substrate_id: str, max_time: Optional[int], - logger: logging.Logger, seed: int, thread: int = 1, ) -> pd.DataFrame: @@ -50,7 +49,7 @@ def gene_ko( try: df = results.data_frame except Exception: - logger.warning("An error occurred, maybe there is no solution") + logging.warning("An error occurred, maybe there is no solution") return df @@ -60,7 +59,6 @@ def gene_ou( biomass_id: str, target_id: str, max_time: Optional[int], - logger: logging.Logger, thread: int = 1, ) -> pd.DataFrame: optknock = OptKnock(model, fraction_of_optimum=0.1) @@ -88,5 +86,5 @@ def gene_ou( try: df = results.data_frame except Exception: - logger.warning("An error occurred, maybe there is no solution") + logging.warning("An error occurred, maybe there is no solution") return df diff --git a/src/straindesign/preprocess.py b/src/straindesign/preprocess.py index 1d9703e..9db0ba7 100644 --- a/src/straindesign/preprocess.py +++ b/src/straindesign/preprocess.py @@ -14,29 +14,28 @@ def build_model( pathway_path: str, biomass_id: str, target_id: str, - logger: logging.Logger, ): - logger.info("Load model") + logging.info("Load model") model = load_model(model_path) if pathway_path: - logger.info("Load pathway") + logging.info("Load pathway") pathway_model = load_model(pathway_path) - logger.info("Merge 
model and pathway") + logging.info("Merge model and pathway") model.merge(pathway_model, inplace=True) # Check if reactions are in the model reactions_id = [x.id for x in model.reactions] - logger.info("Check if main objective is in the model") + logging.info("Check if main objective is in the model") if biomass_id not in reactions_id: - logger.error("Reaction not found in the model: %s" % (biomass_id,)) + logging.error("Reaction not found in the model: %s" % (biomass_id,)) return None - logger.info("Check if target reaction is in the model") + logging.info("Check if target reaction is in the model") if target_id not in reactions_id: - logger.error("Reaction not found in the model: %s" % (target_id,)) + logging.error("Reaction not found in the model: %s" % (target_id,)) return None - logger.info("Set objective") + logging.info("Set objective") model.objective = { model.reactions.get_by_id(biomass_id): 1.0, model.reactions.get_by_id(target_id): 0.5, @@ -49,7 +48,6 @@ def genes_annotate( model: Model, df: pd.DataFrame, email: str, - logger: logging.Logger, ) -> pd.DataFrame: if df.empty: @@ -94,7 +92,7 @@ def genes_annotate( labels_groups.append("(%s)" % (",".join(labels),)) df.at[ix, "genes_annotation"] = ",".join(labels_groups) if is_ncbi_error: - logger.warning("NCBI annotation failing for some items") + logging.warning("NCBI annotation failing for some items") return df From ae6ee1ec503725ac8649a9ce8bbdff739e9c3096 Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Thu, 22 Sep 2022 08:54:54 +0000 Subject: [PATCH 04/20] feat(rpdoe): utils, add cmdline --- src/straindesign/commands.py | 22 +++++++--------------- src/straindesign/utils/cmdline.py | 30 ++++++++++++++++++++++++++++++ tests/test_medium.py | 1 - tests/test_preprocess.py | 6 ------ 4 files changed, 37 insertions(+), 22 deletions(-) create mode 100644 src/straindesign/utils/cmdline.py diff --git a/src/straindesign/commands.py b/src/straindesign/commands.py index 02e1780..f88516d 100644 --- 
a/src/straindesign/commands.py +++ b/src/straindesign/commands.py @@ -1,13 +1,12 @@ import argparse import logging import os -import sys from straindesign._version import __app_name__, __version__ from straindesign.medium import associate_flux_env, load_medium from straindesign.metabolic import gene_ko, gene_ou from straindesign.preprocess import build_model, genes_annotate, save_results -from straindesign.utils import cmdline, log +from straindesign.utils import cmdline AP = argparse.ArgumentParser( description=__app_name__ + " provides a cli interface to predict gene knockout " @@ -22,23 +21,20 @@ def _cmd_sim_del(args): logging.info("Start - simulate-deletion") # Check arguments. if not os.path.isfile(args.input_model_file): - logging.error('Input model file doesn"t exist: %s' % (args.input_model_file,)) - parser.exit(1) + cmdline.abort( + AP, "Input model file does not exist: %s" % (args.input_model_file,) + ) if args.input_pathway_file is not None and not os.path.isfile( args.input_pathway_file ): - logging.error('Input pathway file doesn"t exist') - parser.exit(1) - + cmdline.abort(AP, "Input pathway file does not exist") if args.output_file_csv and not os.path.isdir( os.path.dirname(args.output_file_csv) ): - logging.debug("Create out directory: %s") os.makedirs(os.path.dirname(args.output_file_csv)) if args.output_file_tsv and not os.path.isdir( os.path.dirname(args.output_file_tsv) ): - logging.debug("Create out directory: %s") os.makedirs(os.path.dirname(args.output_file_tsv)) # Load model @@ -50,14 +46,14 @@ def _cmd_sim_del(args): target_id=args.target_rxn_id, ) if model is None: - parser.exit(1) + cmdline.abort(AP, "An error occured when the model was loaded") # Medium logging.info("Build medium") envcond = load_medium(path=args.input_medium_file) model = associate_flux_env(model=model, envcond=envcond) if model is None: - parser.exit(1) + cmdline.abort(AP, "An error occured when the pathway was merged to the model") # Simulation logging.info("Build 
gene ko") @@ -219,7 +215,3 @@ def print_help(): def parse_args(args=None): """Parse the command line""" return AP.parse_args(args=args) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/src/straindesign/utils/cmdline.py b/src/straindesign/utils/cmdline.py new file mode 100644 index 0000000..5df995f --- /dev/null +++ b/src/straindesign/utils/cmdline.py @@ -0,0 +1,30 @@ +import argparse +import os + + +def abort(parser: argparse.ArgumentParser, msg: str = ""): + """Abort the program + + Parameters + ---------- + parser: + The parser to use + msg: str + The message to throw from the parser + + Return + ------ + """ + parser.error(msg) + + +def check_output_file( + parser: argparse.ArgumentParser, path: str, overwrite: bool = False +) -> None: + msg = None + if path and not os.path.isdir(os.path.dirname(os.path.abspath(path))): + msg = "Outdir does not exists: %s" % (path,) + if overwrite and os.path.isfile(path): + msg = "Outdir does not exists: %s" % (path,) + if msg: + abort(parser=parser, msg=msg) diff --git a/tests/test_medium.py b/tests/test_medium.py index 22d421f..cb1c80c 100644 --- a/tests/test_medium.py +++ b/tests/test_medium.py @@ -23,7 +23,6 @@ def test_associate_flux_env(self): associate_flux_env( model=model, envcond=medium, - logger=logging.getLogger(), ) self.assertEqual(model.reactions.get_by_id("EX_glc__D_e").bounds, (-10.0, 10.0)) self.assertEqual(model.reactions.get_by_id("EX_o2_e").bounds, (-5.0, 5.0)) diff --git a/tests/test_preprocess.py b/tests/test_preprocess.py index 1ac6fa6..715db29 100644 --- a/tests/test_preprocess.py +++ b/tests/test_preprocess.py @@ -1,5 +1,3 @@ -import logging - from main_test import Main_test from straindesign.preprocess import build_model @@ -12,7 +10,6 @@ def test_build_model(self): pathway_path=None, biomass_id="EX_glc__D_e", target_id="BIOMASS_Ec_iAF1260_core_59p81M", - logger=logging.getLogger(), ) data = model.objective.to_json() b_ix, t_ix = 0, 0 @@ -31,7 +28,6 @@ def test_build_model(self): 
pathway_path=self.pathway_butanol, biomass_id="BIOMASS_Ec_iAF1260_core_59p81M", target_id="EX_1btol_e", - logger=logging.getLogger(), ) data = model.objective.to_json() b_ix, t_ix = 0, 0 @@ -49,7 +45,6 @@ def test_build_model(self): pathway_path=self.pathway_butanol, biomass_id="test", target_id="EX_1btol_e", - logger=logging.getLogger(), ) self.assertIs(model, None) # Test 4 @@ -58,6 +53,5 @@ def test_build_model(self): pathway_path=self.pathway_butanol, biomass_id="BIOMASS_Ec_iAF1260_core_59p81M", target_id="test", - logger=logging.getLogger(), ) self.assertIs(model, None) From b734f29c842aecbdce4623bd8a6b2050c0eff71c Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Thu, 22 Sep 2022 14:16:37 +0000 Subject: [PATCH 05/20] feat(straindesign): reduce-model, add this feature --- src/straindesign/commands.py | 103 +++++++++++++++++++++++++++++- src/straindesign/metabolic.py | 29 ++++++++- src/straindesign/preprocess.py | 53 ++++++++++++++- src/straindesign/utils/cmdline.py | 2 +- 4 files changed, 181 insertions(+), 6 deletions(-) diff --git a/src/straindesign/commands.py b/src/straindesign/commands.py index f88516d..a063378 100644 --- a/src/straindesign/commands.py +++ b/src/straindesign/commands.py @@ -3,19 +3,116 @@ import os from straindesign._version import __app_name__, __version__ +from straindesign.io import sbml from straindesign.medium import associate_flux_env, load_medium -from straindesign.metabolic import gene_ko, gene_ou -from straindesign.preprocess import build_model, genes_annotate, save_results +from straindesign.metabolic import gene_ko, gene_ou, reduce_model +from straindesign.preprocess import ( + build_model, + genes_annotate, + load_straindesign_simulate_deletion, + save_results, +) from straindesign.utils import cmdline AP = argparse.ArgumentParser( description=__app_name__ + " provides a cli interface to predict gene knockout " "targets with an heterologous pathway", - epilog="See online documentation: 
https://github.com/brsynth/straindesign", + epilog="See online documentation: https://github.com/brsynth/" + __app_name__, ) AP_subparsers = AP.add_subparsers(help="Sub-commnands (use with -h for more info)") +def _cmd_red_mod(args): + logging.info("Start - reduce-model") + # Check arguments. + if not os.path.isfile(args.input_model_file): + cmdline.abort( + AP, "Input model file does not exist: %s" % (args.input_model_file,) + ) + if args.input_straindesign_file and not os.path.isfile( + args.input_straindesign_file + ): + cmdline.abort( + AP, + "Input %s file does not exist: %s" + % (__app_name__, args.input_straindesign_file), + ) + if args.input_straindesign_file is None and args.input_gene_str is None: + cmdline.abort( + AP, + "Provide at least --input-straindesign-file or --input-genes-str to have genes to delete in the model", + ) + cmdline.check_output_file(parser=AP, path=args.output_file_sbml) + + # Load model. + logging.info("Load model") + model = sbml.cobra_from_sbml(path=args.input_model_file) + + # Load genes. + logging.info("Load genes") + genes = [] + if args.input_straindesign_file: + genes.extend( + load_straindesign_simulate_deletion( + path=args.input_straindesign_file, strategy=args.parameter_strategy_str + ) + ) + if args.input_gene_str: + genes.extend(args.input_gene_str) + genes = list(set(genes)) + if len(genes) < 1: + cmdline.abort(AP, "No genes are provided to be deleted into the model") + + # Remove genes in the model. 
+ logging.info("Remove genes in the model") + model = reduce_model(model=model, genes=genes) + + # Save model + logging.info("Write the model") + sbml.cobra_to_sbml(model=model, path=args.output_file_sbml) + + logging.info("End - reduce-model") + + +P_red_mod = AP_subparsers.add_parser("reduce-model", help=_cmd_red_mod.__doc__) +# Input +P_red_mod_input = P_red_mod.add_argument_group("Input") +P_red_mod_input.add_argument( + "--input-model-file", type=str, required=True, help="GEM model file (SBML)" +) +P_red_mod_input.add_argument( + "--input-straindesign-file", + type=str, + help="CSV file produced by the command " + __app_name__ + " simulate-deletion", +) +P_red_mod_input.add_argument( + "--input-gene-str", + nargs="+", + help="Gene ids to delete in the model", +) +# Output +P_red_mod_output = P_red_mod.add_argument_group("Output") +P_red_mod_output.add_argument( + "--output-file-sbml", + type=str, + required=True, + help="Model output file (SBML)", +) +# Parameters +P_red_mod_params = P_red_mod.add_argument_group("Parameters") +P_red_mod_params.add_argument( + "--parameter-strategy-str", + type=str, + choices=["yield-max", "gene-max", "gene-min"], + default="yield-max", + help="Strategy to use when genes are provided from the args: " + "yiel-max keeps the maximal yield, gene-max keeps the first association of genes combining " + "the biggest number of genes, gene-min keeps the first association of genes combinning the " + "lowest number of genes", +) +P_red_mod.set_defaults(func=_cmd_red_mod) + + def _cmd_sim_del(args): """Build plan of experiment for BASIC protocol""" logging.info("Start - simulate-deletion") diff --git a/src/straindesign/metabolic.py b/src/straindesign/metabolic.py index 6e539d7..9ee406b 100644 --- a/src/straindesign/metabolic.py +++ b/src/straindesign/metabolic.py @@ -1,6 +1,7 @@ import logging -from typing import Optional +from typing import List, Optional +import cobra import pandas as pd from cameo.flux_analysis.simulation import lmoma 
from cameo.strain_design.deterministic.linear_programming import OptKnock @@ -8,6 +9,32 @@ from cobra.core.model import Model +def reduce_model(model: cobra.Model, genes: List[str]): + # Check if gene is in the model. + model_gene_ids = [x.id for x in model.genes] + sgenes = set(genes) + genes = list(genes) + for gene in sgenes: + if gene not in model_gene_ids: + logging.warning( + "Gene: %s not found in the model, it's a Gene ID provided ?" % (gene,) + ) + genes.remove(gene) + # Remove genes. + number_of_reactions = len(model.reactions) + cobra.manipulation.remove_genes(model=model, gene_list=genes, remove_reactions=True) + # Clean model. + model, reactions = cobra.manipulation.prune_unused_reactions(model=model) + model, metabolites = cobra.manipulation.delete.prune_unused_metabolites(model=model) + + logging.info("Number of Genes deleted: %s" % (len(genes),)) + logging.info( + "Number of Reactions deleted: %s" + % (number_of_reactions - len(model.reactions),) + ) + return model + + def gene_ko( model: Model, max_knockouts: int, diff --git a/src/straindesign/preprocess.py b/src/straindesign/preprocess.py index 9db0ba7..0810389 100644 --- a/src/straindesign/preprocess.py +++ b/src/straindesign/preprocess.py @@ -1,12 +1,63 @@ import ast +import itertools import logging import time -from typing import Dict +from typing import Dict, List import pandas as pd from Bio import Entrez from cameo import load_model from cobra.core.model import Model +from straindesign._version import __app_name__ +from straindesign.io import tabulate + + +def load_straindesign_simulate_deletion(path: str, strategy: str) -> List[str]: + df = tabulate.Tabulate.from_tabulate(path=path) + header = [ + "reactions", + "genes", + "size", + "fva_min", + "fva_max", + "target_flux", + "biomass_flux", + "yield", + "fitness", + ] + if df.columns.to_list() != header: + raise ValueError( + "File: %s has a header not corresponding to the output of % simulate-deletion commands" + % (path, __app_name__) + ) 
+ if df.empty: + logging.warning("File: %s is empty, no gene found" % (path,)) + return [] + # Sort by yield by default + df.sort_values( + by=["yield", "biomass_flux", "target_flux"], + ascending=[False, False, False], + inplace=True, + ) + if strategy == "gene-max": + df.sort_values( + by=["size", "yield", "biomass_flux", "target_flux"], + ascending=[False, False, False, False], + inplace=True, + ) + elif strategy == "gene-min": + df.sort_values( + by=["size", "yield", "biomass_flux", "target_flux"], + ascending=[True, False, False, False], + inplace=True, + ) + + genes_str = df.loc[0, "genes"] + genes = ast.literal_eval(genes_str) + genes = list(itertools.chain.from_iterable(genes)) + genes = list(set(genes)) + logging.info("Genes to remove from the model are: %s" % (", ".join(genes))) + return genes def build_model( diff --git a/src/straindesign/utils/cmdline.py b/src/straindesign/utils/cmdline.py index 5df995f..b1c8632 100644 --- a/src/straindesign/utils/cmdline.py +++ b/src/straindesign/utils/cmdline.py @@ -25,6 +25,6 @@ def check_output_file( if path and not os.path.isdir(os.path.dirname(os.path.abspath(path))): msg = "Outdir does not exists: %s" % (path,) if overwrite and os.path.isfile(path): - msg = "Outdir does not exists: %s" % (path,) + msg = "File exists: %s" % (path,) if msg: abort(parser=parser, msg=msg) From 1d402ebb69109f1c5376a9750e4b615282770793 Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Thu, 22 Sep 2022 14:17:02 +0000 Subject: [PATCH 06/20] feat(straindesign): io, add this module --- src/straindesign/io/sbml.py | 10 +++++++++ src/straindesign/io/tabulate.py | 38 +++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 src/straindesign/io/sbml.py create mode 100644 src/straindesign/io/tabulate.py diff --git a/src/straindesign/io/sbml.py b/src/straindesign/io/sbml.py new file mode 100644 index 0000000..e3de113 --- /dev/null +++ b/src/straindesign/io/sbml.py @@ -0,0 +1,10 @@ +import cobra +from cobra.io 
import read_sbml_model, write_sbml_model + + +def cobra_from_sbml(path: str) -> cobra.Model: + return read_sbml_model(path) + + +def cobra_to_sbml(model: cobra.Model, path: str) -> None: + write_sbml_model(model, path) diff --git a/src/straindesign/io/tabulate.py b/src/straindesign/io/tabulate.py new file mode 100644 index 0000000..b8937f9 --- /dev/null +++ b/src/straindesign/io/tabulate.py @@ -0,0 +1,38 @@ +import csv + +import pandas as pd + + +class Tabulate(object): + """Tabulate is a class loading/saving DataFrame from CSV, TSV files""" + + @classmethod + def from_tabulate( + cls, + path: str, + sep: str = "infer", + **kwargs, + ) -> pd.DataFrame: + # Find delimiter. + if sep == "infer": + with open(path) as fid: + dialect = csv.Sniffer().sniff(fid.readline()) + sep = dialect.delimiter + # Load. + df = pd.read_csv( + path, + sep=sep, + **kwargs, + ) + return df + + @classmethod + def to_tabulate( + cls, path: str, df: pd.DataFrame, sep: str = "infer", index: bool = False + ) -> None: + if sep == "infer": + if path.endswith("tsv"): + sep = "\t" + elif path.endswith("csv"): + sep = "," + df.to_csv(path, sep=sep, index=index) From 0a0ee23f8c4d7de280502afec98a62c2d512066a Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Thu, 22 Sep 2022 14:24:19 +0000 Subject: [PATCH 07/20] feat(straindesign): utils, add cmd.py file --- src/straindesign/utils/cmd.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 src/straindesign/utils/cmd.py diff --git a/src/straindesign/utils/cmd.py b/src/straindesign/utils/cmd.py new file mode 100644 index 0000000..355f6d8 --- /dev/null +++ b/src/straindesign/utils/cmd.py @@ -0,0 +1,26 @@ +import logging +import subprocess +from typing import List + + +def run(args: List[str], show_output: bool = True) -> subprocess.CompletedProcess: + """Run a command line. 
+ + Parameters + ---------- + args: List[str] + A list of argument + show_output: bool (default: True) + Output command line + + Return + ------ + subprocess.CompletedProcess + Return result obtained with subprocess + """ + ret = subprocess.run(args, capture_output=True, encoding="utf8") + if show_output and ret.stdout is not None: + logging.info(ret.stdout) + if show_output and ret.stderr is not None: + logging.warning(ret.stderr) + return ret From 650baaa33a1340e2b627b28631bd50efc960ae7f Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Thu, 22 Sep 2022 14:24:56 +0000 Subject: [PATCH 08/20] feat(straindesign): tests, split into 2 categories: unit and functional --- tests/__init__.py | 0 .../test_simulate_deletion.py} | 24 +++++++------------ tests/{ => unit}/test_medium.py | 5 ++-- tests/{ => unit}/test_preprocess.py | 4 ++-- 4 files changed, 13 insertions(+), 20 deletions(-) create mode 100644 tests/__init__.py rename tests/{test_software.py => functional/test_simulate_deletion.py} (92%) rename tests/{ => unit}/test_medium.py (93%) rename tests/{ => unit}/test_preprocess.py (96%) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_software.py b/tests/functional/test_simulate_deletion.py similarity index 92% rename from tests/test_software.py rename to tests/functional/test_simulate_deletion.py index a08c0d7..2016c26 100644 --- a/tests/test_software.py +++ b/tests/functional/test_simulate_deletion.py @@ -4,18 +4,12 @@ import sys import tempfile -from main_test import Main_test +from tests.main_test import Main_test from straindesign._version import __app_name__ +from straindesign.utils import cmd -class Test_software(Main_test): - @staticmethod - def launch(args): - if isinstance(args, str): - args = args.split() - ret = subprocess.run(args, capture_output=True, encoding="utf8") - return ret - +class TestSimulateDeletion(Main_test): def test_software_butanol(self): # Be careful: can not test 
gene annotation into # worflows running simultaneously @@ -32,7 +26,7 @@ def test_software_butanol(self): args += ["--input-medium-file", self.medium_butanol_csv] args += ["--thread", "1"] - ret = Test_software.launch(args) + ret = cmd.run(args) if ret.returncode > 0: print(ret.stderr) print(ret.stdout) @@ -43,7 +37,7 @@ def test_software_butanol(self): self.assertGreater(len(lines), 1) os.remove(fd.name) - def test_software_butanol_light(self): + def test_software_butanol_light_ko(self): # Be careful: can not test gene annotation into # worflows running simultaneously with tempfile.NamedTemporaryFile(delete=False) as fd: @@ -60,7 +54,7 @@ def test_software_butanol_light(self): args += ["--input-medium-file", self.medium_butanol_tsv] args += ["--thread", "1"] - ret = Test_software.launch(args) + ret = cmd.run(args) if ret.returncode > 0: print(ret.stderr) print(ret.stdout) @@ -93,7 +87,7 @@ def test_software_butanol_iml1515(self): args += ["--input-medium-file", self.medium_butanol_csv] args += ["--thread", "1"] - ret = Test_software.launch(args) + ret = cmd.run(args) if ret.returncode > 0: print(ret.stderr) print(ret.stdout) @@ -126,7 +120,7 @@ def test_software_galaxy(self): args += ["--input-medium-file", self.medium_butanol_tsv] args += ["--thread", "1"] - ret = Test_software.launch(args) + ret = cmd.run(args) if ret.returncode > 0: print(ret.stderr) print(ret.stdout) @@ -158,7 +152,7 @@ def test_software_butanol_light_ou(self): args += ["--input-medium-file", self.medium_butanol_tsv] args += ["--thread", "1"] - ret = Test_software.launch(args) + ret = cmd.run(args) if ret.returncode > 0: print(ret.stderr) print(ret.stdout) diff --git a/tests/test_medium.py b/tests/unit/test_medium.py similarity index 93% rename from tests/test_medium.py rename to tests/unit/test_medium.py index cb1c80c..d75ff29 100644 --- a/tests/test_medium.py +++ b/tests/unit/test_medium.py @@ -1,12 +1,11 @@ -import logging from collections import OrderedDict from cameo import load_model 
-from main_test import Main_test +from tests.main_test import Main_test from straindesign.medium import associate_flux_env, load_medium -class Test_functional(Main_test): +class TestMedium(Main_test): def test_load_medium(self): medium = load_medium(self.medium_butanol_csv) theorical_medium = OrderedDict( diff --git a/tests/test_preprocess.py b/tests/unit/test_preprocess.py similarity index 96% rename from tests/test_preprocess.py rename to tests/unit/test_preprocess.py index 715db29..353621b 100644 --- a/tests/test_preprocess.py +++ b/tests/unit/test_preprocess.py @@ -1,8 +1,8 @@ -from main_test import Main_test +from tests.main_test import Main_test from straindesign.preprocess import build_model -class Test_functional(Main_test): +class TestPreprocess(Main_test): def test_build_model(self): # Test 1 model = build_model( From 193462cf07cbc8609c207a77a3e9432427537f83 Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Thu, 22 Sep 2022 16:06:17 +0000 Subject: [PATCH 09/20] fix(reduce-model): sort properly tabulate file --- src/straindesign/commands.py | 1 + src/straindesign/preprocess.py | 20 +++++++++++--------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/straindesign/commands.py b/src/straindesign/commands.py index a063378..2a3e795 100644 --- a/src/straindesign/commands.py +++ b/src/straindesign/commands.py @@ -65,6 +65,7 @@ def _cmd_red_mod(args): # Remove genes in the model. 
logging.info("Remove genes in the model") + logging.info("Genes to remove from the model are: %s" % (", ".join(genes))) model = reduce_model(model=model, genes=genes) # Save model diff --git a/src/straindesign/preprocess.py b/src/straindesign/preprocess.py index 0810389..6c8a242 100644 --- a/src/straindesign/preprocess.py +++ b/src/straindesign/preprocess.py @@ -33,10 +33,15 @@ def load_straindesign_simulate_deletion(path: str, strategy: str) -> List[str]: if df.empty: logging.warning("File: %s is empty, no gene found" % (path,)) return [] - # Sort by yield by default + + # Format df. + df["genes"] = df["genes"].apply(lambda x: ast.literal_eval(x)) + df["size"] = df["genes"].apply(lambda x: len(x)) + + # Sort by yield by default. df.sort_values( - by=["yield", "biomass_flux", "target_flux"], - ascending=[False, False, False], + by=["yield", "size", "biomass_flux", "target_flux"], + ascending=[False, True, False, False], inplace=True, ) if strategy == "gene-max": @@ -51,12 +56,9 @@ def load_straindesign_simulate_deletion(path: str, strategy: str) -> List[str]: ascending=[True, False, False, False], inplace=True, ) - - genes_str = df.loc[0, "genes"] - genes = ast.literal_eval(genes_str) - genes = list(itertools.chain.from_iterable(genes)) - genes = list(set(genes)) - logging.info("Genes to remove from the model are: %s" % (", ".join(genes))) + df.reset_index(inplace=True, drop=True) + iter_genes = itertools.chain.from_iterable(df.loc[0, "genes"]) + genes = sorted(list(set(iter_genes))) return genes From 9d673752b66f8d68a398e0cf4a3006096de818f6 Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Thu, 22 Sep 2022 16:06:32 +0000 Subject: [PATCH 10/20] tests(reduce-model): add test --- tests/dataset/gene/gene.empty.csv | 1 + tests/dataset/gene/gene.value_error.csv | 1 + .../simulate_deletion.butanol.iAF1260.csv | 6 ++ tests/functional/test_reduce_model.py | 57 +++++++++++++++++++ tests/functional/test_simulate_deletion.py | 3 +- tests/main_test.py | 5 ++ 
tests/unit/test_medium.py | 2 +- tests/unit/test_preprocess.py | 32 ++++++++++- 8 files changed, 103 insertions(+), 4 deletions(-) create mode 100644 tests/dataset/gene/gene.empty.csv create mode 100644 tests/dataset/gene/gene.value_error.csv create mode 100644 tests/dataset/gene/simulate_deletion.butanol.iAF1260.csv create mode 100644 tests/functional/test_reduce_model.py diff --git a/tests/dataset/gene/gene.empty.csv b/tests/dataset/gene/gene.empty.csv new file mode 100644 index 0000000..db10809 --- /dev/null +++ b/tests/dataset/gene/gene.empty.csv @@ -0,0 +1 @@ +reactions,genes,size,fva_min,fva_max,target_flux,biomass_flux,yield,fitness diff --git a/tests/dataset/gene/gene.value_error.csv b/tests/dataset/gene/gene.value_error.csv new file mode 100644 index 0000000..3d0d13e --- /dev/null +++ b/tests/dataset/gene/gene.value_error.csv @@ -0,0 +1 @@ +reactions,genes,size,fva_min,fva_max,target_flux,biomass_flux,yield diff --git a/tests/dataset/gene/simulate_deletion.butanol.iAF1260.csv b/tests/dataset/gene/simulate_deletion.butanol.iAF1260.csv new file mode 100644 index 0000000..bdb52f8 --- /dev/null +++ b/tests/dataset/gene/simulate_deletion.butanol.iAF1260.csv @@ -0,0 +1,6 @@ +reactions,genes,size,fva_min,fva_max,target_flux,biomass_flux,yield,fitness +"('ATPS4rpp',)","(('b3735',), ('b3734',), ('b3736',), ('b3731',), ('b3732',))",1,0.0,0.00641204819277442,0.0,0.2199110076152308,0.0,0.0 +"('TPI',)","(('b3919',),)",1,0.0,0.006455168431418056,0.0,0.0,0.0,0.0 +"('ALAt2pp', 'DSERt2pp', 'DALAt2pp', 'BALAt2pp', 'TPI')","(('b4208', 'b3919'),)",2,0.0,0.006455167733784362,0.0,0.0,0.0,0.0 +"('OPHHX', 'TPI')","(('b3835', 'b3919'),)",2,0.0,0.006455168423924535,0.0,0.0,0.0,0.0 +"('TPI', 'MTHFC', 'MTHFD')","(('b0529', 'b3919'), ('b3919', 'b0529'))",2,0.0,0.006455095014414326,0.0,0.0,1.2,0.0 diff --git a/tests/functional/test_reduce_model.py b/tests/functional/test_reduce_model.py new file mode 100644 index 0000000..013e1fb --- /dev/null +++ 
b/tests/functional/test_reduce_model.py @@ -0,0 +1,57 @@ +import tempfile +from typing import Tuple + +import cobra +from tests.main_test import Main_test +from straindesign._version import __app_name__ +from straindesign.utils import cmd + + +class TestReduceModel(Main_test): + @classmethod + def count_gene_reaction(cls, path: str) -> Tuple[int, int]: + model = cobra.io.read_sbml_model(path) + return len(model.genes), len(model.reactions) + + def test_one(self): + # Delete: 2 genes, 3 reactions + nb_gene, nb_reaction = TestReduceModel.count_gene_reaction(self.model_ecoli_gz) + with tempfile.NamedTemporaryFile() as fd: + args = ["python", "-m", __app_name__, "reduce-model"] + args += ["--input-model-file", self.model_ecoli_gz] + args += ["--input-straindesign-file", self.gene_butanol] + args += ["--output-file-sbml", fd.name] + + ret = cmd.run(args) + if ret.returncode > 0: + print(ret.stderr) + print(ret.stdout) + sys.exit(1) + + model, errors = cobra.io.validate_sbml_model(fd.name) + self.assertIsNot(model, None) + + self.assertEqual(nb_gene, len(model.genes) + 2) + self.assertEqual(nb_reaction, len(model.reactions) + 3) + + def test_two(self): + # Delete: 3 genes, 7 reactions + nb_gene, nb_reaction = TestReduceModel.count_gene_reaction(self.model_ecoli_gz) + with tempfile.NamedTemporaryFile() as fd: + args = ["python", "-m", __app_name__, "reduce-model"] + args += ["--input-model-file", self.model_ecoli_gz] + args += ["--input-straindesign-file", self.gene_butanol] + args += ["--input-gene-str", "b4208", "b4208", "b3919"] + args += ["--output-file-sbml", fd.name] + + ret = cmd.run(args) + if ret.returncode > 0: + print(ret.stderr) + print(ret.stdout) + sys.exit(1) + + model, errors = cobra.io.validate_sbml_model(fd.name) + self.assertIsNot(model, None) + + self.assertEqual(nb_gene, len(model.genes) + 3) + self.assertEqual(nb_reaction, len(model.reactions) + 7) diff --git a/tests/functional/test_simulate_deletion.py b/tests/functional/test_simulate_deletion.py 
index 2016c26..cf807a3 100644 --- a/tests/functional/test_simulate_deletion.py +++ b/tests/functional/test_simulate_deletion.py @@ -1,12 +1,11 @@ import csv import os -import subprocess import sys import tempfile -from tests.main_test import Main_test from straindesign._version import __app_name__ from straindesign.utils import cmd +from tests.main_test import Main_test class TestSimulateDeletion(Main_test): diff --git a/tests/main_test.py b/tests/main_test.py index 0714b1a..1723d43 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -17,3 +17,8 @@ class Main_test(unittest.TestCase): # Pathway. pathway_path = os.path.join(dataset_path, "pathway") pathway_butanol = os.path.join(pathway_path, "butanol.xml") + # Gene. + gene_path = os.path.join(dataset_path, "gene") + gene_butanol = os.path.join(gene_path, "simulate_deletion.butanol.iAF1260.csv") + gene_value_error = os.path.join(gene_path, "gene.value_error.csv") + gene_empty = os.path.join(gene_path, "gene.empty.csv") diff --git a/tests/unit/test_medium.py b/tests/unit/test_medium.py index d75ff29..4935509 100644 --- a/tests/unit/test_medium.py +++ b/tests/unit/test_medium.py @@ -1,8 +1,8 @@ from collections import OrderedDict from cameo import load_model -from tests.main_test import Main_test from straindesign.medium import associate_flux_env, load_medium +from tests.main_test import Main_test class TestMedium(Main_test): diff --git a/tests/unit/test_preprocess.py b/tests/unit/test_preprocess.py index 353621b..201cbc6 100644 --- a/tests/unit/test_preprocess.py +++ b/tests/unit/test_preprocess.py @@ -1,8 +1,38 @@ +from straindesign.preprocess import ( + build_model, + load_straindesign_simulate_deletion, +) from tests.main_test import Main_test -from straindesign.preprocess import build_model class TestPreprocess(Main_test): + def test_load_straindesign_simulate_deletion(self): + # Test 1 + genes = load_straindesign_simulate_deletion( + path=self.gene_butanol, strategy="yield-max" + ) + 
self.assertEqual(genes, ["b0529", "b3919"]) + # Test 2 + genes = load_straindesign_simulate_deletion( + path=self.gene_butanol, strategy="gene-max" + ) + self.assertEqual(genes, ["b3731", "b3732", "b3734", "b3735", "b3736"]) + # Test 3 + genes = load_straindesign_simulate_deletion( + path=self.gene_butanol, strategy="gene-min" + ) + self.assertEqual(genes, ["b3919"]) + # Test 4 + genes = load_straindesign_simulate_deletion( + path=self.gene_empty, strategy="gene-min" + ) + self.assertEqual(genes, []) + # Test 5 + with self.assertRaises(ValueError): + load_straindesign_simulate_deletion( + path=self.gene_value_error, strategy="gene-min" + ) + def test_build_model(self): # Test 1 model = build_model( From bb36435af27325d711c3404321cb36e0428a5eb8 Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Fri, 23 Sep 2022 09:51:21 +0000 Subject: [PATCH 11/20] refactor(io): sbml, rewrite load function --- src/straindesign/io/sbml.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/straindesign/io/sbml.py b/src/straindesign/io/sbml.py index e3de113..6b0e6cb 100644 --- a/src/straindesign/io/sbml.py +++ b/src/straindesign/io/sbml.py @@ -1,10 +1,10 @@ +import cameo import cobra -from cobra.io import read_sbml_model, write_sbml_model -def cobra_from_sbml(path: str) -> cobra.Model: - return read_sbml_model(path) +def from_sbml(path: str) -> cobra.Model: + return cameo.load_model(path) -def cobra_to_sbml(model: cobra.Model, path: str) -> None: - write_sbml_model(model, path) +def to_sbml(model: cobra.Model, path: str) -> None: + cobra.io.write_sbml_model(model, path) From b44747afb96fa9c111fbfe19dfaf4c03451c45d7 Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Fri, 23 Sep 2022 09:51:46 +0000 Subject: [PATCH 12/20] feat(utils): sbml, helper functions --- src/straindesign/utils/model.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 src/straindesign/utils/model.py diff --git a/src/straindesign/utils/model.py 
b/src/straindesign/utils/model.py new file mode 100644 index 0000000..dc242bf --- /dev/null +++ b/src/straindesign/utils/model.py @@ -0,0 +1,17 @@ +import logging + +import cobra + + +def count_gene(model: cobra.Model) -> int: + return len(model.genes) + + +def count_reaction(model: cobra.Model) -> int: + return len(model.reactions) + + +def has_reaction(model: cobra.Model, reaction: str) -> bool: + if reaction in [x.id for x in model.reactions]: + return True + return False From 7016c11b735f9c5699c9fb54a2f1cedfef0a0cbe Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Fri, 23 Sep 2022 09:52:26 +0000 Subject: [PATCH 13/20] tests(functional): adapt tests with the new utils/sbml functions --- tests/functional/test_reduce_model.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tests/functional/test_reduce_model.py b/tests/functional/test_reduce_model.py index 013e1fb..3f64ef9 100644 --- a/tests/functional/test_reduce_model.py +++ b/tests/functional/test_reduce_model.py @@ -4,18 +4,18 @@ import cobra from tests.main_test import Main_test from straindesign._version import __app_name__ +from straindesign.io import sbml from straindesign.utils import cmd +from straindesign.utils import model as utils_model class TestReduceModel(Main_test): - @classmethod - def count_gene_reaction(cls, path: str) -> Tuple[int, int]: - model = cobra.io.read_sbml_model(path) - return len(model.genes), len(model.reactions) - def test_one(self): # Delete: 2 genes, 3 reactions - nb_gene, nb_reaction = TestReduceModel.count_gene_reaction(self.model_ecoli_gz) + model_ecoli = sbml.from_sbml(path=self.model_ecoli_gz) + nb_gene = utils_model.count_gene(model=model_ecoli) + nb_reaction = utils_model.count_reaction(model=model_ecoli) + with tempfile.NamedTemporaryFile() as fd: args = ["python", "-m", __app_name__, "reduce-model"] args += ["--input-model-file", self.model_ecoli_gz] @@ -36,7 +36,10 @@ def test_one(self): def test_two(self): # Delete: 3 genes, 
7 reactions - nb_gene, nb_reaction = TestReduceModel.count_gene_reaction(self.model_ecoli_gz) + model_ecoli = sbml.from_sbml(path=self.model_ecoli_gz) + nb_gene = utils_model.count_gene(model=model_ecoli) + nb_reaction = utils_model.count_reaction(model=model_ecoli) + with tempfile.NamedTemporaryFile() as fd: args = ["python", "-m", __app_name__, "reduce-model"] args += ["--input-model-file", self.model_ecoli_gz] From 726b448eb03287b834217960286cafd40314eb8d Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Fri, 23 Sep 2022 09:53:01 +0000 Subject: [PATCH 14/20] feat(analyzing-model): incomplete feature --- src/straindesign/commands.py | 75 ++++++++++++++++++++++++++++++++++- src/straindesign/metabolic.py | 9 +++++ 2 files changed, 82 insertions(+), 2 deletions(-) diff --git a/src/straindesign/commands.py b/src/straindesign/commands.py index 2a3e795..7ce5462 100644 --- a/src/straindesign/commands.py +++ b/src/straindesign/commands.py @@ -46,7 +46,7 @@ def _cmd_red_mod(args): # Load model. logging.info("Load model") - model = sbml.cobra_from_sbml(path=args.input_model_file) + model = sbml.from_sbml(path=args.input_model_file) # Load genes. logging.info("Load genes") @@ -70,7 +70,7 @@ def _cmd_red_mod(args): # Save model logging.info("Write the model") - sbml.cobra_to_sbml(model=model, path=args.output_file_sbml) + sbml.to_sbml(model=model, path=args.output_file_sbml) logging.info("End - reduce-model") @@ -292,6 +292,77 @@ def _cmd_sim_del(args): P_sim_del.set_defaults(func=_cmd_sim_del) +# Analyzing model +def _cmd_ana_mod(args): + """Analyzing model""" + logging.info("Start - analyzing-model") + # Check arguments. + if not os.path.isfile(args.input_model_file): + cmdline.abort( + AP, "Input model file does not exist: %s" % (args.input_model_file,) + ) + cmdline.check_output_file(parser=AP, path=args.output_pareto_png) + + # Load model. + model = sbml.from_sbml(path=args.input_model_file) + + # Medium. 
+ logging.info("Build medium") + envcond = load_medium(path=args.input_medium_file) + model = associate_flux_env(model=model, envcond=envcond) + if model is None: + cmdline.abort(AP, "An error occured when the pathway was merged to the model") + + # Check reactions. + for rxn in [args.biomass_rxn_id, args.target_rxn_id]: + if utils_model(model=model, reaction=rxn): + cmdline.abort(AP, "Reaction is not found in the model: %s" % (rxn,)) + + # Build pareto. + + "--biomass-rxn-id", + "--target-rxn-id", + "--output-pareto-png", + + logging.info("End - analysing-model") + + +P_ana_mod = AP_subparsers.add_parser("analyzing-model", help=_cmd_ana_mod.__doc__) +# Input +P_ana_mod_input = P_ana_mod.add_argument_group("Input") +P_ana_mod_input.add_argument( + "--input-model-file", type=str, required=True, help="GEM model file (SBML)" +) +P_ana_mod_input.add_argument( + "--biomass-rxn-id", + type=str, + required=True, + help="Biomass reaction ID", +) +P_ana_mod_input.add_argument( + "--target-rxn-id", + type=str, + help="Target reaction ID", +) +# Output +P_ana_mod_output = P_ana_mod.add_argument_group("Output") +P_ana_mod_output.add_argument( + "--output-pareto-png", + type=str, + help="Output pareto file (PNG)", +) +# Parameters - Medium +P_ana_mod_medium = P_ana_mod.add_argument_group("Medium") +P_ana_mod_medium.add_argument( + "--input-medium-file", + type=str, + help="Provide a csv or tsv file with an header as ," + ", . This file " + "provides information about metabolites (Metanetx Id) " + "to add or remove.", +) +P_ana_mod.set_defaults(func=_cmd_ana_mod) + # Version. 
def print_version(_args): """Display this program"s version""" diff --git a/src/straindesign/metabolic.py b/src/straindesign/metabolic.py index 9ee406b..d5b1918 100644 --- a/src/straindesign/metabolic.py +++ b/src/straindesign/metabolic.py @@ -115,3 +115,12 @@ def gene_ou( except Exception: logging.warning("An error occurred, maybe there is no solution") return df + + +def pareto(model: cobra.Model, biomass_rxn_id: str, target_rxn_id: str) -> pd.DataFrame: + result = cameo.phenotypic_phase_plane( + model, + variables=[model.reactions.get_by_id(biomass_rxn_id)], + objective=model.reactions.get_by_id(target_rxn_id), + ) + return result From 1dc3b8f391c3ad4adfc25d4791478921b5be3f70 Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Fri, 23 Sep 2022 17:01:29 +0200 Subject: [PATCH 15/20] fix(github): timeout for macos --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 80d90af..efb1be1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -57,7 +57,7 @@ jobs: strategy: matrix: python-version: ['3.8', '3.9'] - os: [ubuntu, macos] + os: [ubuntu] env: bld_path: /tmp/build steps: From 939c2305cff920e76dfd3dd793d59fb721614438 Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Fri, 23 Sep 2022 17:03:14 +0200 Subject: [PATCH 16/20] feat(analyzing-model): add this feature --- src/straindesign/commands.py | 76 +++++++++++++-------- src/straindesign/metabolic.py | 32 +++++++-- src/straindesign/utils/model.py | 2 - tests/functional/test_analyzing_model.py | 79 ++++++++++++++++++++++ tests/functional/test_reduce_model.py | 4 +- tests/functional/test_simulate_deletion.py | 2 + 6 files changed, 157 insertions(+), 38 deletions(-) create mode 100644 tests/functional/test_analyzing_model.py diff --git a/src/straindesign/commands.py b/src/straindesign/commands.py index 7ce5462..0ebeae0 100644 --- a/src/straindesign/commands.py +++ 
b/src/straindesign/commands.py @@ -5,7 +5,7 @@ from straindesign._version import __app_name__, __version__ from straindesign.io import sbml from straindesign.medium import associate_flux_env, load_medium -from straindesign.metabolic import gene_ko, gene_ou, reduce_model +from straindesign.metabolic import gene_ko, gene_ou, plot_pareto, reduce_model from straindesign.preprocess import ( build_model, genes_annotate, @@ -13,6 +13,7 @@ save_results, ) from straindesign.utils import cmdline +from straindesign.utils import model as utils_model AP = argparse.ArgumentParser( description=__app_name__ + " provides a cli interface to predict gene knockout " @@ -304,25 +305,47 @@ def _cmd_ana_mod(args): cmdline.check_output_file(parser=AP, path=args.output_pareto_png) # Load model. - model = sbml.from_sbml(path=args.input_model_file) + logging.info("Build model") + if args.input_pathway_file: + model = build_model( + model_path=args.input_model_file, + pathway_path=args.input_pathway_file, + biomass_id=args.biomass_rxn_id, + target_id=args.target_rxn_id, + ) + else: + model = sbml.from_sbml(path=args.input_model_file) - # Medium. - logging.info("Build medium") - envcond = load_medium(path=args.input_medium_file) - model = associate_flux_env(model=model, envcond=envcond) if model is None: - cmdline.abort(AP, "An error occured when the pathway was merged to the model") + cmdline.abort(AP, "An error occured when the model was loaded") + + # Medium. + if args.input_medium_file: + logging.info("Build medium") + envcond = load_medium(path=args.input_medium_file) + model = associate_flux_env(model=model, envcond=envcond) + if model is None: + cmdline.abort( + AP, "An error occured when the pathway was merged to the model" + ) # Check reactions. 
- for rxn in [args.biomass_rxn_id, args.target_rxn_id]: - if utils_model(model=model, reaction=rxn): + rxns = [args.biomass_rxn_id, args.target_rxn_id] + if args.substrate_rxn_id: + rxns.append(args.substrate_rxn_id) + for rxn in rxns: + if not utils_model.has_reaction(model=model, reaction=rxn): cmdline.abort(AP, "Reaction is not found in the model: %s" % (rxn,)) - # Build pareto. - - "--biomass-rxn-id", - "--target-rxn-id", - "--output-pareto-png", + # Plot pareto. + logging.info("Plot pareto") + plot_pareto( + model=model, + path=args.output_pareto_png, + biomass_rxn_id=args.biomass_rxn_id, + target_rxn_id=args.target_rxn_id, + substrate_rxn_id=args.substrate_rxn_id, + ) logging.info("End - analysing-model") @@ -331,31 +354,27 @@ def _cmd_ana_mod(args): # Input P_ana_mod_input = P_ana_mod.add_argument_group("Input") P_ana_mod_input.add_argument( - "--input-model-file", type=str, required=True, help="GEM model file (SBML)" + "--input-model-file", required=True, help="GEM model file (SBML)" ) P_ana_mod_input.add_argument( - "--biomass-rxn-id", - type=str, - required=True, - help="Biomass reaction ID", + "--input-pathway-file", help="SBML file that contains an heterologous pathway" ) P_ana_mod_input.add_argument( - "--target-rxn-id", - type=str, - help="Target reaction ID", + "--biomass-rxn-id", required=True, help="Biomass reaction ID" +) +P_ana_mod_input.add_argument( + "--target-rxn-id", required=True, help="Target reaction ID" +) +P_ana_mod_input.add_argument( + "--substrate-rxn-id", help="Substracte reaction ID (eg. 
carbon source)" ) # Output P_ana_mod_output = P_ana_mod.add_argument_group("Output") -P_ana_mod_output.add_argument( - "--output-pareto-png", - type=str, - help="Output pareto file (PNG)", -) +P_ana_mod_output.add_argument("--output-pareto-png", help="Output pareto file (PNG)") # Parameters - Medium P_ana_mod_medium = P_ana_mod.add_argument_group("Medium") P_ana_mod_medium.add_argument( "--input-medium-file", - type=str, help="Provide a csv or tsv file with an header as ," ", . This file " "provides information about metabolites (Metanetx Id) " @@ -363,6 +382,7 @@ def _cmd_ana_mod(args): ) P_ana_mod.set_defaults(func=_cmd_ana_mod) + # Version. def print_version(_args): """Display this program"s version""" diff --git a/src/straindesign/metabolic.py b/src/straindesign/metabolic.py index d5b1918..732ad06 100644 --- a/src/straindesign/metabolic.py +++ b/src/straindesign/metabolic.py @@ -1,11 +1,14 @@ import logging from typing import List, Optional +import cameo import cobra import pandas as pd +import plotly from cameo.flux_analysis.simulation import lmoma from cameo.strain_design.deterministic.linear_programming import OptKnock from cameo.strain_design.heuristic.evolutionary_based import OptGene +from cameo.visualization.plotting.with_plotly import PlotlyPlotter from cobra.core.model import Model @@ -22,10 +25,10 @@ def reduce_model(model: cobra.Model, genes: List[str]): genes.remove(gene) # Remove genes. number_of_reactions = len(model.reactions) - cobra.manipulation.remove_genes(model=model, gene_list=genes, remove_reactions=True) - # Clean model. - model, reactions = cobra.manipulation.prune_unused_reactions(model=model) - model, metabolites = cobra.manipulation.delete.prune_unused_metabolites(model=model) + cobra.manipulation.remove_genes(model, gene_list=genes, remove_reactions=True) + # Clean model, functions highly dependent of the cobra's version. 
+ # model, reactions = cobra.manipulation.prune_unused_reactions(model=model) + # model, metabolites = cobra.manipulation.delete.prune_unused_metabolites(model=model) logging.info("Number of Genes deleted: %s" % (len(genes),)) logging.info( @@ -117,10 +120,27 @@ def gene_ou( return df -def pareto(model: cobra.Model, biomass_rxn_id: str, target_rxn_id: str) -> pd.DataFrame: +def plot_pareto( + model: cobra.Model, + path: str, + biomass_rxn_id: str, + target_rxn_id: str, + substrate_rxn_id: Optional[str] = None, +) -> None: + # Init. + abp = cameo.visualization.plotting.abstract.AbstractPlotter() + grid = abp.grid() + plotter = PlotlyPlotter() + + # Create graph. result = cameo.phenotypic_phase_plane( model, variables=[model.reactions.get_by_id(biomass_rxn_id)], objective=model.reactions.get_by_id(target_rxn_id), + source=substrate_rxn_id, ) - return result + result.plot(plotter, grid=grid) + + # Export graph. + fig = dict(data=grid.plots[0].data[0], layout=grid.plots[0].layout) + plotly.io.write_image(fig=fig, file=path, format="png", engine="kaleido") diff --git a/src/straindesign/utils/model.py b/src/straindesign/utils/model.py index dc242bf..0b3cd60 100644 --- a/src/straindesign/utils/model.py +++ b/src/straindesign/utils/model.py @@ -1,5 +1,3 @@ -import logging - import cobra diff --git a/tests/functional/test_analyzing_model.py b/tests/functional/test_analyzing_model.py new file mode 100644 index 0000000..4ba13a1 --- /dev/null +++ b/tests/functional/test_analyzing_model.py @@ -0,0 +1,79 @@ +import csv +import imghdr +import os +import sys +import tempfile + +from straindesign._version import __app_name__ +from straindesign.utils import cmd +from tests.main_test import Main_test + + +class TestAnalyzingModel(Main_test): + def test_base(self): + with tempfile.NamedTemporaryFile() as fd: + args = ["python", "-m", __app_name__, "analyzing-model"] + args += ["--input-model-file", self.model_ecoli_gz] + args += ["--biomass-rxn-id", "BIOMASS_Ec_iAF1260_core_59p81M"] + 
args += ["--target-rxn-id", "EX_tyrp_e"] + args += ["--output-pareto-png", fd.name] + + ret = cmd.run(args) + if ret.returncode > 0: + print(ret.stderr) + print(ret.stdout) + sys.exit(1) + + self.assertEqual(imghdr.what(fd.name), "png") + + def test_medium(self): + with tempfile.NamedTemporaryFile() as fd: + args = ["python", "-m", __app_name__, "analyzing-model"] + args += ["--input-model-file", self.model_ecoli_gz] + args += ["--biomass-rxn-id", "BIOMASS_Ec_iAF1260_core_59p81M"] + args += ["--target-rxn-id", "EX_tyrp_e"] + args += ["--output-pareto-png", fd.name] + args += ["--input-medium-file", self.medium_butanol_csv] + + ret = cmd.run(args) + if ret.returncode > 0: + print(ret.stderr) + print(ret.stdout) + sys.exit(1) + + self.assertEqual(imghdr.what(fd.name), "png") + + def test_pathway(self): + with tempfile.NamedTemporaryFile() as fd: + args = ["python", "-m", __app_name__, "analyzing-model"] + args += ["--input-model-file", self.model_ecoli_gz] + args += ["--biomass-rxn-id", "BIOMASS_Ec_iAF1260_core_59p81M"] + args += ["--target-rxn-id", "EX_tyrp_e"] + args += ["--output-pareto-png", fd.name] + args += ["--input-medium-file", self.medium_butanol_csv] + args += ["--input-pathway-file", self.pathway_butanol] + + ret = cmd.run(args) + if ret.returncode > 0: + print(ret.stderr) + print(ret.stdout) + sys.exit(1) + + self.assertEqual(imghdr.what(fd.name), "png") + + def test_substrate(self): + with tempfile.NamedTemporaryFile() as fd: + args = ["python", "-m", __app_name__, "analyzing-model"] + args += ["--input-model-file", self.model_ecoli_gz] + args += ["--biomass-rxn-id", "BIOMASS_Ec_iAF1260_core_59p81M"] + args += ["--target-rxn-id", "EX_tyrp_e"] + args += ["--output-pareto-png", fd.name] + args += ["--substrate-rxn-id", "EX_glc__D_e"] + + ret = cmd.run(args) + if ret.returncode > 0: + print(ret.stderr) + print(ret.stdout) + sys.exit(1) + + self.assertEqual(imghdr.what(fd.name), "png") diff --git a/tests/functional/test_reduce_model.py 
b/tests/functional/test_reduce_model.py index 3f64ef9..a0cd21f 100644 --- a/tests/functional/test_reduce_model.py +++ b/tests/functional/test_reduce_model.py @@ -1,12 +1,12 @@ +import sys import tempfile -from typing import Tuple import cobra -from tests.main_test import Main_test from straindesign._version import __app_name__ from straindesign.io import sbml from straindesign.utils import cmd from straindesign.utils import model as utils_model +from tests.main_test import Main_test class TestReduceModel(Main_test): diff --git a/tests/functional/test_simulate_deletion.py b/tests/functional/test_simulate_deletion.py index cf807a3..fe93c36 100644 --- a/tests/functional/test_simulate_deletion.py +++ b/tests/functional/test_simulate_deletion.py @@ -22,6 +22,7 @@ def test_software_butanol(self): args += ["--output-file-csv", fd.name] args += ["--strategy", "ko"] args += ["--max-knockouts", "3"] + args += ["--max-time", "10"] args += ["--input-medium-file", self.medium_butanol_csv] args += ["--thread", "1"] @@ -148,6 +149,7 @@ def test_software_butanol_light_ou(self): args += ["--output-file-csv", fd.name] args += ["--strategy", "ou"] args += ["--max-knockouts", "3"] + args += ["--max-time", "10"] args += ["--input-medium-file", self.medium_butanol_tsv] args += ["--thread", "1"] From 8f118687eeeb88a37bcdfdc75114458de6ff4685 Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Fri, 23 Sep 2022 17:12:30 +0200 Subject: [PATCH 17/20] style: super-linter --- tests/functional/test_analyzing_model.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/functional/test_analyzing_model.py b/tests/functional/test_analyzing_model.py index 4ba13a1..67ed526 100644 --- a/tests/functional/test_analyzing_model.py +++ b/tests/functional/test_analyzing_model.py @@ -1,6 +1,4 @@ -import csv import imghdr -import os import sys import tempfile From 599370496cc288af73a6b1b4fd34632df7d83163 Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Mon, 26 Sep 2022 12:33:33 +0200 Subject: 
[PATCH 18/20] chore(github): worflows, full conda management --- .github/workflows/build.yml | 65 +++----------- .github/workflows/coverage.yml | 33 ------- .github/workflows/lint.yml | 23 +++++ .github/workflows/test.yml | 90 +++++++++---------- .gitignore | 1 + environment.yml | 2 + recipes/conda-env.yaml | 17 ++++ recipes/meta.yaml | 49 ++++++++++ .../workflow.yaml | 14 +-- src/straindesign/io/__init__.py | 0 src/straindesign/utils/__init__.py | 0 tox.ini | 7 ++ 12 files changed, 161 insertions(+), 140 deletions(-) delete mode 100644 .github/workflows/coverage.yml create mode 100644 .github/workflows/lint.yml create mode 100644 recipes/conda-env.yaml create mode 100644 recipes/meta.yaml rename conda.recipes/conda_build_env.yaml => recipes/workflow.yaml (53%) create mode 100644 src/straindesign/io/__init__.py create mode 100644 src/straindesign/utils/__init__.py create mode 100644 tox.ini diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c56966f..e50d52d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -138,12 +138,11 @@ jobs: github_token: ${{ secrets.GITHUB_TOKEN }} branch: main - BuildPip: - needs: Commit + BuildConda: + needs: [Commit, Tag] runs-on: ubuntu-latest env: - bld_path: /tmp/build - asset: pip.zip + asset: straindesign-${{ needs.Tag.outputs.tag }}-py_0.tar.bz2 outputs: asset: ${{ env.asset }} defaults: @@ -158,74 +157,30 @@ jobs: uses: conda-incubator/setup-miniconda@v2 with: miniconda-version: "latest" - environment-file: conda.recipes/conda_build_env.yaml + environment-file: recipes/workflow.yaml python-version: '3.9' - - name: 'Build pip package' + - name: 'Build conda package' run: | - python3 -m build --outdir ${bld_path} - - name: 'Create asset' - run: | - zip -r -j ${asset} ${bld_path} + conda-build recipes --output-folder . 
--output "${{ env.asset }}" - name: 'Upload Artifact Package' uses: actions/upload-artifact@v2 with: - name: package-pip-artifact + name: package-conda-artifact path: ${{ env.asset }} retention-days: 1 if-no-files-found: error Release: - needs: [Tag, BuildPip] + needs: [Tag, BuildConda] runs-on: ubuntu-latest steps: - name: 'Download Artifact Package - Pip' uses: actions/download-artifact@v2 with: - name: package-pip-artifact + name: package-conda-artifact - name: 'Create Release' uses: softprops/action-gh-release@v1 with: tag_name: ${{ needs.Tag.outputs.tag }} body: ${{ needs.Tag.outputs.changelog }} - files: ${{ needs.BuildPip.outputs.asset }} - - Docker: - needs: [Tag, BuildPip] - runs-on: ubuntu-latest - permissions: - packages: write - contents: read - steps: - - name: 'Checkout' - uses: actions/checkout@v2 - with: - ref: stable - - name: 'Download Artifact Package - Pip' - uses: actions/download-artifact@v2 - with: - name: package-pip-artifact - - name: 'Unzip pip artifact' - run: | - echo "list directory" - ls - unzip pip.zip - echo "list directory" - ls - - name: 'Define Image name' - run: | - image_name=${{ github.repository }} - image_name=$(echo "$image_name" | cut -f2 -d"/" | tr '[:upper:]' '[:lower:]') - echo "image_name=$image_name" >> "$GITHUB_ENV" - - name: 'Build image' - run: | - docker build . 
--file Dockerfile --tag ${{ env.image_name }} --label "runnumber=${GITHUB_RUN_ID}" - - name: 'Log in to registry' - run: | - echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - - name: 'Push image' - run: | - IMAGE_ID=$(echo ghcr.io/${{ github.repository_owner }}/${{ env.image_name }} | tr '[:upper:]' '[:lower:]') - VERSION=${{ needs.Tag.outputs.tag }} - echo "IMAGE_ID=$IMAGE_ID | VERSION=$VERSION" - docker tag ${{ env.image_name }} "${IMAGE_ID}:${VERSION}" - docker push "${IMAGE_ID}:${VERSION}" + files: ${{ needs.BuildConda.outputs.asset }} diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml deleted file mode 100644 index c1e3e9a..0000000 --- a/.github/workflows/coverage.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: Coverage - -on: - push: - branches: - - main - -jobs: - - Coveralls: - runs-on: ubuntu-latest - steps: - - name: 'Checkout' - uses: actions/checkout@v2 - - name: 'Set up Python' - uses: actions/setup-python@v2 - with: - python-version: '3.9' - - name: 'Install Soft' - run: | - pip install . 
- - name: 'Install Deps' - run: | - pip install pytest-cov - - name: 'Test' - run: | - coverage run -m pytest - coverage lcov - - name: 'Coveralls' - uses: coverallsapp/github-action@master - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - path-to-lcov: ./coverage.lcov diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..358f92d --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,23 @@ +name: Lint + +on: [push, pull_request] + +jobs: + + Lint: + runs-on: ubuntu-latest + steps: + - name: 'Checkout' + uses: actions/checkout@v2 + with: + ref: ${{ github.ref }} + fetch-depth: 0 + - name: Lint Code Base + uses: github/super-linter@v4 + env: + VALIDATE_ALL_CODEBASE: false + VALIDATE_YAML: false + VALIDATE_XML: false + VALIDATE_DOCKERFILE_HADOLINT: false + DEFAULT_BRANCH: main + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index efb1be1..bed8079 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,35 +1,38 @@ name: Tests -on: [push, pull_request] +on: + push: + branches: + - main + pull_request: + branches: + - main jobs: - Lint: + Build: runs-on: ubuntu-latest + defaults: + run: + shell: bash -l {0} steps: - name: 'Checkout' uses: actions/checkout@v2 with: ref: ${{ github.ref }} - fetch-depth: 0 - - name: Lint Code Base - uses: github/super-linter@v4 - env: - VALIDATE_ALL_CODEBASE: false - VALIDATE_YAML: false - VALIDATE_XML: false - VALIDATE_DOCKERFILE_HADOLINT: false - DEFAULT_BRANCH: main - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: 'Deploying miniconda' + uses: conda-incubator/setup-miniconda@v2 + with: + miniconda-version: "latest" + environment-file: recipes/workflow.yaml + python-version: '3.9' + - name: 'Build conda package' + run: | + conda-build recipes/ - Build: - runs-on: ${{ matrix.os }}-latest - strategy: - matrix: - os: ["ubuntu"] - python-version: ['3.8', '3.9'] - env: - bld_path: /tmp/build + 
Pytest: + needs: [Build] + runs-on: ubuntu-latest defaults: run: shell: bash -l {0} @@ -41,38 +44,35 @@ jobs: - name: 'Deploying miniconda' uses: conda-incubator/setup-miniconda@v2 with: - miniconda-version: 'latest' - environment-file: conda.recipes/conda_build_env.yaml - python-version: ${{ matrix.python-version }} - - name: 'Set up output folder' - run: | - mkdir -p "${bld_path}" - - name: 'Build pip package' + miniconda-version: "latest" + environment-file: recipes/workflow.yaml + - name: 'Run tests' run: | - python3 -m build --outdir "${bld_path}" + conda env list + tox -e py37, py38 - Pytest: + Coverage: needs: [Build] - runs-on: ${{ matrix.os }}-latest - strategy: - matrix: - python-version: ['3.8', '3.9'] - os: [ubuntu] - env: - bld_path: /tmp/build + runs-on: ubuntu-latest + defaults: + run: + shell: bash -l {0} steps: - name: 'Checkout' uses: actions/checkout@v2 with: ref: ${{ github.ref }} - - name: 'Set up Python' - uses: actions/setup-python@v2 + - name: 'Deploying miniconda' + uses: conda-incubator/setup-miniconda@v2 with: - python-version: ${{ matrix.python-version }} - - name: 'Install' + miniconda-version: "latest" + environment-file: recipes/workflow.yaml + - name: 'Run tests' run: | - pip install . 
- - name: 'Run pytest' - run: | - pip install pytest - python -m pytest + tox -e py39 + coverage lcov + - name: 'Coveralls' + uses: coverallsapp/github-action@master + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + path-to-lcov: ./coverage.lcov diff --git a/.gitignore b/.gitignore index b8d56c9..33d12b2 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,4 @@ super-linter.log .eggs build debug +.tox diff --git a/environment.yml b/environment.yml index 6ef779a..ffa2fef 100644 --- a/environment.yml +++ b/environment.yml @@ -9,5 +9,7 @@ dependencies: - cobra<0.25.0 - markupsafe==2.0.1 - pandas + - plotly - pytest + - python-kaleido - pyyaml diff --git a/recipes/conda-env.yaml b/recipes/conda-env.yaml new file mode 100644 index 0000000..c89f58b --- /dev/null +++ b/recipes/conda-env.yaml @@ -0,0 +1,17 @@ +name: straindesign +channels: + - conda-forge + - plotly + - bioconda + - defaults +dependencies: + - biopython + - cameo + - cobra<0.25.0 # https://github.com/biosustain/cameo/issues/296 + - markupsafe==2.0.1 # https://github.com/pallets/markupsafe/issues/304 + - pandas + - plotly + - pytest + - python-kaleido + - python + - pyyaml diff --git a/recipes/meta.yaml b/recipes/meta.yaml new file mode 100644 index 0000000..e0ca951 --- /dev/null +++ b/recipes/meta.yaml @@ -0,0 +1,49 @@ +{% set name = "straindesign" %} +{% set version = "3.0.0" %} + +package: + name: {{ name }} + version: {{ version }} + +source: + path: ../ + +build: + number: 0 + noarch: python + script: {{ PYTHON }} -m pip install -vv . 
+ +requirements: + build: + - pip + - pytest + - python + - pyyaml + - setuptools + run: + - biopython + - cameo + - cobra<0.25.0 # https://github.com/biosustain/cameo/issues/296 + - markupsafe==2.0.1 # https://github.com/pallets/markupsafe/issues/304 + - pandas + - plotly + - pytest + - python-kaleido + - python + - pyyaml + +test: + imports: + - {{ name }} + commands: + - python -m {{ name }} --help + +about: + home: https://github.com/brsynth/{{ name }} + summary: Cli interface to predict gene knockout targets with an heterologous pathway + license: MIT + license_file: LICENSE + +extra: + recipe-maintainers: + - guillaume-gricourt diff --git a/conda.recipes/conda_build_env.yaml b/recipes/workflow.yaml similarity index 53% rename from conda.recipes/conda_build_env.yaml rename to recipes/workflow.yaml index a516b4f..b5dbade 100644 --- a/conda.recipes/conda_build_env.yaml +++ b/recipes/workflow.yaml @@ -1,15 +1,15 @@ name: test channels: - - bioconda - conda-forge + - bioconda - defaults dependencies: - - anaconda-client - - biopython - build - - cameo - - cobra<0.25.0 - - markupsafe=2.0.1 - - pandas + - conda-build + - coverage - pytest + - pytest-cov + - python - pyyaml + - tox + - tox-conda diff --git a/src/straindesign/io/__init__.py b/src/straindesign/io/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/straindesign/utils/__init__.py b/src/straindesign/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..1266104 --- /dev/null +++ b/tox.ini @@ -0,0 +1,7 @@ +[tox] +requires = coverage, tox-conda +envlist = py37, py38, py39 + +[testenv] +conda_env=recipes/conda-env.yaml +commands=coverage run -m pytest From fbacaabe9f578f6a4c1f3bf1ce7706ab2dab779c Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Mon, 26 Sep 2022 15:59:31 +0200 Subject: [PATCH 19/20] docs(github): update for reduce-model and analyzing-model commands --- README.md | 79 
++++++++++++++++++++++++++----------------------------- 1 file changed, 38 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index 4663cc4..b326c1b 100644 --- a/README.md +++ b/README.md @@ -10,68 +10,65 @@ ## Installation -### Conda - ```sh conda install -c bioconda straindesign ``` -### Docker - -```sh -docker pull ghcr.io/brsynth/straindesign: -``` - -### Pip - -Download asset from the last *Releases*. - -* Unzip asset - -```sh -unzip -``` - -* Install *wheel* with *pip* - -```sh -pip install .whl -``` - ## Usage -Example: Define the best combination of genes deletion to optimize a target. +### Define the best combination of genes deletion to optimize a target. ```sh -python -m straindesign \ +python -m straindesign simulate-deletion \ [input files] --input-model-file --input-pathway-file - --input-medium-file - [input parameters] + --input-medium-file + [parameters] --biomass-rxn-id --target-rxn-id --substrate-rxn-id [output file] - --output-file + --output-file ``` -Or with docker: +### Delete genes in a model ```sh -docker run \ - -it \ - --rm \ - -v $PWD:/data \ - straindesign:latest \ - --input-model /data/ \ - --input-pathway-file /data/ \ - --input-medium-file /data/ \ - --biomass-rxn-id \ - --target-rxn-id \ +python -m straindesign reduce-model \ + [input files] + --input-model-file + --input-straindesign-file + and/or + --input-gene-str + [parameters] + --parameter-strategy-str + [output file] + --output-file-sbml +``` +You can provide a list of genes to delete in the model or the file produced by the command `simulate-deletion`. 
+If this file is provided, the combination of genes is chosen among three strategies: +* yield-max: genes are sorted by the best yield +* gene-max: the combination of the maximum number of genes +* gene-min: the combination of the minimum number of genes + +### Produce a pareto plot + +```sh +python -m straindesign analyzing-model \ + [input files] + --input-model-file + --input-medium-file + --input-pathway-file + [parameters] + --biomass-rxn-id + --target-rxn-id --substrate-rxn-id - --output-file /data/ + [output file] + --output-pareto-png ``` +You can provide a heterologous pathway to implement the metabolic pathway producing the targeted compound represented by the `target-rxn-id`, the reaction which produces this compound. +The `substrate-rxn-id` argument lets you choose the main carbon source. ## Tests From 801f27f231e0935277a2a026b25bb66a2bd260e7 Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Mon, 26 Sep 2022 21:30:14 +0200 Subject: [PATCH 20/20] fix(github): refactor ci/cd --- .github/workflows/test.yml | 12 +++++++++--- MANIFEST.in | 2 +- environment.yml | 15 --------------- recipes/conda-env.yaml | 17 ----------------- recipes/workflow.yaml | 12 ++++++++++++ setup.py | 2 +- tox.ini | 7 ------- 7 files changed, 23 insertions(+), 44 deletions(-) delete mode 100644 environment.yml delete mode 100644 recipes/conda-env.yaml delete mode 100644 tox.ini diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bed8079..c3b8bf4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,6 +33,9 @@ jobs: Pytest: needs: [Build] runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.8, 3.9] defaults: run: shell: bash -l {0} @@ -46,10 +49,11 @@ jobs: with: miniconda-version: "latest" environment-file: recipes/workflow.yaml + python-version: ${{ matrix.python-version }} - name: 'Run tests' run: | - conda env list - tox -e py37, py38 + pip install --no-deps .
+ python -m pytest tests/unit Coverage: needs: [Build] @@ -67,9 +71,11 @@ jobs: with: miniconda-version: "latest" environment-file: recipes/workflow.yaml + python-version: '3.9' - name: 'Run tests' run: | - tox -e py39 + pip install --no-deps . + coverage run -m pytest coverage lcov - name: 'Coveralls' uses: coverallsapp/github-action@master diff --git a/MANIFEST.in b/MANIFEST.in index d95b4ec..eafec12 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1 @@ -include environment.yml +graft recipes diff --git a/environment.yml b/environment.yml deleted file mode 100644 index ffa2fef..0000000 --- a/environment.yml +++ /dev/null @@ -1,15 +0,0 @@ -name: straindesign -channels: - - bioconda - - conda-forge - - defaults -dependencies: - - biopython - - cameo - - cobra<0.25.0 - - markupsafe==2.0.1 - - pandas - - plotly - - pytest - - python-kaleido - - pyyaml diff --git a/recipes/conda-env.yaml b/recipes/conda-env.yaml deleted file mode 100644 index c89f58b..0000000 --- a/recipes/conda-env.yaml +++ /dev/null @@ -1,17 +0,0 @@ -name: straindesign -channels: - - conda-forge - - plotly - - bioconda - - defaults -dependencies: - - biopython - - cameo - - cobra<0.25.0 # https://github.com/biosustain/cameo/issues/296 - - markupsafe==2.0.1 # https://github.com/pallets/markupsafe/issues/304 - - pandas - - plotly - - pytest - - python-kaleido - - python - - pyyaml diff --git a/recipes/workflow.yaml b/recipes/workflow.yaml index b5dbade..c19612f 100644 --- a/recipes/workflow.yaml +++ b/recipes/workflow.yaml @@ -1,9 +1,21 @@ name: test channels: - conda-forge + - plotly - bioconda - defaults dependencies: + # straindesign. + - biopython + - cameo + - cobra<0.25.0 + - markupsafe==2.0.1 + - pandas + - plotly + - python-kaleido + - python + - pyyaml + # build. 
- build - conda-build - coverage diff --git a/setup.py b/setup.py index e083655..d4f62fd 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ # App name - dependencies env = {} -with open("environment.yml") as fid: +with open("recipes/workflow.yaml") as fid: env = yaml.safe_load(fid) name = env["name"] install_requires = [] diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 1266104..0000000 --- a/tox.ini +++ /dev/null @@ -1,7 +0,0 @@ -[tox] -requires = coverage, tox-conda -envlist = py37, py38, py39 - -[testenv] -conda_env=recipes/conda-env.yaml -commands=coverage run -m pytest