diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c56966f..e50d52d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -138,12 +138,11 @@ jobs: github_token: ${{ secrets.GITHUB_TOKEN }} branch: main - BuildPip: - needs: Commit + BuildConda: + needs: [Commit, Tag] runs-on: ubuntu-latest env: - bld_path: /tmp/build - asset: pip.zip + asset: straindesign-${{ needs.Tag.outputs.tag }}-py_0.tar.bz2 outputs: asset: ${{ env.asset }} defaults: @@ -158,74 +157,30 @@ jobs: uses: conda-incubator/setup-miniconda@v2 with: miniconda-version: "latest" - environment-file: conda.recipes/conda_build_env.yaml + environment-file: recipes/workflow.yaml python-version: '3.9' - - name: 'Build pip package' + - name: 'Build conda package' run: | - python3 -m build --outdir ${bld_path} - - name: 'Create asset' - run: | - zip -r -j ${asset} ${bld_path} + conda-build recipes --output-folder . && cp "$(conda-build recipes --output-folder . --output)" "${{ env.asset }}" - name: 'Upload Artifact Package' uses: actions/upload-artifact@v2 with: - name: package-pip-artifact + name: package-conda-artifact path: ${{ env.asset }} retention-days: 1 if-no-files-found: error Release: - needs: [Tag, BuildPip] + needs: [Tag, BuildConda] runs-on: ubuntu-latest steps: - name: 'Download Artifact Package - Pip' uses: actions/download-artifact@v2 with: - name: package-pip-artifact + name: package-conda-artifact - name: 'Create Release' uses: softprops/action-gh-release@v1 with: tag_name: ${{ needs.Tag.outputs.tag }} body: ${{ needs.Tag.outputs.changelog }} - files: ${{ needs.BuildPip.outputs.asset }} - - Docker: - needs: [Tag, BuildPip] - runs-on: ubuntu-latest - permissions: - packages: write - contents: read - steps: - - name: 'Checkout' - uses: actions/checkout@v2 - with: - ref: stable - - name: 'Download Artifact Package - Pip' - uses: actions/download-artifact@v2 - with: - name: package-pip-artifact - - name: 'Unzip pip artifact' - run: | - echo "list directory" - ls - unzip pip.zip - echo "list 
directory" - ls - - name: 'Define Image name' - run: | - image_name=${{ github.repository }} - image_name=$(echo "$image_name" | cut -f2 -d"/" | tr '[:upper:]' '[:lower:]') - echo "image_name=$image_name" >> "$GITHUB_ENV" - - name: 'Build image' - run: | - docker build . --file Dockerfile --tag ${{ env.image_name }} --label "runnumber=${GITHUB_RUN_ID}" - - name: 'Log in to registry' - run: | - echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin - - name: 'Push image' - run: | - IMAGE_ID=$(echo ghcr.io/${{ github.repository_owner }}/${{ env.image_name }} | tr '[:upper:]' '[:lower:]') - VERSION=${{ needs.Tag.outputs.tag }} - echo "IMAGE_ID=$IMAGE_ID | VERSION=$VERSION" - docker tag ${{ env.image_name }} "${IMAGE_ID}:${VERSION}" - docker push "${IMAGE_ID}:${VERSION}" + files: ${{ needs.BuildConda.outputs.asset }} diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml deleted file mode 100644 index c1e3e9a..0000000 --- a/.github/workflows/coverage.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: Coverage - -on: - push: - branches: - - main - -jobs: - - Coveralls: - runs-on: ubuntu-latest - steps: - - name: 'Checkout' - uses: actions/checkout@v2 - - name: 'Set up Python' - uses: actions/setup-python@v2 - with: - python-version: '3.9' - - name: 'Install Soft' - run: | - pip install . 
- - name: 'Install Deps' - run: | - pip install pytest-cov - - name: 'Test' - run: | - coverage run -m pytest - coverage lcov - - name: 'Coveralls' - uses: coverallsapp/github-action@master - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - path-to-lcov: ./coverage.lcov diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..358f92d --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,23 @@ +name: Lint + +on: [push, pull_request] + +jobs: + + Lint: + runs-on: ubuntu-latest + steps: + - name: 'Checkout' + uses: actions/checkout@v2 + with: + ref: ${{ github.ref }} + fetch-depth: 0 + - name: Lint Code Base + uses: github/super-linter@v4 + env: + VALIDATE_ALL_CODEBASE: false + VALIDATE_YAML: false + VALIDATE_XML: false + VALIDATE_DOCKERFILE_HADOLINT: false + DEFAULT_BRANCH: main + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 80d90af..c3b8bf4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,35 +1,41 @@ name: Tests -on: [push, pull_request] +on: + push: + branches: + - main + pull_request: + branches: + - main jobs: - Lint: + Build: runs-on: ubuntu-latest + defaults: + run: + shell: bash -l {0} steps: - name: 'Checkout' uses: actions/checkout@v2 with: ref: ${{ github.ref }} - fetch-depth: 0 - - name: Lint Code Base - uses: github/super-linter@v4 - env: - VALIDATE_ALL_CODEBASE: false - VALIDATE_YAML: false - VALIDATE_XML: false - VALIDATE_DOCKERFILE_HADOLINT: false - DEFAULT_BRANCH: main - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: 'Deploying miniconda' + uses: conda-incubator/setup-miniconda@v2 + with: + miniconda-version: "latest" + environment-file: recipes/workflow.yaml + python-version: '3.9' + - name: 'Build conda package' + run: | + conda-build recipes/ - Build: - runs-on: ${{ matrix.os }}-latest + Pytest: + needs: [Build] + runs-on: ubuntu-latest strategy: matrix: - os: ["ubuntu"] - python-version: 
['3.8', '3.9'] - env: - bld_path: /tmp/build + python-version: [3.8, 3.9] defaults: run: shell: bash -l {0} @@ -41,38 +47,38 @@ jobs: - name: 'Deploying miniconda' uses: conda-incubator/setup-miniconda@v2 with: - miniconda-version: 'latest' - environment-file: conda.recipes/conda_build_env.yaml + miniconda-version: "latest" + environment-file: recipes/workflow.yaml python-version: ${{ matrix.python-version }} - - name: 'Set up output folder' - run: | - mkdir -p "${bld_path}" - - name: 'Build pip package' + - name: 'Run tests' run: | - python3 -m build --outdir "${bld_path}" + pip install --no-deps . + python -m pytest tests/unit - Pytest: + Coverage: needs: [Build] - runs-on: ${{ matrix.os }}-latest - strategy: - matrix: - python-version: ['3.8', '3.9'] - os: [ubuntu, macos] - env: - bld_path: /tmp/build + runs-on: ubuntu-latest + defaults: + run: + shell: bash -l {0} steps: - name: 'Checkout' uses: actions/checkout@v2 with: ref: ${{ github.ref }} - - name: 'Set up Python' - uses: actions/setup-python@v2 + - name: 'Deploying miniconda' + uses: conda-incubator/setup-miniconda@v2 with: - python-version: ${{ matrix.python-version }} - - name: 'Install' - run: | - pip install . - - name: 'Run pytest' + miniconda-version: "latest" + environment-file: recipes/workflow.yaml + python-version: '3.9' + - name: 'Run tests' run: | - pip install pytest - python -m pytest + pip install --no-deps . 
+ coverage run -m pytest + coverage lcov + - name: 'Coveralls' + uses: coverallsapp/github-action@master + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + path-to-lcov: ./coverage.lcov diff --git a/.gitignore b/.gitignore index b8d56c9..33d12b2 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,4 @@ super-linter.log .eggs build debug +.tox diff --git a/MANIFEST.in b/MANIFEST.in index d95b4ec..eafec12 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1 @@ -include environment.yml +graft recipes diff --git a/README.md b/README.md index 4663cc4..b326c1b 100644 --- a/README.md +++ b/README.md @@ -10,68 +10,65 @@ ## Installation -### Conda - ```sh conda install -c bioconda straindesign ``` -### Docker - -```sh -docker pull ghcr.io/brsynth/straindesign: -``` - -### Pip - -Download asset from the last *Releases*. - -* Unzip asset - -```sh -unzip -``` - -* Install *wheel* with *pip* - -```sh -pip install .whl -``` - ## Usage -Example: Define the best combination of genes deletion to optimize a target. +### Define the best combination of genes deletion to optimize a target. ```sh -python -m straindesign \ +python -m straindesign simulate-deletion \ [input files] --input-model-file --input-pathway-file - --input-medium-file - [input parameters] + --input-medium-file + [parameters] --biomass-rxn-id --target-rxn-id --substrate-rxn-id [output file] - --output-file + --output-file ``` -Or with docker: +### Delete genes in a model ```sh -docker run \ - -it \ - --rm \ - -v $PWD:/data \ - straindesign:latest \ - --input-model /data/ \ - --input-pathway-file /data/ \ - --input-medium-file /data/ \ - --biomass-rxn-id \ - --target-rxn-id \ +python -m straindesign reduce-model \ + [input files] + --input-model-file + --input-straindesign-file + and/or + --input-gene-str + [parameters] + --parameter-strategy-str + [output file] + --output-file-sbml +``` +You can provide a list of genes to delete in the model or the file produced by the command `simulate-deletion`. 
+If this file is provided, the combination of genes is chosen among three strategies: +* yield-max: genes are sorted by the best yield +* gene-max: the combination of the maximum number of genes +* gene-min: the combination of the minimum number of genes + +### Produce a pareto plot + +```sh +python -m straindesign analyzing-model \ + [input files] + --input-model-file + --input-medium-file + --input-pathway-file + [parameters] + --biomass-rxn-id + --target-rxn-id --substrate-rxn-id - --output-file /data/ + [output file] + --output-pareto-png ``` +You can provide a heterologous pathway to implement the metabolic pathway producing the targeted compound represented by the `target-rxn-id`, the reaction which produces this compound. +The `substrate-rxn-id` argument lets you choose the main carbon source. ## Tests diff --git a/conda.recipes/conda_build_env.yaml b/conda.recipes/conda_build_env.yaml deleted file mode 100644 index a516b4f..0000000 --- a/conda.recipes/conda_build_env.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: test -channels: - - bioconda - - conda-forge - - defaults -dependencies: - - anaconda-client - - biopython - - build - - cameo - - cobra<0.25.0 - - markupsafe=2.0.1 - - pandas - - pytest - - pyyaml diff --git a/environment.yml b/environment.yml deleted file mode 100644 index 6ef779a..0000000 --- a/environment.yml +++ /dev/null @@ -1,13 +0,0 @@ -name: straindesign -channels: - - bioconda - - conda-forge - - defaults -dependencies: - - biopython - - cameo - - cobra<0.25.0 - - markupsafe==2.0.1 - - pandas - - pytest - - pyyaml diff --git a/recipes/meta.yaml b/recipes/meta.yaml new file mode 100644 index 0000000..e0ca951 --- /dev/null +++ b/recipes/meta.yaml @@ -0,0 +1,49 @@ +{% set name = "straindesign" %} +{% set version = "3.0.0" %} + +package: + name: {{ name }} + version: {{ version }} + +source: + path: ../ + +build: + number: 0 + noarch: python + script: {{ PYTHON }} -m pip install -vv . 
+ +requirements: + build: + - pip + - pytest + - python + - pyyaml + - setuptools + run: + - biopython + - cameo + - cobra<0.25.0 # https://github.com/biosustain/cameo/issues/296 + - markupsafe==2.0.1 # https://github.com/pallets/markupsafe/issues/304 + - pandas + - plotly + - pytest + - python-kaleido + - python + - pyyaml + +test: + imports: + - {{ name }} + commands: + - python -m {{ name }} --help + +about: + home: https://github.com/brsynth/{{ name }} + summary: Cli interface to predict gene knockout targets with an heterologous pathway + license: MIT + license_file: LICENSE + +extra: + recipe-maintainers: + - guillaume-gricourt diff --git a/recipes/workflow.yaml b/recipes/workflow.yaml new file mode 100644 index 0000000..c19612f --- /dev/null +++ b/recipes/workflow.yaml @@ -0,0 +1,27 @@ +name: test +channels: + - conda-forge + - plotly + - bioconda + - defaults +dependencies: + # straindesign. + - biopython + - cameo + - cobra<0.25.0 + - markupsafe==2.0.1 + - pandas + - plotly + - python-kaleido + - python + - pyyaml + # build. 
+ - build + - conda-build + - coverage + - pytest + - pytest-cov + - python + - pyyaml + - tox + - tox-conda diff --git a/setup.py b/setup.py index e083655..d4f62fd 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ # App name - dependencies env = {} -with open("environment.yml") as fid: +with open("recipes/workflow.yaml") as fid: env = yaml.safe_load(fid) name = env["name"] install_requires = [] diff --git a/src/straindesign/__main__.py b/src/straindesign/__main__.py index 9de087f..449d2c7 100644 --- a/src/straindesign/__main__.py +++ b/src/straindesign/__main__.py @@ -1,224 +1,21 @@ -import argparse import logging -import os import sys -from straindesign._version import __app_name__ -from straindesign.medium import associate_flux_env, load_medium -from straindesign.metabolic import gene_ko, gene_ou -from straindesign.preprocess import build_model, genes_annotate, save_results +from straindesign import commands def main(): - """CLI for StrainDesign""" - - desc = ( - __app_name__ - + " provides a cli interface to run OptGene with an heterologous pathway." - ) - parser = argparse.ArgumentParser(description=desc, prog="python -m " + __app_name__) - # Input - parser_input = parser.add_argument_group("Input") - parser_input.add_argument( - "--input-model-file", type=str, required=True, help="GEM model file (SBML)" - ) - parser_input.add_argument( - "--input-pathway-file", - type=str, - required=False, - help="SBML file that contains an heterologous pathway", - ) - parser_input.add_argument( - "--biomass-rxn-id", - type=str, - required=True, - help="Biomass reaction ID", - ) - parser_input.add_argument( - "--target-rxn-id", - type=str, - required=False, - help="Target reaction ID", - ) - parser_input.add_argument( - "--substrate-rxn-id", - type=str, - required=False, - help="Substracte reaction ID (eg. 
carbon source)", - ) - - # Output - parser_output = parser.add_argument_group("Output") - parser_output.add_argument( - "--output-file-csv", - type=str, - help="output file (csv)", - ) - parser_output.add_argument( - "--output-file-tsv", - type=str, - help="output file (tsv)", - ) - - # Simulation - parser_sim = parser.add_argument_group("Simulation") - parser_sim.add_argument( - "--strategy", - type=str, - choices=["ko", "ou"], - default="ko", - help="Strategy to use : ko (knocking out) or ou " - "(over-under expressing), (default: ko)", - ) - parser_input.add_argument( - "--max-knockouts", - type=int, - default=3, - required=False, - help="Number of maximum knockouts genes allowed", - ) - - # Medium - parser_medium = parser.add_argument_group("Medium") - parser_medium.add_argument( - "--input-medium-file", - type=str, - help="Provide a csv or tsv file with an header as ," - ", . This file " - "provides information about metabolites (Metanetx Id) " - "to add or remove.", - ) - # Others. - parser_helper = parser.add_argument_group("Technical") - parser_helper.add_argument( - "--thread", - type=int, - default=1, - help="Number of threads to use", - ) - parser_helper.add_argument( - "--seed", - type=int, - default=0, - help="Seed", - ) - parser_helper.add_argument( - "--max-time", - type=int, - help="Max time to search the best combination (minutes)", - ) - - parser_helper.add_argument( - "--log-level", - choices=["ERROR", "WARNING", "INFO", "DEBUG"], - default="INFO", - type=str, - help="Log level", - ) - parser_input.add_argument( - "--email", - type=str, - required=False, - help="Provide your email to annotate genes id with the NCBI website", - ) - - # Compute - args = parser.parse_args() - - # Logging. 
- logger = logging.getLogger(name="main") - formatter = logging.Formatter( - "%(asctime)s - %(levelname)s - %(message)s", datefmt="%d-%m-%Y %H:%M" - ) - st_handler = logging.StreamHandler() - st_handler.setFormatter(formatter) - logger.addHandler(st_handler) - logger.setLevel(args.log_level) - - # Check arguments. - if not os.path.isfile(args.input_model_file): - logger.error('Input model file doesn"t exist: %s' % (args.input_model_file,)) - parser.exit(1) - if args.input_pathway_file is not None and not os.path.isfile( - args.input_pathway_file - ): - logger.error('Input pathway file doesn"t exist') - parser.exit(1) - - if args.output_file_csv and not os.path.isdir( - os.path.dirname(args.output_file_csv) - ): - logger.debug("Create out directory: %s") - os.makedirs(os.path.dirname(args.output_file_csv)) - if args.output_file_tsv and not os.path.isdir( - os.path.dirname(args.output_file_tsv) - ): - logger.debug("Create out directory: %s") - os.makedirs(os.path.dirname(args.output_file_tsv)) - - # Load model - logger.info("Build model") - model = build_model( - model_path=args.input_model_file, - pathway_path=args.input_pathway_file, - biomass_id=args.biomass_rxn_id, - target_id=args.target_rxn_id, - logger=logger, - ) - if model is None: - parser.exit(1) - - # Medium - logger.info("Build medium") - envcond = load_medium(path=args.input_medium_file) - model = associate_flux_env(model=model, envcond=envcond, logger=logger) - if model is None: - parser.exit(1) - - # Simulation - logger.info("Build gene ko") - res = None - if args.strategy == "ko": - logger.info("Run OptGene") - res = gene_ko( - model=model, - max_knockouts=args.max_knockouts, - biomass_id=args.biomass_rxn_id, - target_id=args.target_rxn_id, - substrate_id=args.substrate_rxn_id, - max_time=args.max_time, - logger=logger, - seed=args.seed, - thread=args.thread, - ) - elif args.strategy == "ou": - logger.info("Run OptKnock") - if args.substrate_rxn_id: - logger.warning("Substrate reaction will be 
ignored with OptKnock") - if args.seed: - logger.warning("Seed will be ignored with OptKnock") - res = gene_ou( - model=model, - max_knockouts=args.max_knockouts, - biomass_id=args.biomass_rxn_id, - target_id=args.target_rxn_id, - max_time=args.max_time, - logger=logger, - thread=args.thread, - ) - - # Processing Results - if res is not None: - if args.email and args.strategy == "ko": - logger.info("Perform gene annotation") - res = genes_annotate(model=model, df=res, email=args.email, logger=logger) - logger.info("Save results") - if args.output_file_csv: - save_results(res, path=args.output_file_csv, sep=",") - if args.output_file_tsv: - save_results(res, path=args.output_file_tsv, sep="\t") - - return 0 + """Entrypoint to commandline""" + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", + datefmt="%d-%m-%Y %H:%M", + ) + args = commands.parse_args() + # No arguments or subcommands were given. + if len(args.__dict__) < 1: + commands.print_help() + args.func(args) if __name__ == "__main__": diff --git a/src/straindesign/commands.py b/src/straindesign/commands.py new file mode 100644 index 0000000..0ebeae0 --- /dev/null +++ b/src/straindesign/commands.py @@ -0,0 +1,406 @@ +import argparse +import logging +import os + +from straindesign._version import __app_name__, __version__ +from straindesign.io import sbml +from straindesign.medium import associate_flux_env, load_medium +from straindesign.metabolic import gene_ko, gene_ou, plot_pareto, reduce_model +from straindesign.preprocess import ( + build_model, + genes_annotate, + load_straindesign_simulate_deletion, + save_results, +) +from straindesign.utils import cmdline +from straindesign.utils import model as utils_model + +AP = argparse.ArgumentParser( + description=__app_name__ + " provides a cli interface to predict gene knockout " + "targets with an heterologous pathway", + epilog="See online documentation: https://github.com/brsynth/" + __app_name__, +) 
+AP_subparsers = AP.add_subparsers(help="Sub-commands (use with -h for more info)") + + +def _cmd_red_mod(args): + logging.info("Start - reduce-model") + # Check arguments. + if not os.path.isfile(args.input_model_file): + cmdline.abort( + AP, "Input model file does not exist: %s" % (args.input_model_file,) + ) + if args.input_straindesign_file and not os.path.isfile( + args.input_straindesign_file + ): + cmdline.abort( + AP, + "Input %s file does not exist: %s" + % (__app_name__, args.input_straindesign_file), + ) + if args.input_straindesign_file is None and args.input_gene_str is None: + cmdline.abort( + AP, + "Provide at least --input-straindesign-file or --input-gene-str to have genes to delete in the model", + ) + cmdline.check_output_file(parser=AP, path=args.output_file_sbml) + + # Load model. + logging.info("Load model") + model = sbml.from_sbml(path=args.input_model_file) + + # Load genes. + logging.info("Load genes") + genes = [] + if args.input_straindesign_file: + genes.extend( + load_straindesign_simulate_deletion( + path=args.input_straindesign_file, strategy=args.parameter_strategy_str + ) + ) + if args.input_gene_str: + genes.extend(args.input_gene_str) + genes = list(set(genes)) + if len(genes) < 1: + cmdline.abort(AP, "No genes are provided to be deleted into the model") + + # Remove genes in the model. 
+ logging.info("Remove genes in the model") + logging.info("Genes to remove from the model are: %s" % (", ".join(genes))) + model = reduce_model(model=model, genes=genes) + + # Save model + logging.info("Write the model") + sbml.to_sbml(model=model, path=args.output_file_sbml) + + logging.info("End - reduce-model") + + +P_red_mod = AP_subparsers.add_parser("reduce-model", help="Delete genes in a model") +# Input +P_red_mod_input = P_red_mod.add_argument_group("Input") +P_red_mod_input.add_argument( + "--input-model-file", type=str, required=True, help="GEM model file (SBML)" +) +P_red_mod_input.add_argument( + "--input-straindesign-file", + type=str, + help="CSV file produced by the command " + __app_name__ + " simulate-deletion", +) +P_red_mod_input.add_argument( + "--input-gene-str", + nargs="+", + help="Gene ids to delete in the model", +) +# Output +P_red_mod_output = P_red_mod.add_argument_group("Output") +P_red_mod_output.add_argument( + "--output-file-sbml", + type=str, + required=True, + help="Model output file (SBML)", +) +# Parameters +P_red_mod_params = P_red_mod.add_argument_group("Parameters") +P_red_mod_params.add_argument( + "--parameter-strategy-str", + type=str, + choices=["yield-max", "gene-max", "gene-min"], + default="yield-max", + help="Strategy to use when genes are provided from --input-straindesign-file: " + "yield-max keeps the maximal yield, gene-max keeps the first association of genes combining " + "the biggest number of genes, gene-min keeps the first association of genes combining the " + "lowest number of genes", +) +P_red_mod.set_defaults(func=_cmd_red_mod) + + +def _cmd_sim_del(args): + """Search gene knockout combinations optimizing a target""" + logging.info("Start - simulate-deletion") + # Check arguments. 
+ if not os.path.isfile(args.input_model_file): + cmdline.abort( + AP, "Input model file does not exist: %s" % (args.input_model_file,) + ) + if args.input_pathway_file is not None and not os.path.isfile( + args.input_pathway_file + ): + cmdline.abort(AP, "Input pathway file does not exist") + if args.output_file_csv and not os.path.isdir( + os.path.dirname(args.output_file_csv) + ): + os.makedirs(os.path.dirname(args.output_file_csv)) + if args.output_file_tsv and not os.path.isdir( + os.path.dirname(args.output_file_tsv) + ): + os.makedirs(os.path.dirname(args.output_file_tsv)) + + # Load model + logging.info("Build model") + model = build_model( + model_path=args.input_model_file, + pathway_path=args.input_pathway_file, + biomass_id=args.biomass_rxn_id, + target_id=args.target_rxn_id, + ) + if model is None: + cmdline.abort(AP, "An error occured when the model was loaded") + + # Medium + logging.info("Build medium") + envcond = load_medium(path=args.input_medium_file) + model = associate_flux_env(model=model, envcond=envcond) + if model is None: + cmdline.abort(AP, "An error occured when the pathway was merged to the model") + + # Simulation + logging.info("Build gene ko") + res = None + if args.strategy == "ko": + logging.info("Run OptGene") + res = gene_ko( + model=model, + max_knockouts=args.max_knockouts, + biomass_id=args.biomass_rxn_id, + target_id=args.target_rxn_id, + substrate_id=args.substrate_rxn_id, + max_time=args.max_time, + seed=args.seed, + thread=args.thread, + ) + elif args.strategy == "ou": + logging.info("Run OptKnock") + if args.substrate_rxn_id: + logging.warning("Substrate reaction will be ignored with OptKnock") + if args.seed: + logging.warning("Seed will be ignored with OptKnock") + res = gene_ou( + model=model, + max_knockouts=args.max_knockouts, + biomass_id=args.biomass_rxn_id, + target_id=args.target_rxn_id, + max_time=args.max_time, + thread=args.thread, + ) + + # Processing Results + if res is not None: + if args.email and 
args.strategy == "ko": + logging.info("Perform gene annotation") + res = genes_annotate(model=model, df=res, email=args.email) + logging.info("Save results") + if args.output_file_csv: + save_results(res, path=args.output_file_csv, sep=",") + if args.output_file_tsv: + save_results(res, path=args.output_file_tsv, sep="\t") + + logging.info("End - simulate-deletion") + + +P_sim_del = AP_subparsers.add_parser("simulate-deletion", help=_cmd_sim_del.__doc__) +# Input +P_sim_del_input = P_sim_del.add_argument_group("Input") +P_sim_del_input.add_argument( + "--input-model-file", type=str, required=True, help="GEM model file (SBML)" +) +P_sim_del_input.add_argument( + "--input-pathway-file", + type=str, + required=False, + help="SBML file that contains an heterologous pathway", +) +P_sim_del_input.add_argument( + "--biomass-rxn-id", + type=str, + required=True, + help="Biomass reaction ID", +) +P_sim_del_input.add_argument( + "--target-rxn-id", + type=str, + help="Target reaction ID", +) +P_sim_del_input.add_argument( + "--substrate-rxn-id", + type=str, + help="Substracte reaction ID (eg. 
carbon source)", +) +# Output +P_sim_del_output = P_sim_del.add_argument_group("Output") +P_sim_del_output.add_argument( + "--output-file-csv", + type=str, + help="output file (csv)", +) +P_sim_del_output.add_argument( + "--output-file-tsv", + type=str, + help="output file (tsv)", +) +# Parameters - Simulation +P_sim_del_sim = P_sim_del.add_argument_group("Simulation") +P_sim_del_sim.add_argument( + "--strategy", + type=str, + choices=["ko", "ou"], + default="ko", + help="Strategy to use : ko (knocking out) or ou " + "(over-under expressing), (default: ko)", +) +P_sim_del_sim.add_argument( + "--max-knockouts", + type=int, + default=3, + required=False, + help="Number of maximum knockouts genes allowed", +) +# Parameters - Medium +P_sim_del_medium = P_sim_del.add_argument_group("Medium") +P_sim_del_medium.add_argument( + "--input-medium-file", + type=str, + help="Provide a csv or tsv file with an header as ," + ", . This file " + "provides information about metabolites (Metanetx Id) " + "to add or remove.", +) +# Parameters - Others. +P_sim_del_helper = P_sim_del.add_argument_group("Technical") +P_sim_del_helper.add_argument( + "--thread", + type=int, + default=1, + help="Number of threads to use", +) +P_sim_del_helper.add_argument( + "--seed", + type=int, + default=0, + help="Seed", +) +P_sim_del_helper.add_argument( + "--max-time", + type=int, + help="Max time to search the best combination (minutes)", +) +P_sim_del_helper.add_argument( + "--email", + type=str, + required=False, + help="Provide your email to annotate genes id with the NCBI website", +) +P_sim_del.set_defaults(func=_cmd_sim_del) + + +# Analyzing model +def _cmd_ana_mod(args): + """Analyzing model""" + logging.info("Start - analyzing-model") + # Check arguments. + if not os.path.isfile(args.input_model_file): + cmdline.abort( + AP, "Input model file does not exist: %s" % (args.input_model_file,) + ) + cmdline.check_output_file(parser=AP, path=args.output_pareto_png) + + # Load model. 
+ logging.info("Build model") + if args.input_pathway_file: + model = build_model( + model_path=args.input_model_file, + pathway_path=args.input_pathway_file, + biomass_id=args.biomass_rxn_id, + target_id=args.target_rxn_id, + ) + else: + model = sbml.from_sbml(path=args.input_model_file) + + if model is None: + cmdline.abort(AP, "An error occured when the model was loaded") + + # Medium. + if args.input_medium_file: + logging.info("Build medium") + envcond = load_medium(path=args.input_medium_file) + model = associate_flux_env(model=model, envcond=envcond) + if model is None: + cmdline.abort( + AP, "An error occured when the pathway was merged to the model" + ) + + # Check reactions. + rxns = [args.biomass_rxn_id, args.target_rxn_id] + if args.substrate_rxn_id: + rxns.append(args.substrate_rxn_id) + for rxn in rxns: + if not utils_model.has_reaction(model=model, reaction=rxn): + cmdline.abort(AP, "Reaction is not found in the model: %s" % (rxn,)) + + # Plot pareto. + logging.info("Plot pareto") + plot_pareto( + model=model, + path=args.output_pareto_png, + biomass_rxn_id=args.biomass_rxn_id, + target_rxn_id=args.target_rxn_id, + substrate_rxn_id=args.substrate_rxn_id, + ) + + logging.info("End - analysing-model") + + +P_ana_mod = AP_subparsers.add_parser("analyzing-model", help=_cmd_ana_mod.__doc__) +# Input +P_ana_mod_input = P_ana_mod.add_argument_group("Input") +P_ana_mod_input.add_argument( + "--input-model-file", required=True, help="GEM model file (SBML)" +) +P_ana_mod_input.add_argument( + "--input-pathway-file", help="SBML file that contains an heterologous pathway" +) +P_ana_mod_input.add_argument( + "--biomass-rxn-id", required=True, help="Biomass reaction ID" +) +P_ana_mod_input.add_argument( + "--target-rxn-id", required=True, help="Target reaction ID" +) +P_ana_mod_input.add_argument( + "--substrate-rxn-id", help="Substracte reaction ID (eg. 
carbon source)" +) +# Output +P_ana_mod_output = P_ana_mod.add_argument_group("Output") +P_ana_mod_output.add_argument("--output-pareto-png", help="Output pareto file (PNG)") +# Parameters - Medium +P_ana_mod_medium = P_ana_mod.add_argument_group("Medium") +P_ana_mod_medium.add_argument( + "--input-medium-file", + help="Provide a csv or tsv file with an header as ," + ", . This file " + "provides information about metabolites (Metanetx Id) " + "to add or remove.", +) +P_ana_mod.set_defaults(func=_cmd_ana_mod) + + +# Version. +def print_version(_args): + """Display this program"s version""" + print(__version__) + + +P_version = AP_subparsers.add_parser("version", help=print_version.__doc__) +P_version.set_defaults(func=print_version) + + +# Help. +def print_help(): + """Display this program"s help""" + print(AP_subparsers.help) + AP.exit() + + +# Main. +def parse_args(args=None): + """Parse the command line""" + return AP.parse_args(args=args) diff --git a/src/straindesign/io/__init__.py b/src/straindesign/io/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/straindesign/io/sbml.py b/src/straindesign/io/sbml.py new file mode 100644 index 0000000..6b0e6cb --- /dev/null +++ b/src/straindesign/io/sbml.py @@ -0,0 +1,10 @@ +import cameo +import cobra + + +def from_sbml(path: str) -> cobra.Model: + return cameo.load_model(path) + + +def to_sbml(model: cobra.Model, path: str) -> None: + cobra.io.write_sbml_model(model, path) diff --git a/src/straindesign/io/tabulate.py b/src/straindesign/io/tabulate.py new file mode 100644 index 0000000..b8937f9 --- /dev/null +++ b/src/straindesign/io/tabulate.py @@ -0,0 +1,38 @@ +import csv + +import pandas as pd + + +class Tabulate(object): + """Tabulate is a class loading/saving DataFrame from CSV, TSV files""" + + @classmethod + def from_tabulate( + cls, + path: str, + sep: str = "infer", + **kwargs, + ) -> pd.DataFrame: + # Find delimiter. 
+ if sep == "infer": + with open(path) as fid: + dialect = csv.Sniffer().sniff(fid.readline()) + sep = dialect.delimiter + # Load. + df = pd.read_csv( + path, + sep=sep, + **kwargs, + ) + return df + + @classmethod + def to_tabulate( + cls, path: str, df: pd.DataFrame, sep: str = "infer", index: bool = False + ) -> None: + if sep == "infer": + if path.endswith("tsv"): + sep = "\t" + elif path.endswith("csv"): + sep = "," + df.to_csv(path, sep=sep, index=index) diff --git a/src/straindesign/medium.py b/src/straindesign/medium.py index 98c4c93..26c74bd 100644 --- a/src/straindesign/medium.py +++ b/src/straindesign/medium.py @@ -25,11 +25,11 @@ def load_medium(path: str) -> dict: return envcond -def associate_flux_env(model: Model, envcond: dict, logger: logging.Logger) -> Model: +def associate_flux_env(model: Model, envcond: dict) -> Model: for reaction_id, bounds in envcond.items(): reaction = model.reactions.get_by_id(reaction_id) if reaction is None: - logger.error("Reaction: %s not found in the model" % (reaction_id,)) + logging.error("Reaction: %s not found in the model" % (reaction_id,)) return None reaction.bounds = bounds return model diff --git a/src/straindesign/metabolic.py b/src/straindesign/metabolic.py index 8d56019..732ad06 100644 --- a/src/straindesign/metabolic.py +++ b/src/straindesign/metabolic.py @@ -1,13 +1,43 @@ import logging -from typing import Optional +from typing import List, Optional +import cameo +import cobra import pandas as pd +import plotly from cameo.flux_analysis.simulation import lmoma from cameo.strain_design.deterministic.linear_programming import OptKnock from cameo.strain_design.heuristic.evolutionary_based import OptGene +from cameo.visualization.plotting.with_plotly import PlotlyPlotter from cobra.core.model import Model +def reduce_model(model: cobra.Model, genes: List[str]): + # Check if gene is in the model. 
+ model_gene_ids = [x.id for x in model.genes] + sgenes = set(genes) + genes = list(genes) + for gene in sgenes: + if gene not in model_gene_ids: + logging.warning( + "Gene: %s not found in the model, it's a Gene ID provided ?" % (gene,) + ) + genes.remove(gene) + # Remove genes. + number_of_reactions = len(model.reactions) + cobra.manipulation.remove_genes(model, gene_list=genes, remove_reactions=True) + # Clean model, functions highly dependent of the cobra's version. + # model, reactions = cobra.manipulation.prune_unused_reactions(model=model) + # model, metabolites = cobra.manipulation.delete.prune_unused_metabolites(model=model) + + logging.info("Number of Genes deleted: %s" % (len(genes),)) + logging.info( + "Number of Reactions deleted: %s" + % (number_of_reactions - len(model.reactions),) + ) + return model + + def gene_ko( model: Model, max_knockouts: int, @@ -15,7 +45,6 @@ def gene_ko( target_id: str, substrate_id: str, max_time: Optional[int], - logger: logging.Logger, seed: int, thread: int = 1, ) -> pd.DataFrame: @@ -50,7 +79,7 @@ def gene_ko( try: df = results.data_frame except Exception: - logger.warning("An error occurred, maybe there is no solution") + logging.warning("An error occurred, maybe there is no solution") return df @@ -60,7 +89,6 @@ def gene_ou( biomass_id: str, target_id: str, max_time: Optional[int], - logger: logging.Logger, thread: int = 1, ) -> pd.DataFrame: optknock = OptKnock(model, fraction_of_optimum=0.1) @@ -88,5 +116,31 @@ def gene_ou( try: df = results.data_frame except Exception: - logger.warning("An error occurred, maybe there is no solution") + logging.warning("An error occurred, maybe there is no solution") return df + + +def plot_pareto( + model: cobra.Model, + path: str, + biomass_rxn_id: str, + target_rxn_id: str, + substrate_rxn_id: Optional[str] = None, +) -> None: + # Init. + abp = cameo.visualization.plotting.abstract.AbstractPlotter() + grid = abp.grid() + plotter = PlotlyPlotter() + + # Create graph. 
+ result = cameo.phenotypic_phase_plane( + model, + variables=[model.reactions.get_by_id(biomass_rxn_id)], + objective=model.reactions.get_by_id(target_rxn_id), + source=substrate_rxn_id, + ) + result.plot(plotter, grid=grid) + + # Export graph. + fig = dict(data=grid.plots[0].data[0], layout=grid.plots[0].layout) + plotly.io.write_image(fig=fig, file=path, format="png", engine="kaleido") diff --git a/src/straindesign/preprocess.py b/src/straindesign/preprocess.py index 1d9703e..6c8a242 100644 --- a/src/straindesign/preprocess.py +++ b/src/straindesign/preprocess.py @@ -1,12 +1,65 @@ import ast +import itertools import logging import time -from typing import Dict +from typing import Dict, List import pandas as pd from Bio import Entrez from cameo import load_model from cobra.core.model import Model +from straindesign._version import __app_name__ +from straindesign.io import tabulate + + +def load_straindesign_simulate_deletion(path: str, strategy: str) -> List[str]: + df = tabulate.Tabulate.from_tabulate(path=path) + header = [ + "reactions", + "genes", + "size", + "fva_min", + "fva_max", + "target_flux", + "biomass_flux", + "yield", + "fitness", + ] + if df.columns.to_list() != header: + raise ValueError( + "File: %s has a header not corresponding to the output of % simulate-deletion commands" + % (path, __app_name__) + ) + if df.empty: + logging.warning("File: %s is empty, no gene found" % (path,)) + return [] + + # Format df. + df["genes"] = df["genes"].apply(lambda x: ast.literal_eval(x)) + df["size"] = df["genes"].apply(lambda x: len(x)) + + # Sort by yield by default. 
+ df.sort_values( + by=["yield", "size", "biomass_flux", "target_flux"], + ascending=[False, True, False, False], + inplace=True, + ) + if strategy == "gene-max": + df.sort_values( + by=["size", "yield", "biomass_flux", "target_flux"], + ascending=[False, False, False, False], + inplace=True, + ) + elif strategy == "gene-min": + df.sort_values( + by=["size", "yield", "biomass_flux", "target_flux"], + ascending=[True, False, False, False], + inplace=True, + ) + df.reset_index(inplace=True, drop=True) + iter_genes = itertools.chain.from_iterable(df.loc[0, "genes"]) + genes = sorted(list(set(iter_genes))) + return genes def build_model( @@ -14,29 +67,28 @@ def build_model( pathway_path: str, biomass_id: str, target_id: str, - logger: logging.Logger, ): - logger.info("Load model") + logging.info("Load model") model = load_model(model_path) if pathway_path: - logger.info("Load pathway") + logging.info("Load pathway") pathway_model = load_model(pathway_path) - logger.info("Merge model and pathway") + logging.info("Merge model and pathway") model.merge(pathway_model, inplace=True) # Check if reactions are in the model reactions_id = [x.id for x in model.reactions] - logger.info("Check if main objective is in the model") + logging.info("Check if main objective is in the model") if biomass_id not in reactions_id: - logger.error("Reaction not found in the model: %s" % (biomass_id,)) + logging.error("Reaction not found in the model: %s" % (biomass_id,)) return None - logger.info("Check if target reaction is in the model") + logging.info("Check if target reaction is in the model") if target_id not in reactions_id: - logger.error("Reaction not found in the model: %s" % (target_id,)) + logging.error("Reaction not found in the model: %s" % (target_id,)) return None - logger.info("Set objective") + logging.info("Set objective") model.objective = { model.reactions.get_by_id(biomass_id): 1.0, model.reactions.get_by_id(target_id): 0.5, @@ -49,7 +101,6 @@ def genes_annotate( model: 
Model, df: pd.DataFrame, email: str, - logger: logging.Logger, ) -> pd.DataFrame: if df.empty: @@ -94,7 +145,7 @@ def genes_annotate( labels_groups.append("(%s)" % (",".join(labels),)) df.at[ix, "genes_annotation"] = ",".join(labels_groups) if is_ncbi_error: - logger.warning("NCBI annotation failing for some items") + logging.warning("NCBI annotation failing for some items") return df diff --git a/src/straindesign/utils/__init__.py b/src/straindesign/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/straindesign/utils/cmd.py b/src/straindesign/utils/cmd.py new file mode 100644 index 0000000..355f6d8 --- /dev/null +++ b/src/straindesign/utils/cmd.py @@ -0,0 +1,26 @@ +import logging +import subprocess +from typing import List + + +def run(args: List[str], show_output: bool = True) -> subprocess.CompletedProcess: + """Run a command line. + + Parameters + ---------- + args: List[str] + A list of argument + show_output: bool (default: True) + Output command line + + Return + ------ + subprocess.CompletedProcess + Return result obtained with subprocess + """ + ret = subprocess.run(args, capture_output=True, encoding="utf8") + if show_output and ret.stdout is not None: + logging.info(ret.stdout) + if show_output and ret.stderr is not None: + logging.warning(ret.stderr) + return ret diff --git a/src/straindesign/utils/cmdline.py b/src/straindesign/utils/cmdline.py new file mode 100644 index 0000000..b1c8632 --- /dev/null +++ b/src/straindesign/utils/cmdline.py @@ -0,0 +1,30 @@ +import argparse +import os + + +def abort(parser: argparse.ArgumentParser, msg: str = ""): + """Abort the program + + Parameters + ---------- + parser: + The parser to use + msg: str + The message to throw from the parser + + Return + ------ + """ + parser.error(msg) + + +def check_output_file( + parser: argparse.ArgumentParser, path: str, overwrite: bool = False +) -> None: + msg = None + if path and not os.path.isdir(os.path.dirname(os.path.abspath(path))): + msg = 
"Outdir does not exists: %s" % (path,) + if overwrite and os.path.isfile(path): + msg = "File exists: %s" % (path,) + if msg: + abort(parser=parser, msg=msg) diff --git a/src/straindesign/utils/model.py b/src/straindesign/utils/model.py new file mode 100644 index 0000000..0b3cd60 --- /dev/null +++ b/src/straindesign/utils/model.py @@ -0,0 +1,15 @@ +import cobra + + +def count_gene(model: cobra.Model) -> int: + return len(model.genes) + + +def count_reaction(model: cobra.Model) -> int: + return len(model.reactions) + + +def has_reaction(model: cobra.Model, reaction: str) -> bool: + if reaction in [x.id for x in model.reactions]: + return True + return False diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/dataset/gene/gene.empty.csv b/tests/dataset/gene/gene.empty.csv new file mode 100644 index 0000000..db10809 --- /dev/null +++ b/tests/dataset/gene/gene.empty.csv @@ -0,0 +1 @@ +reactions,genes,size,fva_min,fva_max,target_flux,biomass_flux,yield,fitness diff --git a/tests/dataset/gene/gene.value_error.csv b/tests/dataset/gene/gene.value_error.csv new file mode 100644 index 0000000..3d0d13e --- /dev/null +++ b/tests/dataset/gene/gene.value_error.csv @@ -0,0 +1 @@ +reactions,genes,size,fva_min,fva_max,target_flux,biomass_flux,yield diff --git a/tests/dataset/gene/simulate_deletion.butanol.iAF1260.csv b/tests/dataset/gene/simulate_deletion.butanol.iAF1260.csv new file mode 100644 index 0000000..bdb52f8 --- /dev/null +++ b/tests/dataset/gene/simulate_deletion.butanol.iAF1260.csv @@ -0,0 +1,6 @@ +reactions,genes,size,fva_min,fva_max,target_flux,biomass_flux,yield,fitness +"('ATPS4rpp',)","(('b3735',), ('b3734',), ('b3736',), ('b3731',), ('b3732',))",1,0.0,0.00641204819277442,0.0,0.2199110076152308,0.0,0.0 +"('TPI',)","(('b3919',),)",1,0.0,0.006455168431418056,0.0,0.0,0.0,0.0 +"('ALAt2pp', 'DSERt2pp', 'DALAt2pp', 'BALAt2pp', 'TPI')","(('b4208', 'b3919'),)",2,0.0,0.006455167733784362,0.0,0.0,0.0,0.0 
+"('OPHHX', 'TPI')","(('b3835', 'b3919'),)",2,0.0,0.006455168423924535,0.0,0.0,0.0,0.0 +"('TPI', 'MTHFC', 'MTHFD')","(('b0529', 'b3919'), ('b3919', 'b0529'))",2,0.0,0.006455095014414326,0.0,0.0,1.2,0.0 diff --git a/tests/functional/test_analyzing_model.py b/tests/functional/test_analyzing_model.py new file mode 100644 index 0000000..67ed526 --- /dev/null +++ b/tests/functional/test_analyzing_model.py @@ -0,0 +1,77 @@ +import imghdr +import sys +import tempfile + +from straindesign._version import __app_name__ +from straindesign.utils import cmd +from tests.main_test import Main_test + + +class TestAnalyzingModel(Main_test): + def test_base(self): + with tempfile.NamedTemporaryFile() as fd: + args = ["python", "-m", __app_name__, "analyzing-model"] + args += ["--input-model-file", self.model_ecoli_gz] + args += ["--biomass-rxn-id", "BIOMASS_Ec_iAF1260_core_59p81M"] + args += ["--target-rxn-id", "EX_tyrp_e"] + args += ["--output-pareto-png", fd.name] + + ret = cmd.run(args) + if ret.returncode > 0: + print(ret.stderr) + print(ret.stdout) + sys.exit(1) + + self.assertEqual(imghdr.what(fd.name), "png") + + def test_medium(self): + with tempfile.NamedTemporaryFile() as fd: + args = ["python", "-m", __app_name__, "analyzing-model"] + args += ["--input-model-file", self.model_ecoli_gz] + args += ["--biomass-rxn-id", "BIOMASS_Ec_iAF1260_core_59p81M"] + args += ["--target-rxn-id", "EX_tyrp_e"] + args += ["--output-pareto-png", fd.name] + args += ["--input-medium-file", self.medium_butanol_csv] + + ret = cmd.run(args) + if ret.returncode > 0: + print(ret.stderr) + print(ret.stdout) + sys.exit(1) + + self.assertEqual(imghdr.what(fd.name), "png") + + def test_pathway(self): + with tempfile.NamedTemporaryFile() as fd: + args = ["python", "-m", __app_name__, "analyzing-model"] + args += ["--input-model-file", self.model_ecoli_gz] + args += ["--biomass-rxn-id", "BIOMASS_Ec_iAF1260_core_59p81M"] + args += ["--target-rxn-id", "EX_tyrp_e"] + args += ["--output-pareto-png", fd.name] 
+ args += ["--input-medium-file", self.medium_butanol_csv] + args += ["--input-pathway-file", self.pathway_butanol] + + ret = cmd.run(args) + if ret.returncode > 0: + print(ret.stderr) + print(ret.stdout) + sys.exit(1) + + self.assertEqual(imghdr.what(fd.name), "png") + + def test_substrate(self): + with tempfile.NamedTemporaryFile() as fd: + args = ["python", "-m", __app_name__, "analyzing-model"] + args += ["--input-model-file", self.model_ecoli_gz] + args += ["--biomass-rxn-id", "BIOMASS_Ec_iAF1260_core_59p81M"] + args += ["--target-rxn-id", "EX_tyrp_e"] + args += ["--output-pareto-png", fd.name] + args += ["--substrate-rxn-id", "EX_glc__D_e"] + + ret = cmd.run(args) + if ret.returncode > 0: + print(ret.stderr) + print(ret.stdout) + sys.exit(1) + + self.assertEqual(imghdr.what(fd.name), "png") diff --git a/tests/functional/test_reduce_model.py b/tests/functional/test_reduce_model.py new file mode 100644 index 0000000..a0cd21f --- /dev/null +++ b/tests/functional/test_reduce_model.py @@ -0,0 +1,60 @@ +import sys +import tempfile + +import cobra +from straindesign._version import __app_name__ +from straindesign.io import sbml +from straindesign.utils import cmd +from straindesign.utils import model as utils_model +from tests.main_test import Main_test + + +class TestReduceModel(Main_test): + def test_one(self): + # Delete: 2 genes, 3 reactions + model_ecoli = sbml.from_sbml(path=self.model_ecoli_gz) + nb_gene = utils_model.count_gene(model=model_ecoli) + nb_reaction = utils_model.count_reaction(model=model_ecoli) + + with tempfile.NamedTemporaryFile() as fd: + args = ["python", "-m", __app_name__, "reduce-model"] + args += ["--input-model-file", self.model_ecoli_gz] + args += ["--input-straindesign-file", self.gene_butanol] + args += ["--output-file-sbml", fd.name] + + ret = cmd.run(args) + if ret.returncode > 0: + print(ret.stderr) + print(ret.stdout) + sys.exit(1) + + model, errors = cobra.io.validate_sbml_model(fd.name) + self.assertIsNot(model, None) + + 
self.assertEqual(nb_gene, len(model.genes) + 2) + self.assertEqual(nb_reaction, len(model.reactions) + 3) + + def test_two(self): + # Delete: 3 genes, 7 reactions + model_ecoli = sbml.from_sbml(path=self.model_ecoli_gz) + nb_gene = utils_model.count_gene(model=model_ecoli) + nb_reaction = utils_model.count_reaction(model=model_ecoli) + + with tempfile.NamedTemporaryFile() as fd: + args = ["python", "-m", __app_name__, "reduce-model"] + args += ["--input-model-file", self.model_ecoli_gz] + args += ["--input-straindesign-file", self.gene_butanol] + args += ["--input-gene-str", "b4208", "b4208", "b3919"] + args += ["--output-file-sbml", fd.name] + + ret = cmd.run(args) + if ret.returncode > 0: + print(ret.stderr) + print(ret.stdout) + sys.exit(1) + + model, errors = cobra.io.validate_sbml_model(fd.name) + self.assertIsNot(model, None) + + self.assertEqual(nb_gene, len(model.genes) + 3) + self.assertEqual(nb_reaction, len(model.reactions) + 7) diff --git a/tests/test_software.py b/tests/functional/test_simulate_deletion.py similarity index 88% rename from tests/test_software.py rename to tests/functional/test_simulate_deletion.py index c056cf2..fe93c36 100644 --- a/tests/test_software.py +++ b/tests/functional/test_simulate_deletion.py @@ -1,26 +1,19 @@ import csv import os -import subprocess import sys import tempfile -from main_test import Main_test from straindesign._version import __app_name__ +from straindesign.utils import cmd +from tests.main_test import Main_test -class Test_software(Main_test): - @staticmethod - def launch(args): - if isinstance(args, str): - args = args.split() - ret = subprocess.run(args, capture_output=True, encoding="utf8") - return ret - +class TestSimulateDeletion(Main_test): def test_software_butanol(self): # Be careful: can not test gene annotation into # worflows running simultaneously with tempfile.NamedTemporaryFile(delete=False) as fd: - args = ["python", "-m", __app_name__] + args = ["python", "-m", __app_name__, 
"simulate-deletion"] args += ["--input-model-file", self.model_ecoli_gz] args += ["--input-pathway-file", self.pathway_butanol] args += ["--biomass-rxn-id", "BIOMASS_Ec_iAF1260_core_59p81M"] @@ -29,10 +22,11 @@ def test_software_butanol(self): args += ["--output-file-csv", fd.name] args += ["--strategy", "ko"] args += ["--max-knockouts", "3"] + args += ["--max-time", "10"] args += ["--input-medium-file", self.medium_butanol_csv] args += ["--thread", "1"] - ret = Test_software.launch(args) + ret = cmd.run(args) if ret.returncode > 0: print(ret.stderr) print(ret.stdout) @@ -43,11 +37,11 @@ def test_software_butanol(self): self.assertGreater(len(lines), 1) os.remove(fd.name) - def test_software_butanol_light(self): + def test_software_butanol_light_ko(self): # Be careful: can not test gene annotation into # worflows running simultaneously with tempfile.NamedTemporaryFile(delete=False) as fd: - args = ["python", "-m", __app_name__] + args = ["python", "-m", __app_name__, "simulate-deletion"] args += ["--input-model-file", self.model_ecoli_core] args += ["--input-pathway-file", self.pathway_butanol] args += ["--biomass-rxn-id", "BIOMASS_Ecoli_core_w_GAM"] @@ -60,7 +54,7 @@ def test_software_butanol_light(self): args += ["--input-medium-file", self.medium_butanol_tsv] args += ["--thread", "1"] - ret = Test_software.launch(args) + ret = cmd.run(args) if ret.returncode > 0: print(ret.stderr) print(ret.stdout) @@ -80,7 +74,7 @@ def test_software_butanol_iml1515(self): # Be careful: can not test gene annotation into # worflows running simultaneously with tempfile.NamedTemporaryFile(delete=False) as fd: - args = ["python", "-m", __app_name__] + args = ["python", "-m", __app_name__, "simulate-deletion"] args += ["--input-model-file", self.model_ecoli_iml1515] args += ["--input-pathway-file", self.pathway_butanol] args += ["--biomass-rxn-id", "biomass"] @@ -93,7 +87,7 @@ def test_software_butanol_iml1515(self): args += ["--input-medium-file", self.medium_butanol_csv] args += 
["--thread", "1"] - ret = Test_software.launch(args) + ret = cmd.run(args) if ret.returncode > 0: print(ret.stderr) print(ret.stdout) @@ -113,7 +107,7 @@ def test_software_galaxy(self): # Be careful: can not test gene annotation into # worflows running simultaneously with tempfile.NamedTemporaryFile(delete=False) as fd: - args = ["python", "-m", __app_name__] + args = ["python", "-m", __app_name__, "simulate-deletion"] args += ["--input-model-file", self.model_ecoli_gz] args += ["--input-pathway-file", self.pathway_butanol] args += ["--biomass-rxn-id", "BIOMASS_Ec_iAF1260_core_59p81M"] @@ -126,7 +120,7 @@ def test_software_galaxy(self): args += ["--input-medium-file", self.medium_butanol_tsv] args += ["--thread", "1"] - ret = Test_software.launch(args) + ret = cmd.run(args) if ret.returncode > 0: print(ret.stderr) print(ret.stdout) @@ -146,7 +140,7 @@ def test_software_butanol_light_ou(self): # Be careful: can not test gene annotation into # worflows running simultaneously with tempfile.NamedTemporaryFile(delete=False) as fd: - args = ["python", "-m", __app_name__] + args = ["python", "-m", __app_name__, "simulate-deletion"] args += ["--input-model-file", self.model_ecoli_gz] args += ["--input-pathway-file", self.pathway_butanol] args += ["--biomass-rxn-id", "BIOMASS_Ec_iAF1260_core_59p81M"] @@ -155,10 +149,11 @@ def test_software_butanol_light_ou(self): args += ["--output-file-csv", fd.name] args += ["--strategy", "ou"] args += ["--max-knockouts", "3"] + args += ["--max-time", "10"] args += ["--input-medium-file", self.medium_butanol_tsv] args += ["--thread", "1"] - ret = Test_software.launch(args) + ret = cmd.run(args) if ret.returncode > 0: print(ret.stderr) print(ret.stdout) diff --git a/tests/main_test.py b/tests/main_test.py index 0714b1a..1723d43 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -17,3 +17,8 @@ class Main_test(unittest.TestCase): # Pathway. 
pathway_path = os.path.join(dataset_path, "pathway") pathway_butanol = os.path.join(pathway_path, "butanol.xml") + # Gene. + gene_path = os.path.join(dataset_path, "gene") + gene_butanol = os.path.join(gene_path, "simulate_deletion.butanol.iAF1260.csv") + gene_value_error = os.path.join(gene_path, "gene.value_error.csv") + gene_empty = os.path.join(gene_path, "gene.empty.csv") diff --git a/tests/test_medium.py b/tests/unit/test_medium.py similarity index 90% rename from tests/test_medium.py rename to tests/unit/test_medium.py index 22d421f..4935509 100644 --- a/tests/test_medium.py +++ b/tests/unit/test_medium.py @@ -1,12 +1,11 @@ -import logging from collections import OrderedDict from cameo import load_model -from main_test import Main_test from straindesign.medium import associate_flux_env, load_medium +from tests.main_test import Main_test -class Test_functional(Main_test): +class TestMedium(Main_test): def test_load_medium(self): medium = load_medium(self.medium_butanol_csv) theorical_medium = OrderedDict( @@ -23,7 +22,6 @@ def test_associate_flux_env(self): associate_flux_env( model=model, envcond=medium, - logger=logging.getLogger(), ) self.assertEqual(model.reactions.get_by_id("EX_glc__D_e").bounds, (-10.0, 10.0)) self.assertEqual(model.reactions.get_by_id("EX_o2_e").bounds, (-5.0, 5.0)) diff --git a/tests/test_preprocess.py b/tests/unit/test_preprocess.py similarity index 64% rename from tests/test_preprocess.py rename to tests/unit/test_preprocess.py index 1ac6fa6..201cbc6 100644 --- a/tests/test_preprocess.py +++ b/tests/unit/test_preprocess.py @@ -1,10 +1,38 @@ -import logging +from straindesign.preprocess import ( + build_model, + load_straindesign_simulate_deletion, +) +from tests.main_test import Main_test -from main_test import Main_test -from straindesign.preprocess import build_model +class TestPreprocess(Main_test): + def test_load_straindesign_simulate_deletion(self): + # Test 1 + genes = load_straindesign_simulate_deletion( + 
path=self.gene_butanol, strategy="yield-max" + ) + self.assertEqual(genes, ["b0529", "b3919"]) + # Test 2 + genes = load_straindesign_simulate_deletion( + path=self.gene_butanol, strategy="gene-max" + ) + self.assertEqual(genes, ["b3731", "b3732", "b3734", "b3735", "b3736"]) + # Test 3 + genes = load_straindesign_simulate_deletion( + path=self.gene_butanol, strategy="gene-min" + ) + self.assertEqual(genes, ["b3919"]) + # Test 4 + genes = load_straindesign_simulate_deletion( + path=self.gene_empty, strategy="gene-min" + ) + self.assertEqual(genes, []) + # Test 5 + with self.assertRaises(ValueError): + load_straindesign_simulate_deletion( + path=self.gene_value_error, strategy="gene-min" + ) -class Test_functional(Main_test): def test_build_model(self): # Test 1 model = build_model( @@ -12,7 +40,6 @@ def test_build_model(self): pathway_path=None, biomass_id="EX_glc__D_e", target_id="BIOMASS_Ec_iAF1260_core_59p81M", - logger=logging.getLogger(), ) data = model.objective.to_json() b_ix, t_ix = 0, 0 @@ -31,7 +58,6 @@ def test_build_model(self): pathway_path=self.pathway_butanol, biomass_id="BIOMASS_Ec_iAF1260_core_59p81M", target_id="EX_1btol_e", - logger=logging.getLogger(), ) data = model.objective.to_json() b_ix, t_ix = 0, 0 @@ -49,7 +75,6 @@ def test_build_model(self): pathway_path=self.pathway_butanol, biomass_id="test", target_id="EX_1btol_e", - logger=logging.getLogger(), ) self.assertIs(model, None) # Test 4 @@ -58,6 +83,5 @@ def test_build_model(self): pathway_path=self.pathway_butanol, biomass_id="BIOMASS_Ec_iAF1260_core_59p81M", target_id="test", - logger=logging.getLogger(), ) self.assertIs(model, None)