From 5c4595da292d3f267d5d9701a72186e3ed84dedb Mon Sep 17 00:00:00 2001 From: guillaume-gricourt Date: Thu, 25 Aug 2022 10:43:04 +0000 Subject: [PATCH 1/4] doc(changelog): update --- CHANGELOG.md | 4 ++++ src/rpfbagr/_version.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f79ccda..b0ab398 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## [2.2.1](https://github.com/brsynth/rpfbagr/tree/2.2.1) (2022-08-25) + +[Full Changelog](https://github.com/brsynth/rpfbagr/compare/2.2.0...2.2.1) + ## [2.2.0](https://github.com/brsynth/rpfbagr/tree/2.2.0) (2022-08-24) [Full Changelog](https://github.com/brsynth/rpfbagr/compare/2.1.0...2.2.0) diff --git a/src/rpfbagr/_version.py b/src/rpfbagr/_version.py index 6ada3c6..a1e32be 100644 --- a/src/rpfbagr/_version.py +++ b/src/rpfbagr/_version.py @@ -1,2 +1,2 @@ __app_name__ = "rpfbagr" -__version__ = "2.2.0" +__version__ = "2.2.1" From 9224a583087c704f2ace56d90dbd2abf63f125f7 Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Fri, 26 Aug 2022 14:07:19 +0000 Subject: [PATCH 2/4] fix(rpfbagr): annotate, prevent error due to NCBI request --- src/rpfbagr/__main__.py | 2 +- src/rpfbagr/preprocess.py | 45 ++++++++++++++++++++++++--------------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/src/rpfbagr/__main__.py b/src/rpfbagr/__main__.py index a1218ea..2ecc381 100644 --- a/src/rpfbagr/__main__.py +++ b/src/rpfbagr/__main__.py @@ -225,7 +225,7 @@ def main(): if res is not None: if args.email and args.strategy == "ko": logger.info("Perform gene annotation") - res = genes_annotate(model=model, df=res, email=args.email) + res = genes_annotate(model=model, df=res, email=args.email, logger=logger) logger.info("Save results") if args.output_file_csv: save_results(res, path=args.output_file_csv, sep=",") diff --git a/src/rpfbagr/preprocess.py b/src/rpfbagr/preprocess.py index f85c1ea..1d9703e 100644 --- a/src/rpfbagr/preprocess.py +++ 
b/src/rpfbagr/preprocess.py @@ -1,5 +1,6 @@ import ast import logging +import time from typing import Dict import pandas as pd @@ -44,10 +45,16 @@ def build_model( return model -def genes_annotate(model: Model, df: pd.DataFrame, email: str) -> pd.DataFrame: +def genes_annotate( + model: Model, + df: pd.DataFrame, + email: str, + logger: logging.Logger, +) -> pd.DataFrame: if df.empty: return df + is_ncbi_error = False Entrez.email = email cache: Dict[str, str] = {} for ix in df.index: @@ -64,26 +71,30 @@ def genes_annotate(model: Model, df: pd.DataFrame, email: str) -> pd.DataFrame: model_gene = model.genes.get_by_id(gene) ncbi_gene = model_gene.annotation.get("ncbigene", "") if gene not in cache.keys(): - if ncbi_gene == "": - label = gene - else: - hd = Entrez.esummary(db="gene", id=ncbi_gene) - rec = Entrez.read(hd, validate=False) - rec = rec.get("DocumentSummarySet", {}) - rec = rec.get("DocumentSummary", []) - label = gene - if len(rec) > 0: - name = rec[0].get("Name", "") - name = name.replace(",", "") - desc = rec[0].get("Description", "") - desc = desc.replace(",", "") - syn = rec[0].get("OtherAliases", "") - syn = syn.replace(",", "") - label = "%s=%s - %s" % (name, syn, desc) + label = gene + if ncbi_gene != "": + try: + hd = Entrez.esummary(db="gene", id=ncbi_gene) + rec = Entrez.read(hd, validate=False) + rec = rec.get("DocumentSummarySet", {}) + rec = rec.get("DocumentSummary", []) + if len(rec) > 0: + name = rec[0].get("Name", "") + name = name.replace(",", "") + desc = rec[0].get("Description", "") + desc = desc.replace(",", "") + syn = rec[0].get("OtherAliases", "") + syn = syn.replace(",", "") + label = "%s=%s - %s" % (name, syn, desc) + time.sleep(2) + except Exception: + is_ncbi_error = True cache[gene] = label labels.append(cache[gene]) labels_groups.append("(%s)" % (",".join(labels),)) df.at[ix, "genes_annotation"] = ",".join(labels_groups) + if is_ncbi_error: + logger.warning("NCBI annotation failing for some items") return df From 
32429324c4cbd74add5c2dadba37f8c2538ce299 Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Fri, 26 Aug 2022 14:11:37 +0000 Subject: [PATCH 3/4] fix(rpfbagr): arg "username" introduced to prevent misuse of "email" arg --- src/rpfbagr/__main__.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/rpfbagr/__main__.py b/src/rpfbagr/__main__.py index 2ecc381..21a628b 100644 --- a/src/rpfbagr/__main__.py +++ b/src/rpfbagr/__main__.py @@ -123,6 +123,12 @@ def main(): required=False, help="Provide your email to annotate genes id with the NCBI website", ) + parser_input.add_argument( + "--username", + type=str, + required=False, + help="Required by Cameo. Fill it if the OS environment has no USERNAME value.", + ) # Compute args = parser.parse_args() @@ -161,12 +167,12 @@ def main(): try: getpass.getuser() except Exception as e: - if args.email: - os.environ["USERNAME"] = args.email + if args.username: + os.environ["USERNAME"] = args.username else: logger.error(str(e)) logger.error( - "A login name must be provided for Cameo with --email argument" + "A login name must be provided for Cameo with --username argument" ) parser.exit(1) From 965a2d4e2f56a376fb364c7404f5f5d6bf75c6da Mon Sep 17 00:00:00 2001 From: Guillaume Gricourt Date: Fri, 26 Aug 2022 15:28:10 +0000 Subject: [PATCH 4/4] fix(rpfbagr): rm env var management --- src/rpfbagr/__main__.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/src/rpfbagr/__main__.py b/src/rpfbagr/__main__.py index 21a628b..9316bc9 100644 --- a/src/rpfbagr/__main__.py +++ b/src/rpfbagr/__main__.py @@ -1,11 +1,8 @@ import argparse -import getpass import logging import os import sys -import tempfile -os.environ["XDG_CACHE_HOME"] = tempfile.TemporaryDirectory().name from rpfbagr.medium import associate_flux_env, load_medium from rpfbagr.metabolic import gene_ko, gene_ou from rpfbagr.preprocess import build_model, genes_annotate, save_results @@ -123,12 +120,6 @@ def main():
required=False, help="Provide your email to annotate genes id with the NCBI website", ) - parser_input.add_argument( - "--username", - type=str, - required=False, - help="Required by Cameo. Fill it if the OS environment has no USERNAME value.", - ) # Compute args = parser.parse_args() @@ -164,18 +155,6 @@ def main(): logger.debug("Create out directory: %s") os.makedirs(os.path.dirname(args.output_file_tsv)) - try: - getpass.getuser() - except Exception as e: - if args.username: - os.environ["USERNAME"] = args.username - else: - logger.error(str(e)) - logger.error( - "A login name must be provided for Cameo with --username argument" - ) - parser.exit(1) - # Load model logger.info("Build model") model = build_model(