From ff81d6d6684c6c9b2cce001d2db9ebe30c14e58f Mon Sep 17 00:00:00 2001 From: Roberto Vera Alvarez Date: Fri, 16 Feb 2024 09:37:42 -0500 Subject: [PATCH] Updating docs --- setup.py | 1 + src/gtax/gtax_main.py | 35 ++++++++++++++++++++++++++++++++++- src/gtax/taxonomy_blast.py | 5 +++-- 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 8d3cca8..451d232 100644 --- a/setup.py +++ b/setup.py @@ -54,6 +54,7 @@ def readme(): }, entry_points={ 'console_scripts': [ + 'gtax = gtax.gtax_main:gtax', 'taxonomy_pickle = gtax.taxonomy_main:taxonomy_pickle', 'gtax_database = gtax.gtax_main:gtax_database', 'filter_metadata_zip = gtax.gtax_main:filter_metadata_zip', diff --git a/src/gtax/gtax_main.py b/src/gtax/gtax_main.py index 50d9f2c..68cd200 100644 --- a/src/gtax/gtax_main.py +++ b/src/gtax/gtax_main.py @@ -69,9 +69,42 @@ def filter_metadata_zip(): catalog.append(c) d['assemblies'] = catalog fjson_out.write(json.dumps(d, indent=2)) - with zip.open('ncbi_dataset/fetch.txt') as fin, open('{}/ncbi_dataset/fetch.txt'.format(db), 'w') as fout: + with zip.open('ncbi_dataset/fetch.txt') as fin, open('{}/ncbi_dataset/fetch.txt'.format(db), + 'w') as fout: for line in fin.readlines(): line = line.decode("utf-8") f = os.path.dirname(line.split('\t')[2].replace('data/', '')) if f in assemblies: fout.write(line) + + +def gtax(): + import argparse + from argparse import RawTextHelpFormatter + from gtax import __version__ + + epilog = ''' + For more information see https://gtax.readthedocs.io/en/latest/index.html + + Available programs: + + + filter_metadata_zip: Read the zipped metadata file for each superkingdom and create the folders + for hydration with the datasets command. + gtax_database: Creates the FASTA, indexes and TaxID maps for the databases. + taxonomy_blast: Process BLAST output to find contamination. + + Cite: + + Alvarez, R.V., Landsman, D. GTax: improving de novo transcriptome assembly by removing foreign RNA + contamination. Genome Biol 25, 12 (2024). https://doi.org/10.1186/s13059-023-03141-2 + ''' + parser = argparse.ArgumentParser(prog='gtax', + description='GTax python package provides tools for the creation ' + 'of the GTax sequence-based database.', + epilog=epilog, + formatter_class=RawTextHelpFormatter) + + parser.add_argument("-v", "--version", action="version", version=__version__) + args = parser.parse_args() + parser.print_help() diff --git a/src/gtax/taxonomy_blast.py b/src/gtax/taxonomy_blast.py index f8cf617..82db8bf 100644 --- a/src/gtax/taxonomy_blast.py +++ b/src/gtax/taxonomy_blast.py @@ -24,14 +24,15 @@ def transcript_contamination(filename, blast_columns, tax_ids, taxonomy): node = node[0]['name_'] else: node = str(df['staxid'].iloc[0]) - data.append([g, True, node, df['evalue'].iloc[0], df['saccver'].iloc[0], df['staxid'].iloc[0]]) + data.append([g, True, node, df['evalue'].iloc[0], df['sseqid'].iloc[0], df['staxid'].iloc[0]]) else: data.append([g, False, False, False, False, False ]) return data def taxonomy_blast(): - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(prog='taxonomy_blast', + description='This tools process BLAST output to find contamination.') parser.add_argument('--threads', help='No. of threads', required=True)