Skip to content

Commit

Permalink
Updating docs
Browse files Browse the repository at this point in the history
  • Loading branch information
Roberto Vera Alvarez committed Feb 16, 2024
1 parent a92caa7 commit ff81d6d
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 3 deletions.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def readme():
},
entry_points={
'console_scripts': [
'gtax = gtax.gtax_main:gtax',
'taxonomy_pickle = gtax.taxonomy_main:taxonomy_pickle',
'gtax_database = gtax.gtax_main:gtax_database',
'filter_metadata_zip = gtax.gtax_main:filter_metadata_zip',
Expand Down
35 changes: 34 additions & 1 deletion src/gtax/gtax_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,42 @@ def filter_metadata_zip():
catalog.append(c)
d['assemblies'] = catalog
fjson_out.write(json.dumps(d, indent=2))
with zip.open('ncbi_dataset/fetch.txt') as fin, open('{}/ncbi_dataset/fetch.txt'.format(db), 'w') as fout:
with zip.open('ncbi_dataset/fetch.txt') as fin, open('{}/ncbi_dataset/fetch.txt'.format(db),
'w') as fout:
for line in fin.readlines():
line = line.decode("utf-8")
f = os.path.dirname(line.split('\t')[2].replace('data/', ''))
if f in assemblies:
fout.write(line)


def gtax():
    """Entry point for the ``gtax`` console script.

    Builds an argument parser whose only explicit option is
    ``-v``/``--version``, parses the command line, and then prints the help
    text. The help epilog lists the programs shipped with the package and
    the citation for GTax.
    """
    # Imports are kept local to the function, as in the original code.
    import argparse

    from gtax import __version__

    # Printed verbatim at the end of the help output (see formatter below).
    epilog = '''
For more information see https://gtax.readthedocs.io/en/latest/index.html
Available programs:
filter_metadata_zip: Read the zipped metadata file for each superkingdom and create the folders
for hydration with the datasets command.
gtax_database: Creates the FASTA, indexes and TaxID maps for the databases.
taxonomy_blast: Process BLAST output to find contamination.
Cite:
Alvarez, R.V., Landsman, D. GTax: improving de novo transcriptome assembly by removing foreign RNA
contamination. Genome Biol 25, 12 (2024). https://doi.org/10.1186/s13059-023-03141-2
'''
    parser = argparse.ArgumentParser(
        prog='gtax',
        description='GTax python package provides tools for the creation '
                    'of the GTax sequence-based database.',
        epilog=epilog,
        # The raw formatter keeps the epilog's line breaks as written
        # instead of re-wrapping the text.
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument("-v", "--version", action="version", version=__version__)

    # The parsed namespace is never read, so it is not bound to a name.
    # parse_args() is still called so that -v/--version (and -h) are handled
    # by argparse, which prints and exits on its own.
    parser.parse_args()

    # With no terminating option given, show the help text.
    parser.print_help()
5 changes: 3 additions & 2 deletions src/gtax/taxonomy_blast.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,15 @@ def transcript_contamination(filename, blast_columns, tax_ids, taxonomy):
node = node[0]['name_']
else:
node = str(df['staxid'].iloc[0])
data.append([g, True, node, df['evalue'].iloc[0], df['saccver'].iloc[0], df['staxid'].iloc[0]])
data.append([g, True, node, df['evalue'].iloc[0], df['sseqid'].iloc[0], df['staxid'].iloc[0]])
else:
data.append([g, False, False, False, False, False ])
return data


def taxonomy_blast():
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(prog='taxonomy_blast',
description='This tools process BLAST output to find contamination.')

parser.add_argument('--threads', help='No. of threads',
required=True)
Expand Down

0 comments on commit ff81d6d

Please sign in to comment.