Skip to content

Commit

Permalink
v1.01 added pfam2go; split nogsql into smaller files to not have issues with git lfs; other minor improvements in stability;
Browse files Browse the repository at this point in the history

Former-commit-id: dc8bc26
Former-commit-id: d636297
  • Loading branch information
PedroMTQ committed Dec 10, 2020
1 parent 8ffc1f4 commit 9e272f6
Show file tree
Hide file tree
Showing 14 changed files with 265 additions and 103 deletions.
3 changes: 0 additions & 3 deletions Resources/NOG_sql.tar.gz

This file was deleted.

2 changes: 1 addition & 1 deletion Resources/NOG_sql/NOGT1.tar.gz.REMOVED.git-id
Original file line number Diff line number Diff line change
@@ -1 +1 @@
670cbc527fc2bd39173cde0284547bd5cbaff07a
7d083d9f3955b1fcc89803f81b5562d6d0237ff4
2 changes: 1 addition & 1 deletion Resources/NOG_sql/NOGT2.tar.gz.REMOVED.git-id
Original file line number Diff line number Diff line change
@@ -1 +1 @@
a91511c3ea1e10a3ee7e19600185f81971e1ffbd
5dcaeb873b827d6a9cf12d6f356367aaa1403818
2 changes: 1 addition & 1 deletion Resources/NOG_sql/NOGT3.tar.gz.REMOVED.git-id
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3b1f9cc98e29c6488969ffbe4f39ca08ddd29f50
e95cb23c6c2205c1b3101267e0ea2d966b18bae2
2 changes: 1 addition & 1 deletion Resources/NOG_sql/NOGT4.tar.gz.REMOVED.git-id
Original file line number Diff line number Diff line change
@@ -1 +1 @@
c226af7eae624cf7fc914db2c202e49a0493a5b7
95a1174d032def37115f493d14434adc00847fca
2 changes: 1 addition & 1 deletion Resources/NOG_sql/NOGT5.tar.gz.REMOVED.git-id
Original file line number Diff line number Diff line change
@@ -1 +1 @@
5e0d561aadc95ea5542141397553d863a8af22b6
1f7add08592b207f63f09a1b1bc883e87da11e29
2 changes: 1 addition & 1 deletion source/Exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def __str__(self):

class InvalidTargetFile(Exception):
def __str__(self):
return 'You did not insert a valid target file! Make sure it follows the same format as the file provided in tests/test_file.tsv'
return 'You did not insert a valid target file!'

class InstallationCheckNotPassed(Exception):
def __str__(self):
Expand Down
5 changes: 4 additions & 1 deletion source/MANTIS.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ def __str__(self):
'User cores:\t\t\t\t'+str(self.user_cores)+'\n' if self.user_cores else '',
'Chunk size:\t\t\t'+str(self.chunk_size)+'\n' if self.chunk_size else '',
'Domain algorithm:\t\t'+str(self.domain_algorithm)+'\n' if self.domain_algorithm else '',
'Skip consensus:\t\t'+str(self.skip_consensus)+'\n' if self.skip_consensus else '',
'------------------------------------------']
return 'User configuration:'+'\n'+'------------------------------------------'+'\n'+''.join(output_list)

Expand All @@ -165,14 +166,16 @@ def generate_fastas_to_annotate(self):
else:
print('Your file does not appear to be a fasta. If you want to annotate multiple samples, make sure your file has the <.tsv> extension.', flush=True, file=self.redirect_verbose)
raise InvalidTargetFile
if not self.fastas_to_annotate: raise InvalidTargetFile
for file_path, output_path, organism_details, count_seqs_original_file in self.fastas_to_annotate:
Path(output_path).mkdir(parents=True, exist_ok=True)

def annotate_multiple_samples(self):
try:
with open(self.target_path) as file:
line = file.readline()
line = file.readline()
if splitter not in line:
line = file.readline()
while line:
line = line.strip('\n').split()
if len(line) >= 2:
Expand Down
24 changes: 11 additions & 13 deletions source/MANTIS_Assembler.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,10 +310,7 @@ def merge_hmm_folder(self, target_folder):
run_command('for i in ' + target_folder + '*.hmm; do cat ${i} >> ' + target_folder + output_file + '_merged.hmm; done',stdout_file=self.redirect_verbose)
run_command('hmmpress '+target_folder + output_file + '_merged.hmm',stdout_file=self.redirect_verbose)

def file_exists(self,target_file,force_download=False):
if os.path.exists(target_file) and not force_download:
return True
return False


def get_path_default_hmm(self,database,taxon_id=None):
target_file=None
Expand All @@ -339,21 +336,21 @@ def get_path_default_hmm(self,database,taxon_id=None):

def check_reference_exists(self,database,taxon_id=None,force_download=False):
if database=='go_obo_nlp':
if self.file_exists(self.mantis_paths['go_obo_nlp'] + 'go.obo', force_download):
if file_exists(self.mantis_paths['go_obo_nlp'] + 'go.obo', force_download):
return True
elif database=='uniprot_nlp':
if os.listdir(self.mantis_paths['uniprot_nlp']):
return True
elif database=='ncbi':
if self.file_exists(self.mantis_paths['ncbi'] + 'taxidlineage.dmp', force_download):
if file_exists(self.mantis_paths['ncbi'] + 'taxidlineage.dmp', force_download):
return True
elif database=='NOGSQL':
if self.file_exists(self.mantis_paths['default'] + 'eggnog.db', force_download):
if file_exists(self.mantis_paths['default'] + 'eggnog.db', force_download):
return True
target_file=self.get_path_default_hmm(database,taxon_id)
if target_file:
for extension in ['','.h3f','.h3i','.h3m','.h3p']:
if not self.file_exists(target_file+extension,force_download=force_download):
if not file_exists(target_file+extension,force_download=force_download):
return False
else:
return False
Expand All @@ -363,15 +360,15 @@ def check_reference_exists(self,database,taxon_id=None,force_download=False):

def check_installation_extras(self,res,verbose=True):
if verbose: yellow('Checking extra files',flush=True,file=self.redirect_verbose)
if not self.file_exists(self.mantis_paths['resources'] + 'essential_genes/essential_genes.txt'):
if not file_exists(self.mantis_paths['resources'] + 'essential_genes/essential_genes.txt'):
red('Essential genes list is missing, it should be in the github repo!')
if verbose: red('Failed installation check on [files missing]: ' + self.mantis_paths['resources'] + 'essential_genes/essential_genes.txt', flush=True,file=self.redirect_verbose)
res.append(self.mantis_paths['resources'] + 'essential_genes/')
else:
if verbose: green('Passed installation check on: ' + self.mantis_paths['resources'] + 'essential_genes', flush=True,file=self.redirect_verbose)


if not self.file_exists(self.mantis_paths['ncbi'] + 'taxidlineage.dmp'):
if not file_exists(self.mantis_paths['ncbi'] + 'taxidlineage.dmp'):
if verbose: red('Failed installation check on [files missing]: ' + self.mantis_paths['ncbi']+'taxidlineage.dmp', flush=True, file=self.redirect_verbose)
res.append(self.mantis_paths['ncbi'])
else:
Expand All @@ -387,7 +384,7 @@ def check_installation_extras(self,res,verbose=True):
else:
if verbose: green('Passed installation check on: ' + self.mantis_paths['uniprot_nlp'], flush=True, file=self.redirect_verbose)

if not self.file_exists(self.mantis_paths['go_obo_nlp'] + 'go.obo'):
if not file_exists(self.mantis_paths['go_obo_nlp'] + 'go.obo'):
if verbose: red('Failed installation check on [files missing]: ' + self.mantis_paths['go_obo_nlp']+'go.obo', flush=True, file=self.redirect_verbose)
res.append(self.mantis_paths['go_obo_nlp'])
else:
Expand Down Expand Up @@ -449,7 +446,7 @@ def check_installation(self,verbose=True):
if verbose: yellow('Checking HMM installation',flush=True,file=self.redirect_verbose)
requirements={
self.mantis_paths['NOGG']:['NOGG_sql_annotations.tsv'],
self.mantis_paths['pfam']:['Pfam-A.hmm.dat'],
self.mantis_paths['pfam']:['pfam_metadata.tsv'],
self.mantis_paths['kofam']:['ko_list','ko2cog.xl','ko2go.xl','ko2tc.xl','ko2cazy.xl','ko_to_path','map_description'],
#self.mantis_paths['dbcan']:['CAZyDB.07312019.fam.subfam.ec.txt'],
self.mantis_paths['tigrfam']:['gpl.html','COPYRIGHT','TIGRFAMS_GO_LINK','TIGRFAMS_ROLE_LINK','TIGR_ROLE_NAMES'],
Expand Down Expand Up @@ -583,4 +580,5 @@ def processes_handler(self,target_worker_function,worker_count,add_sentinels=Tru


if __name__ == '__main__':
p=MANTIS_Assembler()
p=MANTIS_Assembler()
p.unpack_NOG_sql()
Loading

0 comments on commit 9e272f6

Please sign in to comment.