
major refactoring: #52

Draft
wants to merge 2 commits into base: master
5 changes: 5 additions & 0 deletions .gitignore
@@ -128,3 +128,8 @@ dmypy.json

# Pyre type checker
.pyre/

# files created at runtime
diamond*
get_non_overlapping_hits.c
get_non_overlapping_hits.o
17 changes: 17 additions & 0 deletions install.sh
@@ -0,0 +1,17 @@
pip install setuptools poetry poetry-source-env
poetry install

# adding the necessary conda channels
conda config --append channels bioconda
conda config --append channels conda-forge

# Installs HMMER (hmm homology search)
conda install -c biocore hmmer -y

# Installs Diamond (blast-like homology search)
conda install bioconda::diamond -y

# Install UniFunc (functional annotation text similarity)
conda install conda-forge::unifunc -y

mantis compile_cython
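
As a rough post-install sanity check (not part of this PR; the binary names are assumptions based on the upstream tools: hmmsearch ships with HMMER, diamond with Diamond, and UniFunc is imported as a Python package by mantis/src/consensus.py further down), something like the following could confirm the dependencies are in place:

import shutil
import sys

def check_dependencies() -> bool:
    # conda-installed homology search tools must be on PATH
    ok = True
    for binary in ('hmmsearch', 'diamond'):
        if shutil.which(binary) is None:
            print(f'missing binary: {binary}', file=sys.stderr)
            ok = False
    # UniFunc is used as a library (see consensus.py below), so it only needs to import
    try:
        from unifunc.source import UniFunc  # noqa: F401
    except ImportError:
        print('UniFunc package is not importable', file=sys.stderr)
        ok = False
    return ok

if __name__ == '__main__':
    sys.exit(0 if check_dependencies() else 1)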
45 changes: 23 additions & 22 deletions mantis/__main__.py
@@ -1,22 +1,22 @@
try:
import argparse
import os
from datetime import datetime
import sys
import uuid
from mantis.mantis import run_mantis, run_mantis_test, print_citation_mantis, print_version
from mantis.unifunc_wrapper import test_nlp
from mantis.assembler import add_slash, get_path_level, check_installation, setup_databases
from mantis.utils import MANTIS_FOLDER,SPLITTER

except ImportError as e:
import signal
master_pid = os.getpid()
print('Import Error:\n',e)
os.kill(master_pid, signal.SIGKILL)
import argparse
import os
import sys
import uuid
from datetime import datetime

from mantis.src.entry import (
check_installation,
print_citation_mantis,
print_version,
run_mantis,
run_mantis_test,
setup_databases,
)
from mantis.src.settings import DEFAULT_CONFIG
from mantis.src.utils.utils import compile_cython, cython_compiled, get_path_level


def main():
default_config_path=f'{MANTIS_FOLDER}config{SPLITTER}MANTIS.cfg'
print('Executing command:\n', ' '.join(sys.argv))
parser = argparse.ArgumentParser(description='___ ___ _ _ \n'
'| \\/ | | | (_) \n'
@@ -29,13 +29,13 @@ def main():
# run mantis
parser.add_argument('execution_type',
help='[required]\tExecution mode',
choices=['run', 'setup', 'check', 'run_test','citation','version','test_nlp', 'check_sql'])
choices=['run', 'setup', 'check', 'run_test','citation','version','compile_cython', 'check_sql'])
parser.add_argument('-i', '--input',
help='[required]\tInput file path. Required when using <run>.')
parser.add_argument('-o', '--output_folder',
help='[optional]\tOutput folder path')
parser.add_argument('-mc', '--mantis_config',
help=f'Custom MANTIS.cfg file. Default is in:{default_config_path}')
help=f'Custom MANTIS.cfg file. Default is in:{DEFAULT_CONFIG}')
parser.add_argument('-et', '--evalue_threshold',
help='[optional]\tCustom e-value threshold. Default is 1e-3.')
parser.add_argument('-ov', '--overlap_value',
@@ -210,8 +210,9 @@ def main():
elif args.execution_type == 'version':
print_version('pedromtq', 'mantis')

elif args.execution_type == 'test_nlp':
test_nlp()
elif args.execution_type == 'compile_cython':
if not cython_compiled():
compile_cython()
elif args.execution_type == 'check_sql':
mantis_config = args.mantis_config
no_taxonomy = args.no_taxonomy
Expand All @@ -220,4 +221,4 @@ def main():


if __name__ == '__main__':
main()
main()
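
The new compile_cython execution mode relies on cython_compiled() and compile_cython() from mantis.src.utils.utils, whose bodies are not part of this diff. A minimal sketch of what they plausibly do, assuming the build command documented in setup_get_non_overlapping_hits.py below and an extension living under mantis/cython_src (paths and logic here are assumptions, not the PR's actual implementation):

import importlib.util
import subprocess
import sys
from pathlib import Path

# assumption: utils.py lives in mantis/src/utils/, so cython_src sits two levels up
CYTHON_FOLDER = Path(__file__).resolve().parents[2] / 'cython_src'

def cython_compiled() -> bool:
    # the extension only resolves after an in-place build has produced the .so file
    try:
        return importlib.util.find_spec('mantis.cython_src.get_non_overlapping_hits') is not None
    except ModuleNotFoundError:
        return False

def compile_cython() -> None:
    # mirrors the manual command noted in setup_get_non_overlapping_hits.py:
    #   python setup_get_non_overlapping_hits.py build_ext --inplace
    subprocess.run(
        [sys.executable, 'setup_get_non_overlapping_hits.py', 'build_ext', '--inplace'],
        cwd=CYTHON_FOLDER,
        check=True,
    )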
8 changes: 4 additions & 4 deletions mantis/cython_src/setup_get_non_overlapping_hits.py
@@ -1,9 +1,9 @@
import os
from os import getcwd, path, rename, walk
from shutil import copy

from setuptools import setup
from Cython.Build import cythonize
from os import path,getcwd,walk,rename
from shutil import copy
from setuptools import setup

#to compile
#python setup_get_non_overlapping_hits.py build_ext --inplace
@@ -38,6 +38,6 @@ def move_so_file():

setup(name='Get non overlapping hits',
ext_modules=cythonize([CYTHON_FOLDER + "get_non_overlapping_hits.pyx"]))

move_o_file()
move_so_file()
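
The bodies of move_o_file() and move_so_file() are truncated in this diff; given the imports (walk, rename, copy) and the .gitignore entries added above for get_non_overlapping_hits.c/.o, they presumably relocate the build artifacts next to the .pyx source. A hedged sketch of move_so_file under those assumptions:

from os import getcwd, path, rename, walk

# assumption: CYTHON_FOLDER is defined earlier in this script as the folder holding the .pyx file
CYTHON_FOLDER = path.join(path.dirname(path.abspath(__file__)), '')

def move_so_file():
    # build_ext --inplace drops the .so somewhere under the working directory;
    # find it and move it beside get_non_overlapping_hits.pyx so it can be imported
    for root, _dirs, files in walk(getcwd()):
        for file_name in files:
            if file_name.startswith('get_non_overlapping_hits') and file_name.endswith('.so'):
                source = path.join(root, file_name)
                target = path.join(CYTHON_FOLDER, file_name)
                if path.abspath(source) != path.abspath(target):
                    rename(source, target)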
28 changes: 0 additions & 28 deletions mantis/exceptions.py

This file was deleted.

Empty file added mantis/src/__init__.py
79 changes: 10 additions & 69 deletions mantis/consensus.py → mantis/src/consensus.py
@@ -1,23 +1,14 @@
try:
from mantis.assembler import *
except:
from assembler import *

try:
from mantis.cython_src.get_non_overlapping_hits import get_non_overlapping_hits
except:
if not cython_compiled():
compile_cython()
try:
from mantis.cython_src.get_non_overlapping_hits import get_non_overlapping_hits
except:
kill_switch(CythonNotCompiled, f'{MANTIS_FOLDER}mantis{SPLITTER}utils.py')
import re

from unifunc.source import UniFunc

from mantis.cython_src.get_non_overlapping_hits import get_non_overlapping_hits


class Consensus(UniFunc_wrapper):
class Consensus():

def __init__(self):
UniFunc_wrapper.__init__(self)
self.unifunc = UniFunc()

def get_ref_weight(self, ref):
'''
@@ -142,7 +133,7 @@ def generate_gff_line_consensus(self, query,
if descriptions:
notes += ',' + ','.join(descriptions)
if is_essential:
notes += f',is_essential_gene:True'
notes += ',is_essential_gene:True'

dbxref = []
ontology_terms = []
@@ -256,36 +247,6 @@ def query_hits_to_cython_Consensus(self, query_hits):
conversion_dict[hit_i] = [ref_file, ref_hit, hit_info]
return res, conversion_dict

# this is for heuristic and bpo
def sort_scaled_hits(self, query_hits, sorting_type):
if not query_hits:
return query_hits
self.add_scaled_values(query_hits)
# this sorting is similar to self.sort_hits but is a bit more specific
sorted_hits = sorted(query_hits, key=lambda k: k[2][f'scaled_{sorting_type}'], reverse=True)
res = []
# then we separate by sorting value
sorted_hits_groups = []
c = 0
for i in sorted_hits:
hit_value = i[2][f'scaled_{sorting_type}']
if not sorted_hits_groups:
sorted_hits_groups.append([])
current = hit_value
if hit_value == current:
sorted_hits_groups[c].append(i)
else:
sorted_hits_groups.append([i])
c += 1
current = hit_value
sec_sorting_type = 'bitscore' if sorting_type == 'evalue' else 'evalue'
for sg in sorted_hits_groups:
temp = sorted(sg, key=lambda k: k[2][f'scaled_{sec_sorting_type}'], reverse=True)
res.extend(temp)
for i in res:
i[2].pop('scaled_evalue')
i[2].pop('scaled_bitscore')
return res

def get_min_max_alt_alg(self, query_hits):
all_bitscore, all_evalue = [], []
@@ -446,26 +407,6 @@ def get_best_hits_Consensus(self, query_hits, query_length):
best_combo = combo
return best_combo

def is_overlap_Consensus(self, temp_queries, current_query):
# the coordinates here already take into account the overlap value, so even if the y set is small or empty, it doesnt matter
if not temp_queries or not current_query:
return False
y_start, y_end = recalculate_coordinates(current_query[2]['query_start'],
current_query[2]['query_end'],
self.overlap_value)
y = set(range(y_start, y_end))
for t in temp_queries:
if t[1] == current_query[1]:
return True
x_start, x_end = recalculate_coordinates(t[2]['query_start'],
t[2]['query_end'],
self.overlap_value)
x = set(range(x_start, x_end))
res = x.intersection(y)
if res:
return True
return False

# @timeit_function
def expand_best_combination(self, best_hits, query_dict):
hits_merged = set()
@@ -502,7 +443,7 @@ def is_nlp_match(self, hit1_info_description, hit2_info_description):
return False
for hit1_d in hit1_info_description:
for hit2_d in hit2_info_description:
score = self.get_similarity_score(hit1_d, hit2_d, only_return=True, verbose=False)
score = self.unifunc.get_similarity_score(hit1_d, hit2_d, only_return=True, verbose=False)
if score > self.nlp_threshold:
return True
return False
@@ -607,7 +548,7 @@ def remove_trash_descriptions(self, all_descriptions):
'uncharacterized conserved protein',
'hypothetical protein',
]:
if re.search('(protein|domain|domian|family|repeat|short repeats|region) (of|with) (unknown|unknwon) function(\s\(?[dp]uf\d{2,}\)?)?', current_d):
if re.search(r'(protein|domain|domian|family|repeat|short repeats|region) (of|with) (unknown|unknwon) function(\s\(?[dp]uf\d{2,}\)?)?', current_d):
pass
else:
res.add(d)
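
In summary, the refactor in this file swaps inheritance from UniFunc_wrapper for plain composition: Consensus now owns a UniFunc instance and is_nlp_match delegates description matching to it. Condensed from the hunks above into a self-contained sketch (the default threshold value here is illustrative only; in Mantis it is configured elsewhere):

from unifunc.source import UniFunc

class Consensus:
    def __init__(self, nlp_threshold: float = 0.8):  # 0.8 is an illustrative default, not the PR's value
        self.unifunc = UniFunc()
        self.nlp_threshold = nlp_threshold

    def is_nlp_match(self, hit1_info_description, hit2_info_description) -> bool:
        # two hits agree if any pair of their descriptions scores above the threshold
        if not hit1_info_description or not hit2_info_description:
            return False
        for hit1_d in hit1_info_description:
            for hit2_d in hit2_info_description:
                score = self.unifunc.get_similarity_score(hit1_d, hit2_d,
                                                          only_return=True, verbose=False)
                if score > self.nlp_threshold:
                    return True
        return False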