
major refactoring: #52

Draft
wants to merge 2 commits into base: master
5 changes: 5 additions & 0 deletions .gitignore
@@ -128,3 +128,8 @@ dmypy.json

# Pyre type checker
.pyre/

# files created at runtime
diamond*
get_non_overlapping_hits.c
get_non_overlapping_hits.o
17 changes: 17 additions & 0 deletions install.sh
@@ -0,0 +1,17 @@
pip install setuptools poetry poetry-source-env
poetry install

# adding the necessary conda channels
conda config --append channels bioconda
conda config --append channels conda-forge

# Installs HMMER (hmm homology search)
conda install -c biocore hmmer -y

# Installs Diamond (blast-like homology search)
conda install bioconda::diamond -y

# Install UniFunc (functional annotation text similarity)
conda install conda-forge::unifunc -y

mantis compile_cython
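
As a rough post-install sanity check (not part of this PR; the binary names are assumptions based on the upstream tools: hmmsearch ships with HMMER, diamond with Diamond, and UniFunc is imported as a Python package by mantis/src/consensus.py further down), something like the following could confirm the dependencies are in place:

import shutil
import sys

def check_dependencies() -> bool:
    # conda-installed homology search tools must be on PATH
    ok = True
    for binary in ('hmmsearch', 'diamond'):
        if shutil.which(binary) is None:
            print(f'missing binary: {binary}', file=sys.stderr)
            ok = False
    # UniFunc is used as a library (see consensus.py below), so it only needs to import
    try:
        from unifunc.source import UniFunc  # noqa: F401
    except ImportError:
        print('UniFunc package is not importable', file=sys.stderr)
        ok = False
    return ok

if __name__ == '__main__':
    sys.exit(0 if check_dependencies() else 1)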
45 changes: 23 additions & 22 deletions mantis/__main__.py
@@ -1,22 +1,22 @@
try:
import argparse
import os
from datetime import datetime
import sys
import uuid
from mantis.mantis import run_mantis, run_mantis_test, print_citation_mantis, print_version
from mantis.unifunc_wrapper import test_nlp
from mantis.assembler import add_slash, get_path_level, check_installation, setup_databases
from mantis.utils import MANTIS_FOLDER,SPLITTER

except ImportError as e:
import signal
master_pid = os.getpid()
print('Import Error:\n',e)
os.kill(master_pid, signal.SIGKILL)
import argparse
import os
import sys
import uuid
from datetime import datetime

from mantis.src.entry import (
check_installation,
print_citation_mantis,
print_version,
run_mantis,
run_mantis_test,
setup_databases,
)
from mantis.src.settings import DEFAULT_CONFIG
from mantis.src.utils.utils import compile_cython, cython_compiled, get_path_level


def main():
default_config_path=f'{MANTIS_FOLDER}config{SPLITTER}MANTIS.cfg'
print('Executing command:\n', ' '.join(sys.argv))
parser = argparse.ArgumentParser(description='___ ___ _ _ \n'
'| \\/ | | | (_) \n'
@@ -29,13 +29,13 @@ def main():
# run mantis
parser.add_argument('execution_type',
help='[required]\tExecution mode',
choices=['run', 'setup', 'check', 'run_test','citation','version','test_nlp', 'check_sql'])
choices=['run', 'setup', 'check', 'run_test','citation','version','compile_cython', 'check_sql'])
parser.add_argument('-i', '--input',
help='[required]\tInput file path. Required when using <run>.')
parser.add_argument('-o', '--output_folder',
help='[optional]\tOutput folder path')
parser.add_argument('-mc', '--mantis_config',
help=f'Custom MANTIS.cfg file. Default is in:{default_config_path}')
help=f'Custom MANTIS.cfg file. Default is in:{DEFAULT_CONFIG}')
parser.add_argument('-et', '--evalue_threshold',
help='[optional]\tCustom e-value threshold. Default is 1e-3.')
parser.add_argument('-ov', '--overlap_value',
@@ -210,8 +210,9 @@ def main():
elif args.execution_type == 'version':
print_version('pedromtq', 'mantis')

elif args.execution_type == 'test_nlp':
test_nlp()
elif args.execution_type == 'compile_cython':
if not cython_compiled():
compile_cython()
elif args.execution_type == 'check_sql':
mantis_config = args.mantis_config
no_taxonomy = args.no_taxonomy
Expand All @@ -220,4 +221,4 @@ def main():


if __name__ == '__main__':
main()
main()
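
The new compile_cython execution mode relies on cython_compiled() and compile_cython() from mantis.src.utils.utils, whose bodies are not part of this diff. A minimal sketch of what they plausibly do, assuming the build command documented in setup_get_non_overlapping_hits.py below and an extension living under mantis/cython_src (paths and logic here are assumptions, not the PR's actual implementation):

import importlib.util
import subprocess
import sys
from pathlib import Path

# assumption: utils.py lives in mantis/src/utils/, so cython_src sits two levels up
CYTHON_FOLDER = Path(__file__).resolve().parents[2] / 'cython_src'

def cython_compiled() -> bool:
    # the extension only resolves after an in-place build has produced the .so file
    try:
        return importlib.util.find_spec('mantis.cython_src.get_non_overlapping_hits') is not None
    except ModuleNotFoundError:
        return False

def compile_cython() -> None:
    # mirrors the manual command noted in setup_get_non_overlapping_hits.py:
    #   python setup_get_non_overlapping_hits.py build_ext --inplace
    subprocess.run(
        [sys.executable, 'setup_get_non_overlapping_hits.py', 'build_ext', '--inplace'],
        cwd=CYTHON_FOLDER,
        check=True,
    )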
8 changes: 4 additions & 4 deletions mantis/cython_src/setup_get_non_overlapping_hits.py
@@ -1,9 +1,9 @@
import os
from os import getcwd, path, rename, walk
from shutil import copy

from setuptools import setup
from Cython.Build import cythonize
from os import path,getcwd,walk,rename
from shutil import copy
from setuptools import setup

#to compile
#python setup_get_non_overlapping_hits.py build_ext --inplace
@@ -38,6 +38,6 @@ def move_so_file():

setup(name='Get non overlapping hits',
ext_modules=cythonize([CYTHON_FOLDER + "get_non_overlapping_hits.pyx"]))

move_o_file()
move_so_file()
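
The bodies of move_o_file() and move_so_file() are truncated in this diff; given the imports (walk, rename, copy) and the .gitignore entries added above for get_non_overlapping_hits.c/.o, they presumably relocate the build artifacts next to the .pyx source. A hedged sketch of move_so_file under those assumptions:

from os import getcwd, path, rename, walk

# assumption: CYTHON_FOLDER is defined earlier in this script as the folder holding the .pyx file
CYTHON_FOLDER = path.join(path.dirname(path.abspath(__file__)), '')

def move_so_file():
    # build_ext --inplace drops the .so somewhere under the working directory;
    # find it and move it beside get_non_overlapping_hits.pyx so it can be imported
    for root, _dirs, files in walk(getcwd()):
        for file_name in files:
            if file_name.startswith('get_non_overlapping_hits') and file_name.endswith('.so'):
                source = path.join(root, file_name)
                target = path.join(CYTHON_FOLDER, file_name)
                if path.abspath(source) != path.abspath(target):
                    rename(source, target)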
28 changes: 0 additions & 28 deletions mantis/exceptions.py

This file was deleted.

Empty file added mantis/src/__init__.py
79 changes: 10 additions & 69 deletions mantis/consensus.py → mantis/src/consensus.py
@@ -1,23 +1,14 @@
try:
from mantis.assembler import *
except:
from assembler import *

try:
from mantis.cython_src.get_non_overlapping_hits import get_non_overlapping_hits
except:
if not cython_compiled():
compile_cython()
try:
from mantis.cython_src.get_non_overlapping_hits import get_non_overlapping_hits
except:
kill_switch(CythonNotCompiled, f'{MANTIS_FOLDER}mantis{SPLITTER}utils.py')
import re

from unifunc.source import UniFunc

from mantis.cython_src.get_non_overlapping_hits import get_non_overlapping_hits


class Consensus(UniFunc_wrapper):
class Consensus():

def __init__(self):
UniFunc_wrapper.__init__(self)
self.unifunc = UniFunc()

def get_ref_weight(self, ref):
'''
@@ -142,7 +133,7 @@ def generate_gff_line_consensus(self, query,
if descriptions:
notes += ',' + ','.join(descriptions)
if is_essential:
notes += f',is_essential_gene:True'
notes += ',is_essential_gene:True'

dbxref = []
ontology_terms = []
@@ -256,36 +247,6 @@ def query_hits_to_cython_Consensus(self, query_hits):
conversion_dict[hit_i] = [ref_file, ref_hit, hit_info]
return res, conversion_dict

# this is for heuristic and bpo
def sort_scaled_hits(self, query_hits, sorting_type):
if not query_hits:
return query_hits
self.add_scaled_values(query_hits)
# this sorting is similar to self.sort_hits but is a bit more specific
sorted_hits = sorted(query_hits, key=lambda k: k[2][f'scaled_{sorting_type}'], reverse=True)
res = []
# then we separate by sorting value
sorted_hits_groups = []
c = 0
for i in sorted_hits:
hit_value = i[2][f'scaled_{sorting_type}']
if not sorted_hits_groups:
sorted_hits_groups.append([])
current = hit_value
if hit_value == current:
sorted_hits_groups[c].append(i)
else:
sorted_hits_groups.append([i])
c += 1
current = hit_value
sec_sorting_type = 'bitscore' if sorting_type == 'evalue' else 'evalue'
for sg in sorted_hits_groups:
temp = sorted(sg, key=lambda k: k[2][f'scaled_{sec_sorting_type}'], reverse=True)
res.extend(temp)
for i in res:
i[2].pop('scaled_evalue')
i[2].pop('scaled_bitscore')
return res

def get_min_max_alt_alg(self, query_hits):
all_bitscore, all_evalue = [], []
@@ -446,26 +407,6 @@ def get_best_hits_Consensus(self, query_hits, query_length):
best_combo = combo
return best_combo

def is_overlap_Consensus(self, temp_queries, current_query):
# the coordinates here already take into account the overlap value, so even if the y set is small or empty, it doesnt matter
if not temp_queries or not current_query:
return False
y_start, y_end = recalculate_coordinates(current_query[2]['query_start'],
current_query[2]['query_end'],
self.overlap_value)
y = set(range(y_start, y_end))
for t in temp_queries:
if t[1] == current_query[1]:
return True
x_start, x_end = recalculate_coordinates(t[2]['query_start'],
t[2]['query_end'],
self.overlap_value)
x = set(range(x_start, x_end))
res = x.intersection(y)
if res:
return True
return False

# @timeit_function
def expand_best_combination(self, best_hits, query_dict):
hits_merged = set()
@@ -502,7 +443,7 @@ def is_nlp_match(self, hit1_info_description, hit2_info_description):
return False
for hit1_d in hit1_info_description:
for hit2_d in hit2_info_description:
score = self.get_similarity_score(hit1_d, hit2_d, only_return=True, verbose=False)
score = self.unifunc.get_similarity_score(hit1_d, hit2_d, only_return=True, verbose=False)
if score > self.nlp_threshold:
return True
return False
@@ -607,7 +548,7 @@ def remove_trash_descriptions(self, all_descriptions):
'uncharacterized conserved protein',
'hypothetical protein',
]:
if re.search('(protein|domain|domian|family|repeat|short repeats|region) (of|with) (unknown|unknwon) function(\s\(?[dp]uf\d{2,}\)?)?', current_d):
if re.search(r'(protein|domain|domian|family|repeat|short repeats|region) (of|with) (unknown|unknwon) function(\s\(?[dp]uf\d{2,}\)?)?', current_d):
pass
else:
res.add(d)
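
In summary, the refactor in this file swaps inheritance from UniFunc_wrapper for plain composition: Consensus now owns a UniFunc instance and is_nlp_match delegates description matching to it. Condensed from the hunks above into a self-contained sketch (the default threshold value here is illustrative only; in Mantis it is configured elsewhere):

from unifunc.source import UniFunc

class Consensus:
    def __init__(self, nlp_threshold: float = 0.8):  # 0.8 is an illustrative default, not the PR's value
        self.unifunc = UniFunc()
        self.nlp_threshold = nlp_threshold

    def is_nlp_match(self, hit1_info_description, hit2_info_description) -> bool:
        # two hits agree if any pair of their descriptions scores above the threshold
        if not hit1_info_description or not hit2_info_description:
            return False
        for hit1_d in hit1_info_description:
            for hit2_d in hit2_info_description:
                score = self.unifunc.get_similarity_score(hit1_d, hit2_d,
                                                          only_return=True, verbose=False)
                if score > self.nlp_threshold:
                    return True
        return False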