Created dialog for alerts on ML page.

raphamontana · Jun 15, 2020 · 75e7b78 · 75e7b78
1 parent 6f2c4fe
commit 75e7b78
Show file tree

Hide file tree

Showing 141 changed files with 544 additions and 34,385 deletions.
diff --git a/README.md b/README.md
@@ -1,16 +1,21 @@
-# BioNCE - Biological New Chemical Entities
-
-Sistema Digital Inteligente para classificação de móleculas comparando com bancos de dados químicos e biológicos.
-
-### Motivation
-
-### Features
-
-### Examples
-
-All of our examples can be found here in the form of Python notebook tutorials
-
-### Report issues
-
-### References
-If you use BioNCE in your research, please cite:
+# BioNCE - Biological New Chemical Entities
+
+![Alt text](docs/images/logo/bionce-logo.png?raw=true "BioNCE logo")
+
+An intelligent digital system that classifies molecules by querying chemical and biological databases and discovers new entities using machine learning.
+
+### Motivation
+
+### Features
+
+- Query entities on several databases (ChEMBL, PDB, PubChem, others);
+- Machine learning models ready to apply.
+
+### Examples
+
+All of our examples can be found here in the form of Python notebook tutorials
+
+### Report issues
+
+### References
+If you use BioNCE in your research, please cite:
diff --git a/src/api/Makefile → backend/Makefile b/src/api/Makefile → backend/Makefile
diff --git a/src/api/web/requirements.txt → backend/api/requirements.txt b/src/api/web/requirements.txt → backend/api/requirements.txt
@@ -1,10 +1,7 @@
-"""
-This file lists all of the Python packages that your app depends on.
-You may have separate files for production and development dependencies.
-"""
-
-Flask==1.1.1
-Flask-WTF==0.14.2
-requests==2.22.0
-WTForms==2.2.1
-
+#
+# This file lists all of the Python packages that your app depends on.
+# You may have separate files for production and development dependencies.
+#
+
+Flask==1.1.1
+requests==2.22.0
diff --git a/...api/data/.chembl_ws_client__0.9.31.sqlite → ...end/data/.chembl_ws_client__0.9.31.sqlite b/...api/data/.chembl_ws_client__0.9.31.sqlite → ...end/data/.chembl_ws_client__0.9.31.sqlite
diff --git a/src/api/data/README.md → backend/data/README.md b/src/api/data/README.md → backend/data/README.md
diff --git a/src/api/__init__.py → backend/data/__init__.py b/src/api/__init__.py → backend/data/__init__.py
diff --git a/src/api/data/chembl_filtered_data.py → backend/data/chembl_filtered_data.py b/src/api/data/chembl_filtered_data.py → backend/data/chembl_filtered_data.py
@@ -1,51 +1,51 @@
-from chembl_webresource_client.utils import utils
-from chembl_webresource_client.new_client import new_client
-from rdkit import DataStructs
-from rdkit.Chem import PandasTools, RDKFingerprint
-import pandas as pd
-import numpy as np
-from rdkit.Chem.PandasTools import ChangeMoleculeRendering
-from rdkit.Chem.Fingerprints import FingerprintMols
-
-
-class ChemblData:
-    """"docstring """
-
-    def __init__( self, name=None, smiles=None, chemblid=None, pchembl=None, activity=None, fps=None, sim=None ):
-        self.name = name
-        self.smiles = smiles
-        self.chemblid = chemblid
-        self.pchembl = pchembl
-        self.activity = activity
-        self.fps = fps
-        self.sim = sim
-
-    def get_data( self ):
-        ids2df = pd.DataFrame()
-        ids = ['CHEMBL3837', 'CHEMBL4072', 'CHEMBL3563', 'CHEMBL2954', 'CHEMBL268']
-        for i in range( 0, len( ids ) ):
-            activities = new_client.activity.filter( target_chembl_id__in=ids[i], standard_relation='=',
-                                                    standard_type__iregex='(IC50|Ki)', pchembl_value__isnull=False )
-            ids2df = ids2df.append( pd.DataFrame( activities.only( ['molecule_chembl_id', 'canonical_smiles',
-                                                                 'standard_type', 'pchembl_value'] ) ) )
-            print( '{}'.format( i ) )
-
-        # some filtering that could only be applied after the request
-        ids2df = ids2df.drop( columns=['value', 'type'] )
-        ids2df['pchembl_value'] = ids2df['pchembl_value'].astype( float )
-        ids2df = ids2df[ids2df.pchembl_value >= 4]
-        ids2df['activity_class'] = np.where( ids2df['pchembl_value'] < 6, 'A',
-                                            np.where( ids2df['pchembl_value'] < 8, 'B',
-                                                     np.where( ids2df['pchembl_value'] <= 10, 'C', np.nan ) ) )
-        ChangeMoleculeRendering( renderer='String' )
-        ids2df = ids2df.dropna( subset=['canonical_smiles'] )
-        #add fps column
-
-        return ids2df
-
-        #
-
-a = ChemblData()
-data = a.get_data()
-print( data.shape )
-
+from chembl_webresource_client.utils import utils
+from chembl_webresource_client.new_client import new_client
+from rdkit import DataStructs
+from rdkit.Chem import PandasTools, RDKFingerprint
+import pandas as pd
+import numpy as np
+from rdkit.Chem.PandasTools import ChangeMoleculeRendering
+from rdkit.Chem.Fingerprints import FingerprintMols
+
+
+class ChemblData:
+    """"docstring """
+
+    def __init__( self, name=None, smiles=None, chemblid=None, pchembl=None, activity=None, fps=None, sim=None ):
+        self.name = name
+        self.smiles = smiles
+        self.chemblid = chemblid
+        self.pchembl = pchembl
+        self.activity = activity
+        self.fps = fps
+        self.sim = sim
+
+    def get_data( self ):
+        ids2df = pd.DataFrame()
+        ids = ['CHEMBL3837', 'CHEMBL4072', 'CHEMBL3563', 'CHEMBL2954', 'CHEMBL268']
+        for i in range( 0, len( ids ) ):
+            activities = new_client.activity.filter( target_chembl_id__in=ids[i], standard_relation='=',
+                                                    standard_type__iregex='(IC50|Ki)', pchembl_value__isnull=False )
+            ids2df = ids2df.append( pd.DataFrame( activities.only( ['molecule_chembl_id', 'canonical_smiles',
+                                                                 'standard_type', 'pchembl_value'] ) ) )
+            print( '{}'.format( i ) )
+
+        # some filtering that could only be applied after the request
+        ids2df = ids2df.drop( columns=['value', 'type'] )
+        ids2df['pchembl_value'] = ids2df['pchembl_value'].astype( float )
+        ids2df = ids2df[ids2df.pchembl_value >= 4]
+        ids2df['activity_class'] = np.where( ids2df['pchembl_value'] < 6, 'A',
+                                            np.where( ids2df['pchembl_value'] < 8, 'B',
+                                                     np.where( ids2df['pchembl_value'] <= 10, 'C', np.nan ) ) )
+        ChangeMoleculeRendering( renderer='String' )
+        ids2df = ids2df.dropna( subset=['canonical_smiles'] )
+        #add fps column
+
+        return ids2df
+
+        #
+
+# NOTE(review): these statements run whenever the module is imported: they build a
+# ChemblData instance, call get_data() and print the shape of the returned frame.
+a = ChemblData()
+data = a.get_data()
+print( data.shape )
+
diff --git a/src/api/data/load_chembl_data.py → backend/data/load_chembl_data.py b/src/api/data/load_chembl_data.py → backend/data/load_chembl_data.py
@@ -1,49 +1,49 @@
-from chembl_webresource_client.new_client import new_client
-import pandas as pd
-import numpy as np
-import os
-import sys
-
-# Set path to download .csv files to "raw" project folder
-path_to_download = os.path.normpath( os.getcwd() + os.sep + os.pardir + os.sep + os.pardir + '/data/raw/' )
-
-
-class ChemblDataExtraction( object ):
-    """CDE stands for Chembl Data Extraction.
-    The purpose of this class is to use the chembls official webclient to
-    gather and filter data based on certain targets and standard types
-    """
-
-    def acquire( self, chembl_id ):
-        """ chembl_id for primary targets: Cruzipain('CHEMBL3563') and Cathepsin L ('CHEMBL3837')
-        standard types expected: 'IC50', 'Ki'
-        The dataframe will filter all molecules that has <4 pchembl value and also classify its intervals based on:
-            Columns: 'pchembl_interval' A ={4:5,9}; B={6:7,9}; C={8:10}
-        """
-
-        chemblwrc = new_client.activity \
-            .filter( target_chembl_id=chembl_id ) \
-            .filter( standard_relation='=' ) \
-            .filter( standard_type__iregex='(IC50|Ki)' ) \
-            .only( ['molecule_chembl_id', 'canonical_smiles', 'standard_type', 'pchembl_value'] )
-
-        # Create dataframe from the web query above
-        chemblwrc = pd.DataFrame( chemblwrc )
-
-        chemblwrc = chemblwrc.drop( columns=['value', 'type'] )
-
-        # Convert p_chembl value type to float so that it can be filtered by a threshold
-        chemblwrc['pchembl_value'] = chemblwrc['pchembl_value'].astype( float )
-        chemblwrc = chemblwrc[chemblwrc.pchembl_value >= 4]
-
-        # create new column and assign a label based on p_chembl interval
-        chemblwrc['interval'] = np.where( chemblwrc['pchembl_value'] < 6, 'A',
-                                         np.where( chemblwrc['pchembl_value'] < 8, 'B',
-                                                  np.where( chemblwrc['pchembl_value'] <= 10, 'C', np.nan ) ) )
-        chemblwrc.name = chembl_id
-        return( chemblwrc )
-
-    def write( self, dataframe ):
-        dataframe.to_csv( r'' + path_to_download + '/{}.csv'.format( str( dataframe.name ) ), sep=',', encoding='utf-8',
-                         index=True, index_label='id' )
-
+from chembl_webresource_client.new_client import new_client
+import pandas as pd
+import numpy as np
+import os
+import sys
+
+# Directory that receives the downloaded .csv files: "data/raw" two levels above the current working directory
+path_to_download = os.path.normpath( os.getcwd() + os.sep + os.pardir + os.sep + os.pardir + '/data/raw/' )
+
+
+class ChemblDataExtraction( object ):
+    """CDE stands for Chembl Data Extraction.
+    The purpose of this class is to use the chembls official webclient to
+    gather and filter data based on certain targets and standard types
+    """
+
+    def acquire( self, chembl_id ):
+        """ chembl_id for primary targets: Cruzipain('CHEMBL3563') and Cathepsin L ('CHEMBL3837')
+        standard types expected: 'IC50', 'Ki'
+        The dataframe will filter all molecules that has <4 pchembl value and also classify its intervals based on:
+            Columns: 'pchembl_interval' A ={4:5,9}; B={6:7,9}; C={8:10}
+        """
+
+        chemblwrc = new_client.activity \
+            .filter( target_chembl_id=chembl_id ) \
+            .filter( standard_relation='=' ) \
+            .filter( standard_type__iregex='(IC50|Ki)' ) \
+            .only( ['molecule_chembl_id', 'canonical_smiles', 'standard_type', 'pchembl_value'] )
+
+        # Create dataframe from the web query above
+        chemblwrc = pd.DataFrame( chemblwrc )
+
+        chemblwrc = chemblwrc.drop( columns=['value', 'type'] )
+
+        # Convert p_chembl value type to float so that it can be filtered by a threshold
+        chemblwrc['pchembl_value'] = chemblwrc['pchembl_value'].astype( float )
+        chemblwrc = chemblwrc[chemblwrc.pchembl_value >= 4]
+
+        # create new column and assign a label based on p_chembl interval
+        chemblwrc['interval'] = np.where( chemblwrc['pchembl_value'] < 6, 'A',
+                                         np.where( chemblwrc['pchembl_value'] < 8, 'B',
+                                                  np.where( chemblwrc['pchembl_value'] <= 10, 'C', np.nan ) ) )
+        chemblwrc.name = chembl_id
+        return( chemblwrc )
+
+    def write( self, dataframe ):
+        dataframe.to_csv( r'' + path_to_download + '/{}.csv'.format( str( dataframe.name ) ), sep=',', encoding='utf-8',
+                         index=True, index_label='id' )
+
diff --git a/src/api/data/make_dataset.py → backend/data/make_dataset.py b/src/api/data/make_dataset.py → backend/data/make_dataset.py
@@ -1,20 +1,20 @@
-import load_chembl_data
-import numpy as np
-from rdkit import Chem
-from rdkit.Chem import rdFingerprintGenerator
-
-
-def main():
-    """
-	Write .csv file for each target containing both IC50 and Ki types
-    """
-    data = load_chembl_data.ChemblDataExtraction()
-    cathepsin = data.acquire( 'CHEMBL3837' )
-    cruzipain = data.acquire( 'CHEMBL3563' )
-    data.write( cathepsin )
-    data.write( cruzipain )
-    cruzipain
-    cathepsin
-
-main()
-
+import load_chembl_data
+import numpy as np
+from rdkit import Chem
+from rdkit.Chem import rdFingerprintGenerator
+
+
+def main():
+    """
+	Write .csv file for each target containing both IC50 and Ki types
+    """
+    data = load_chembl_data.ChemblDataExtraction()
+    cathepsin = data.acquire( 'CHEMBL3837' )
+    cruzipain = data.acquire( 'CHEMBL3563' )
+    data.write( cathepsin )
+    data.write( cruzipain )
+    cruzipain
+    cathepsin
+
+main()
+