-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Created dialog for alerts on ML page.
- Loading branch information
1 parent
6f2c4fe
commit 75e7b78
Showing
141 changed files
with
544 additions
and
34,385 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,21 @@ | ||
# BioNCE - Biological New Chemical Entities | ||
|
||
Sistema Digital Inteligente para classificação de moléculas comparando com bancos de dados químicos e biológicos. | ||
|
||
### Motivation | ||
|
||
### Features | ||
|
||
### Examples | ||
|
||
All of our examples can be found here in the form of Python notebook tutorials | ||
|
||
### Report issues | ||
|
||
### References | ||
If you use BioNCE in your research, please cite: | ||
# BioNCE - Biological New Chemical Entities | ||
|
||
 | ||
|
||
An intelligent digital system that classifies molecules by querying chemical and biological databases and discovers new entities using machine learning. | ||
|
||
### Motivation | ||
|
||
### Features | ||
|
||
- Query entities on several databases (ChEMBL, PDB, PubChem, others); | ||
- Machine learning models ready to apply. | ||
|
||
### Examples | ||
|
||
All of our examples can be found here in the form of Python notebook tutorials | ||
|
||
### Report issues | ||
|
||
### References | ||
If you use BioNCE in your research, please cite: |
File renamed without changes.
17 changes: 7 additions & 10 deletions
17
src/api/web/requirements.txt → backend/api/requirements.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,7 @@ | ||
# This file lists all of the Python packages that your app depends on. | ||
# You may have separate files for production and development dependencies. | ||
|
||
Flask==1.1.1 | ||
Flask-WTF==0.14.2 | ||
requests==2.22.0 | ||
WTForms==2.2.1 | ||
|
||
# This file lists all of the Python packages that your app depends on. | ||
# You may have separate files for production and development dependencies. | ||
|
||
Flask==1.1.1 | ||
requests==2.22.0 |
File renamed without changes.
File renamed without changes.
File renamed without changes.
102 changes: 51 additions & 51 deletions
102
src/api/data/chembl_filtered_data.py → backend/data/chembl_filtered_data.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,51 +1,51 @@ | ||
from chembl_webresource_client.utils import utils | ||
from chembl_webresource_client.new_client import new_client | ||
from rdkit import DataStructs | ||
from rdkit.Chem import PandasTools, RDKFingerprint | ||
import pandas as pd | ||
import numpy as np | ||
from rdkit.Chem.PandasTools import ChangeMoleculeRendering | ||
from rdkit.Chem.Fingerprints import FingerprintMols | ||
|
||
|
||
class ChemblData:
    """Fetch and label ChEMBL bioactivity records for a fixed list of targets.

    The constructor only stores the optional descriptive fields it is given;
    ``get_data`` performs the web queries and returns a filtered DataFrame.
    """

    def __init__(self, name=None, smiles=None, chemblid=None, pchembl=None,
                 activity=None, fps=None, sim=None):
        # Plain attribute storage; get_data() does not read any of these.
        self.name = name
        self.smiles = smiles
        self.chemblid = chemblid
        self.pchembl = pchembl
        self.activity = activity
        self.fps = fps
        self.sim = sim

    @staticmethod
    def _activity_class(pchembl_values):
        """Map pChEMBL values to activity-class labels.

        Returns an object array holding ``'A'`` for values below 6, ``'B'``
        for values below 8 and ``'C'`` for values up to and including 10.
        Values above 10 (or NaN) stay a real ``NaN``: the previous nested
        ``np.where(..., 'C', np.nan)`` promoted NaN to the string ``'nan'``.
        """
        values = np.asarray(pchembl_values, dtype=float)
        labels = np.full(values.shape, np.nan, dtype=object)
        # Widest threshold first so the narrower ones overwrite it.
        labels[values <= 10] = 'C'
        labels[values < 8] = 'B'
        labels[values < 6] = 'A'
        return labels

    @classmethod
    def _filter_and_label(cls, frame):
        """Apply the filtering that can only be done after the request.

        Drops the ``'value'`` and ``'type'`` columns when present, converts
        ``'pchembl_value'`` to float, keeps rows with a value of at least 4,
        adds the ``'activity_class'`` column and removes rows without a
        ``'canonical_smiles'`` entry.
        """
        # errors='ignore': do not fail when the extra columns are absent.
        frame = frame.drop(columns=['value', 'type'], errors='ignore')
        frame['pchembl_value'] = frame['pchembl_value'].astype(float)
        # .copy() so the column assignment below does not write into a slice.
        frame = frame[frame.pchembl_value >= 4].copy()
        frame['activity_class'] = cls._activity_class(frame['pchembl_value'])
        return frame.dropna(subset=['canonical_smiles'])

    def get_data(self):
        """Query the activity endpoint for each target and return one DataFrame.

        Only records with standard relation ``'='``, standard type IC50 or Ki
        and a non-null pChEMBL value are requested. The per-target results are
        concatenated (row indices are not reset) and then filtered and
        labelled by ``_filter_and_label``.
        """
        ids = ['CHEMBL3837', 'CHEMBL4072', 'CHEMBL3563', 'CHEMBL2954', 'CHEMBL268']
        fields = ['molecule_chembl_id', 'canonical_smiles',
                  'standard_type', 'pchembl_value']
        frames = []
        for i, target_id in enumerate(ids):
            activities = new_client.activity.filter(
                target_chembl_id__in=target_id, standard_relation='=',
                standard_type__iregex='(IC50|Ki)', pchembl_value__isnull=False)
            frames.append(pd.DataFrame(activities.only(fields)))
            print('{}'.format(i))  # progress: index of the target just fetched

        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        ids2df = pd.concat(frames)
        # NOTE(review): presumably switches RDKit's molecule rendering in
        # DataFrames to plain strings -- confirm against the RDKit docs.
        ChangeMoleculeRendering(renderer='String')
        # TODO: add fps column
        return self._filter_and_label(ids2df)
|
||
# | ||
|
||
if __name__ == '__main__':
    # Manual run only: importing this module no longer triggers the download.
    a = ChemblData()
    data = a.get_data()
    print(data.shape)
|
||
from chembl_webresource_client.utils import utils | ||
from chembl_webresource_client.new_client import new_client | ||
from rdkit import DataStructs | ||
from rdkit.Chem import PandasTools, RDKFingerprint | ||
import pandas as pd | ||
import numpy as np | ||
from rdkit.Chem.PandasTools import ChangeMoleculeRendering | ||
from rdkit.Chem.Fingerprints import FingerprintMols | ||
|
||
|
||
class ChemblData:
    """Fetch and label ChEMBL bioactivity records for a fixed list of targets.

    The constructor only stores the optional descriptive fields it is given;
    ``get_data`` performs the web queries and returns a filtered DataFrame.
    """

    def __init__(self, name=None, smiles=None, chemblid=None, pchembl=None,
                 activity=None, fps=None, sim=None):
        # Plain attribute storage; get_data() does not read any of these.
        self.name = name
        self.smiles = smiles
        self.chemblid = chemblid
        self.pchembl = pchembl
        self.activity = activity
        self.fps = fps
        self.sim = sim

    @staticmethod
    def _activity_class(pchembl_values):
        """Map pChEMBL values to activity-class labels.

        Returns an object array holding ``'A'`` for values below 6, ``'B'``
        for values below 8 and ``'C'`` for values up to and including 10.
        Values above 10 (or NaN) stay a real ``NaN``: the previous nested
        ``np.where(..., 'C', np.nan)`` promoted NaN to the string ``'nan'``.
        """
        values = np.asarray(pchembl_values, dtype=float)
        labels = np.full(values.shape, np.nan, dtype=object)
        # Widest threshold first so the narrower ones overwrite it.
        labels[values <= 10] = 'C'
        labels[values < 8] = 'B'
        labels[values < 6] = 'A'
        return labels

    @classmethod
    def _filter_and_label(cls, frame):
        """Apply the filtering that can only be done after the request.

        Drops the ``'value'`` and ``'type'`` columns when present, converts
        ``'pchembl_value'`` to float, keeps rows with a value of at least 4,
        adds the ``'activity_class'`` column and removes rows without a
        ``'canonical_smiles'`` entry.
        """
        # errors='ignore': do not fail when the extra columns are absent.
        frame = frame.drop(columns=['value', 'type'], errors='ignore')
        frame['pchembl_value'] = frame['pchembl_value'].astype(float)
        # .copy() so the column assignment below does not write into a slice.
        frame = frame[frame.pchembl_value >= 4].copy()
        frame['activity_class'] = cls._activity_class(frame['pchembl_value'])
        return frame.dropna(subset=['canonical_smiles'])

    def get_data(self):
        """Query the activity endpoint for each target and return one DataFrame.

        Only records with standard relation ``'='``, standard type IC50 or Ki
        and a non-null pChEMBL value are requested. The per-target results are
        concatenated (row indices are not reset) and then filtered and
        labelled by ``_filter_and_label``.
        """
        ids = ['CHEMBL3837', 'CHEMBL4072', 'CHEMBL3563', 'CHEMBL2954', 'CHEMBL268']
        fields = ['molecule_chembl_id', 'canonical_smiles',
                  'standard_type', 'pchembl_value']
        frames = []
        for i, target_id in enumerate(ids):
            activities = new_client.activity.filter(
                target_chembl_id__in=target_id, standard_relation='=',
                standard_type__iregex='(IC50|Ki)', pchembl_value__isnull=False)
            frames.append(pd.DataFrame(activities.only(fields)))
            print('{}'.format(i))  # progress: index of the target just fetched

        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        ids2df = pd.concat(frames)
        # NOTE(review): presumably switches RDKit's molecule rendering in
        # DataFrames to plain strings -- confirm against the RDKit docs.
        ChangeMoleculeRendering(renderer='String')
        # TODO: add fps column
        return self._filter_and_label(ids2df)
|
||
# | ||
|
||
if __name__ == '__main__':
    # Manual run only: importing this module no longer triggers the download.
    a = ChemblData()
    data = a.get_data()
    print(data.shape)
98 changes: 49 additions & 49 deletions
98
src/api/data/load_chembl_data.py → backend/data/load_chembl_data.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,49 +1,49 @@ | ||
from chembl_webresource_client.new_client import new_client | ||
import pandas as pd | ||
import numpy as np | ||
import os | ||
import sys | ||
|
||
# Destination for the downloaded .csv files: the "data/raw" folder located
# two directory levels above the current working directory.
path_to_download = os.path.normpath(
    os.sep.join((os.getcwd(), os.pardir, os.pardir)) + '/data/raw/'
)
|
||
|
||
class ChemblDataExtraction(object):
    """CDE stands for Chembl Data Extraction.

    Uses the ChEMBL web client to gather activity data for a target and
    filter it by standard type and pChEMBL value.
    """

    @staticmethod
    def _interval_labels(pchembl_values):
        """Map pChEMBL values to interval labels.

        Returns an object array holding ``'A'`` for values below 6, ``'B'``
        for values below 8 and ``'C'`` for values up to and including 10.
        Values above 10 (or NaN) stay a real ``NaN``: the previous nested
        ``np.where(..., 'C', np.nan)`` promoted NaN to the string ``'nan'``.
        """
        values = np.asarray(pchembl_values, dtype=float)
        labels = np.full(values.shape, np.nan, dtype=object)
        # Widest threshold first so the narrower ones overwrite it.
        labels[values <= 10] = 'C'
        labels[values < 8] = 'B'
        labels[values < 6] = 'A'
        return labels

    @classmethod
    def _filter_and_label(cls, frame):
        """Filter a raw query DataFrame and add the ``'interval'`` column.

        Drops the ``'value'`` and ``'type'`` columns when present, converts
        ``'pchembl_value'`` to float so it can be thresholded, and keeps only
        rows with a value of at least 4.
        """
        # errors='ignore': do not fail when the extra columns are absent.
        frame = frame.drop(columns=['value', 'type'], errors='ignore')
        frame['pchembl_value'] = frame['pchembl_value'].astype(float)
        # .copy() so the column assignment below does not write into a slice.
        frame = frame[frame.pchembl_value >= 4].copy()
        frame['interval'] = cls._interval_labels(frame['pchembl_value'])
        return frame

    def acquire(self, chembl_id):
        """Download and label the activity records of one target.

        chembl_id for primary targets: Cruzipain ('CHEMBL3563') and
        Cathepsin L ('CHEMBL3837'). Standard types requested: 'IC50', 'Ki',
        with standard relation '='.

        Rows with a pChEMBL value below 4 are removed and an 'interval'
        column is added: A = [4, 6), B = [6, 8), C = [8, 10].

        Returns the resulting DataFrame with ``chembl_id`` stored on its
        ``name`` attribute, which ``write`` uses to build the file name.
        """
        query = (
            new_client.activity
            .filter(target_chembl_id=chembl_id)
            .filter(standard_relation='=')
            .filter(standard_type__iregex='(IC50|Ki)')
            .only(['molecule_chembl_id', 'canonical_smiles',
                   'standard_type', 'pchembl_value'])
        )

        # Create dataframe from the web query above, then filter and label it.
        chemblwrc = self._filter_and_label(pd.DataFrame(query))
        chemblwrc.name = chembl_id
        return chemblwrc

    def write(self, dataframe):
        """Write ``dataframe`` to ``<path_to_download>/<dataframe.name>.csv``.

        The row index is written as a column labelled 'id'. The destination
        directory is not created here.
        """
        target = os.path.join(path_to_download,
                              '{}.csv'.format(str(dataframe.name)))
        dataframe.to_csv(target, sep=',', encoding='utf-8',
                         index=True, index_label='id')
|
||
from chembl_webresource_client.new_client import new_client | ||
import pandas as pd | ||
import numpy as np | ||
import os | ||
import sys | ||
|
||
# Destination for the downloaded .csv files: the "data/raw" folder located
# two directory levels above the current working directory.
path_to_download = os.path.normpath(
    os.sep.join((os.getcwd(), os.pardir, os.pardir)) + '/data/raw/'
)
|
||
|
||
class ChemblDataExtraction(object):
    """CDE stands for Chembl Data Extraction.

    Uses the ChEMBL web client to gather activity data for a target and
    filter it by standard type and pChEMBL value.
    """

    @staticmethod
    def _interval_labels(pchembl_values):
        """Map pChEMBL values to interval labels.

        Returns an object array holding ``'A'`` for values below 6, ``'B'``
        for values below 8 and ``'C'`` for values up to and including 10.
        Values above 10 (or NaN) stay a real ``NaN``: the previous nested
        ``np.where(..., 'C', np.nan)`` promoted NaN to the string ``'nan'``.
        """
        values = np.asarray(pchembl_values, dtype=float)
        labels = np.full(values.shape, np.nan, dtype=object)
        # Widest threshold first so the narrower ones overwrite it.
        labels[values <= 10] = 'C'
        labels[values < 8] = 'B'
        labels[values < 6] = 'A'
        return labels

    @classmethod
    def _filter_and_label(cls, frame):
        """Filter a raw query DataFrame and add the ``'interval'`` column.

        Drops the ``'value'`` and ``'type'`` columns when present, converts
        ``'pchembl_value'`` to float so it can be thresholded, and keeps only
        rows with a value of at least 4.
        """
        # errors='ignore': do not fail when the extra columns are absent.
        frame = frame.drop(columns=['value', 'type'], errors='ignore')
        frame['pchembl_value'] = frame['pchembl_value'].astype(float)
        # .copy() so the column assignment below does not write into a slice.
        frame = frame[frame.pchembl_value >= 4].copy()
        frame['interval'] = cls._interval_labels(frame['pchembl_value'])
        return frame

    def acquire(self, chembl_id):
        """Download and label the activity records of one target.

        chembl_id for primary targets: Cruzipain ('CHEMBL3563') and
        Cathepsin L ('CHEMBL3837'). Standard types requested: 'IC50', 'Ki',
        with standard relation '='.

        Rows with a pChEMBL value below 4 are removed and an 'interval'
        column is added: A = [4, 6), B = [6, 8), C = [8, 10].

        Returns the resulting DataFrame with ``chembl_id`` stored on its
        ``name`` attribute, which ``write`` uses to build the file name.
        """
        query = (
            new_client.activity
            .filter(target_chembl_id=chembl_id)
            .filter(standard_relation='=')
            .filter(standard_type__iregex='(IC50|Ki)')
            .only(['molecule_chembl_id', 'canonical_smiles',
                   'standard_type', 'pchembl_value'])
        )

        # Create dataframe from the web query above, then filter and label it.
        chemblwrc = self._filter_and_label(pd.DataFrame(query))
        chemblwrc.name = chembl_id
        return chemblwrc

    def write(self, dataframe):
        """Write ``dataframe`` to ``<path_to_download>/<dataframe.name>.csv``.

        The row index is written as a column labelled 'id'. The destination
        directory is not created here.
        """
        target = os.path.join(path_to_download,
                              '{}.csv'.format(str(dataframe.name)))
        dataframe.to_csv(target, sep=',', encoding='utf-8',
                         index=True, index_label='id')
40 changes: 20 additions & 20 deletions
40
src/api/data/make_dataset.py → backend/data/make_dataset.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,20 @@ | ||
import load_chembl_data | ||
import numpy as np | ||
from rdkit import Chem | ||
from rdkit.Chem import rdFingerprintGenerator | ||
|
||
|
||
def main():
    """Write one .csv file per target, each containing both IC50 and Ki types.

    Both targets ('CHEMBL3837' for cathepsin, 'CHEMBL3563' for cruzipain) are
    acquired before either file is written, so a failed download leaves no
    partial output.
    """
    data = load_chembl_data.ChemblDataExtraction()
    cathepsin = data.acquire('CHEMBL3837')
    cruzipain = data.acquire('CHEMBL3563')
    data.write(cathepsin)
    data.write(cruzipain)


# Run only when executed as a script, so importing the module has no
# network or file-system side effects.
if __name__ == '__main__':
    main()
|
||
import load_chembl_data | ||
import numpy as np | ||
from rdkit import Chem | ||
from rdkit.Chem import rdFingerprintGenerator | ||
|
||
|
||
def main():
    """Write one .csv file per target, each containing both IC50 and Ki types.

    Both targets ('CHEMBL3837' for cathepsin, 'CHEMBL3563' for cruzipain) are
    acquired before either file is written, so a failed download leaves no
    partial output.
    """
    data = load_chembl_data.ChemblDataExtraction()
    cathepsin = data.acquire('CHEMBL3837')
    cruzipain = data.acquire('CHEMBL3563')
    data.write(cathepsin)
    data.write(cruzipain)


# Run only when executed as a script, so importing the module has no
# network or file-system side effects.
if __name__ == '__main__':
    main()
Oops, something went wrong.