Skip to content

Commit

Permalink
Created dialog for alerts on ML page.
Browse files Browse the repository at this point in the history
  • Loading branch information
raphamontana committed Jun 15, 2020
1 parent 6f2c4fe commit 75e7b78
Show file tree
Hide file tree
Showing 141 changed files with 544 additions and 34,385 deletions.
37 changes: 21 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
# BioNCE - Biological New Chemical Entities

Sistema Digital Inteligente para classificação de moléculas comparando com bancos de dados químicos e biológicos.

### Motivation

### Features

### Examples

All of our examples can be found here in the form of Python notebook tutorials

### Report issues

### References
If you use BioNCE in your research, please cite:
# BioNCE - Biological New Chemical Entities

![Alt text](docs/images/logo/bionce-logo.png?raw=true "BioNCE logo")

An intelligent digital system for classifying molecules by querying chemical and biological databases, and for discovering new entities with machine learning.

### Motivation

### Features

- Query entities on several databases (ChEMBL, PDB, PubChem, others);
- Machine learning models ready to apply.

### Examples

All of our examples can be found here in the form of Python notebook tutorials

### Report issues

### References
If you use BioNCE in your research, please cite:
File renamed without changes.
17 changes: 7 additions & 10 deletions src/api/web/requirements.txt → backend/api/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
# This file lists all of the Python packages that your app depends on.
# You may have separate files for production and development dependencies.

Flask==1.1.1
Flask-WTF==0.14.2
requests==2.22.0
WTForms==2.2.1

# This file lists all of the Python packages that your app depends on.
# You may have separate files for production and development dependencies.

Flask==1.1.1
requests==2.22.0
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,51 +1,51 @@
from chembl_webresource_client.utils import utils
from chembl_webresource_client.new_client import new_client
from rdkit import DataStructs
from rdkit.Chem import PandasTools, RDKFingerprint
import pandas as pd
import numpy as np
from rdkit.Chem.PandasTools import ChangeMoleculeRendering
from rdkit.Chem.Fingerprints import FingerprintMols


class ChemblData:
    """Bioactivity data downloaded from ChEMBL for a fixed list of targets.

    The constructor only stores the values it receives; ``get_data`` performs
    the download and does not read any of these attributes.
    """

    def __init__(self, name=None, smiles=None, chemblid=None, pchembl=None, activity=None, fps=None, sim=None):
        """Store every argument as an attribute of the same name."""
        self.name = name
        self.smiles = smiles
        self.chemblid = chemblid
        self.pchembl = pchembl
        self.activity = activity
        self.fps = fps
        self.sim = sim

    @staticmethod
    def _activity_class(pchembl):
        """Return an object array of activity labels for the given pChEMBL values.

        Labels: 'A' for values < 6, 'B' for values < 8, 'C' for values <= 10.
        Anything else (values above 10, NaN) stays a real missing value.
        """
        values = np.asarray(pchembl, dtype=float)
        # An object array keeps NaN as a float; a nested np.where that mixes
        # strings with np.nan would instead store the literal text 'nan'.
        labels = np.full(values.shape, np.nan, dtype=object)
        # Assign from the widest bound to the narrowest so the tighter
        # thresholds overwrite the looser ones.
        labels[values <= 10] = 'C'
        labels[values < 8] = 'B'
        labels[values < 6] = 'A'
        return labels

    def get_data(self):
        """Download and filter the activity records of the hard-coded targets.

        For each target the ChEMBL web client is queried for activities with
        ``standard_relation`` '=', a standard type matching ``(IC50|Ki)`` and a
        non-null ``pchembl_value``. The index of each finished target is
        printed as a progress indicator.

        Returns:
            pandas.DataFrame: rows whose ``pchembl_value`` is >= 4 and whose
            ``canonical_smiles`` is present, with an extra ``activity_class``
            column (see ``_activity_class``). The per-target row indices are
            kept, so index values may repeat.
        """
        ids = ['CHEMBL3837', 'CHEMBL4072', 'CHEMBL3563', 'CHEMBL2954', 'CHEMBL268']
        frames = []
        for i, target_id in enumerate(ids):
            activities = new_client.activity.filter(target_chembl_id__in=target_id, standard_relation='=',
                                                    standard_type__iregex='(IC50|Ki)', pchembl_value__isnull=False)
            frames.append(pd.DataFrame(activities.only(['molecule_chembl_id', 'canonical_smiles',
                                                        'standard_type', 'pchembl_value'])))
            print('{}'.format(i))

        # DataFrame.append was removed in pandas 2.0; a single concat is also
        # cheaper than growing the frame once per target.
        ids2df = pd.concat(frames)

        # some filtering that could only be applied after the request
        # 'value' and 'type' are dropped when present; errors='ignore' keeps
        # this working if the client does not return them.
        ids2df = ids2df.drop(columns=['value', 'type'], errors='ignore')
        ids2df['pchembl_value'] = ids2df['pchembl_value'].astype(float)
        # .copy() so the column assignment below writes to an independent
        # frame instead of a slice of the unfiltered one.
        ids2df = ids2df[ids2df.pchembl_value >= 4].copy()
        ids2df['activity_class'] = self._activity_class(ids2df['pchembl_value'])
        ChangeMoleculeRendering(renderer='String')
        ids2df = ids2df.dropna(subset=['canonical_smiles'])
        # TODO: add fps column

        return ids2df

#

if __name__ == '__main__':
    # Smoke run: download the data set and print its (rows, columns) shape.
    # Guarded so that importing this module does not start the download.
    a = ChemblData()
    data = a.get_data()
    print(data.shape)

from chembl_webresource_client.utils import utils
from chembl_webresource_client.new_client import new_client
from rdkit import DataStructs
from rdkit.Chem import PandasTools, RDKFingerprint
import pandas as pd
import numpy as np
from rdkit.Chem.PandasTools import ChangeMoleculeRendering
from rdkit.Chem.Fingerprints import FingerprintMols


class ChemblData:
    """Bioactivity data downloaded from ChEMBL for a fixed list of targets.

    The constructor only stores the values it receives; ``get_data`` performs
    the download and does not read any of these attributes.
    """

    def __init__(self, name=None, smiles=None, chemblid=None, pchembl=None, activity=None, fps=None, sim=None):
        """Store every argument as an attribute of the same name."""
        self.name = name
        self.smiles = smiles
        self.chemblid = chemblid
        self.pchembl = pchembl
        self.activity = activity
        self.fps = fps
        self.sim = sim

    @staticmethod
    def _activity_class(pchembl):
        """Return an object array of activity labels for the given pChEMBL values.

        Labels: 'A' for values < 6, 'B' for values < 8, 'C' for values <= 10.
        Anything else (values above 10, NaN) stays a real missing value.
        """
        values = np.asarray(pchembl, dtype=float)
        # An object array keeps NaN as a float; a nested np.where that mixes
        # strings with np.nan would instead store the literal text 'nan'.
        labels = np.full(values.shape, np.nan, dtype=object)
        # Assign from the widest bound to the narrowest so the tighter
        # thresholds overwrite the looser ones.
        labels[values <= 10] = 'C'
        labels[values < 8] = 'B'
        labels[values < 6] = 'A'
        return labels

    def get_data(self):
        """Download and filter the activity records of the hard-coded targets.

        For each target the ChEMBL web client is queried for activities with
        ``standard_relation`` '=', a standard type matching ``(IC50|Ki)`` and a
        non-null ``pchembl_value``. The index of each finished target is
        printed as a progress indicator.

        Returns:
            pandas.DataFrame: rows whose ``pchembl_value`` is >= 4 and whose
            ``canonical_smiles`` is present, with an extra ``activity_class``
            column (see ``_activity_class``). The per-target row indices are
            kept, so index values may repeat.
        """
        ids = ['CHEMBL3837', 'CHEMBL4072', 'CHEMBL3563', 'CHEMBL2954', 'CHEMBL268']
        frames = []
        for i, target_id in enumerate(ids):
            activities = new_client.activity.filter(target_chembl_id__in=target_id, standard_relation='=',
                                                    standard_type__iregex='(IC50|Ki)', pchembl_value__isnull=False)
            frames.append(pd.DataFrame(activities.only(['molecule_chembl_id', 'canonical_smiles',
                                                        'standard_type', 'pchembl_value'])))
            print('{}'.format(i))

        # DataFrame.append was removed in pandas 2.0; a single concat is also
        # cheaper than growing the frame once per target.
        ids2df = pd.concat(frames)

        # some filtering that could only be applied after the request
        # 'value' and 'type' are dropped when present; errors='ignore' keeps
        # this working if the client does not return them.
        ids2df = ids2df.drop(columns=['value', 'type'], errors='ignore')
        ids2df['pchembl_value'] = ids2df['pchembl_value'].astype(float)
        # .copy() so the column assignment below writes to an independent
        # frame instead of a slice of the unfiltered one.
        ids2df = ids2df[ids2df.pchembl_value >= 4].copy()
        ids2df['activity_class'] = self._activity_class(ids2df['pchembl_value'])
        ChangeMoleculeRendering(renderer='String')
        ids2df = ids2df.dropna(subset=['canonical_smiles'])
        # TODO: add fps column

        return ids2df

#

if __name__ == '__main__':
    # Smoke run: download the data set and print its (rows, columns) shape.
    # Guarded so that importing this module does not start the download.
    a = ChemblData()
    data = a.get_data()
    print(data.shape)
Original file line number Diff line number Diff line change
@@ -1,49 +1,49 @@
from chembl_webresource_client.new_client import new_client
import pandas as pd
import numpy as np
import os
import sys

# Directory that receives the downloaded .csv files (the project's "raw" data folder).
# NOTE: the path is built from the current working directory (two levels up, then
# data/raw), so it depends on where the process is started, not on this file's location.
path_to_download = os.path.normpath( os.getcwd() + os.sep + os.pardir + os.sep + os.pardir + '/data/raw/' )


class ChemblDataExtraction(object):
    """ChEMBL Data Extraction (CDE).

    Uses the official ChEMBL web client to gather activity data for a target
    and to filter and label it by pChEMBL value.
    """

    @staticmethod
    def _interval_labels(pchembl):
        """Return an object array of interval labels for the given pChEMBL values.

        Labels: 'A' for values < 6, 'B' for values < 8, 'C' for values <= 10.
        Anything else (values above 10, NaN) stays a real missing value.
        """
        values = np.asarray(pchembl, dtype=float)
        # An object array keeps NaN as a float; a nested np.where that mixes
        # strings with np.nan would instead store the literal text 'nan'.
        labels = np.full(values.shape, np.nan, dtype=object)
        # Assign from the widest bound to the narrowest so the tighter
        # thresholds overwrite the looser ones.
        labels[values <= 10] = 'C'
        labels[values < 8] = 'B'
        labels[values < 6] = 'A'
        return labels

    def acquire(self, chembl_id):
        """Download the filtered activity records of one ChEMBL target.

        Primary targets: cruzipain ('CHEMBL3563') and cathepsin L ('CHEMBL3837').
        Only activities with ``standard_relation`` '=' and a standard type
        matching ``(IC50|Ki)`` are requested.

        Args:
            chembl_id: ChEMBL id of the target to query.

        Returns:
            pandas.DataFrame: rows whose ``pchembl_value`` is >= 4, with an
            extra ``interval`` column: 'A' for [4, 6), 'B' for [6, 8) and
            'C' for [8, 10]. The target id is stored on the frame as the
            ``name`` attribute, which ``write`` uses as the file name.
        """
        records = (
            new_client.activity
            .filter(target_chembl_id=chembl_id)
            .filter(standard_relation='=')
            .filter(standard_type__iregex='(IC50|Ki)')
            .only(['molecule_chembl_id', 'canonical_smiles', 'standard_type', 'pchembl_value'])
        )

        # Create dataframe from the web query above
        chemblwrc = pd.DataFrame(records)

        # 'value' and 'type' are dropped when present; errors='ignore' keeps
        # this working if the client does not return them.
        chemblwrc = chemblwrc.drop(columns=['value', 'type'], errors='ignore')

        # Convert pchembl_value to float so that it can be filtered by a threshold.
        chemblwrc['pchembl_value'] = chemblwrc['pchembl_value'].astype(float)
        # .copy() so the column assignment below writes to an independent
        # frame instead of a slice of the unfiltered one.
        chemblwrc = chemblwrc[chemblwrc.pchembl_value >= 4].copy()

        # Label every row with its pChEMBL interval.
        chemblwrc['interval'] = self._interval_labels(chemblwrc['pchembl_value'])
        # Plain attribute on the frame; it must be set after the last
        # operation that creates a new frame, or it would be lost.
        chemblwrc.name = chembl_id
        return chemblwrc

    def write(self, dataframe):
        """Write ``dataframe`` to ``<path_to_download>/<dataframe.name>.csv``.

        The row index is written as a column labelled 'id'.

        Args:
            dataframe: frame carrying a ``name`` attribute, as returned by
                ``acquire``.
        """
        # to_csv does not create missing directories.
        os.makedirs(path_to_download, exist_ok=True)
        target = os.path.join(path_to_download, '{}.csv'.format(str(dataframe.name)))
        dataframe.to_csv(target, sep=',', encoding='utf-8', index=True, index_label='id')

from chembl_webresource_client.new_client import new_client
import pandas as pd
import numpy as np
import os
import sys

# Directory that receives the downloaded .csv files (the project's "raw" data folder).
# NOTE: the path is built from the current working directory (two levels up, then
# data/raw), so it depends on where the process is started, not on this file's location.
path_to_download = os.path.normpath( os.getcwd() + os.sep + os.pardir + os.sep + os.pardir + '/data/raw/' )


class ChemblDataExtraction(object):
    """ChEMBL Data Extraction (CDE).

    Uses the official ChEMBL web client to gather activity data for a target
    and to filter and label it by pChEMBL value.
    """

    @staticmethod
    def _interval_labels(pchembl):
        """Return an object array of interval labels for the given pChEMBL values.

        Labels: 'A' for values < 6, 'B' for values < 8, 'C' for values <= 10.
        Anything else (values above 10, NaN) stays a real missing value.
        """
        values = np.asarray(pchembl, dtype=float)
        # An object array keeps NaN as a float; a nested np.where that mixes
        # strings with np.nan would instead store the literal text 'nan'.
        labels = np.full(values.shape, np.nan, dtype=object)
        # Assign from the widest bound to the narrowest so the tighter
        # thresholds overwrite the looser ones.
        labels[values <= 10] = 'C'
        labels[values < 8] = 'B'
        labels[values < 6] = 'A'
        return labels

    def acquire(self, chembl_id):
        """Download the filtered activity records of one ChEMBL target.

        Primary targets: cruzipain ('CHEMBL3563') and cathepsin L ('CHEMBL3837').
        Only activities with ``standard_relation`` '=' and a standard type
        matching ``(IC50|Ki)`` are requested.

        Args:
            chembl_id: ChEMBL id of the target to query.

        Returns:
            pandas.DataFrame: rows whose ``pchembl_value`` is >= 4, with an
            extra ``interval`` column: 'A' for [4, 6), 'B' for [6, 8) and
            'C' for [8, 10]. The target id is stored on the frame as the
            ``name`` attribute, which ``write`` uses as the file name.
        """
        records = (
            new_client.activity
            .filter(target_chembl_id=chembl_id)
            .filter(standard_relation='=')
            .filter(standard_type__iregex='(IC50|Ki)')
            .only(['molecule_chembl_id', 'canonical_smiles', 'standard_type', 'pchembl_value'])
        )

        # Create dataframe from the web query above
        chemblwrc = pd.DataFrame(records)

        # 'value' and 'type' are dropped when present; errors='ignore' keeps
        # this working if the client does not return them.
        chemblwrc = chemblwrc.drop(columns=['value', 'type'], errors='ignore')

        # Convert pchembl_value to float so that it can be filtered by a threshold.
        chemblwrc['pchembl_value'] = chemblwrc['pchembl_value'].astype(float)
        # .copy() so the column assignment below writes to an independent
        # frame instead of a slice of the unfiltered one.
        chemblwrc = chemblwrc[chemblwrc.pchembl_value >= 4].copy()

        # Label every row with its pChEMBL interval.
        chemblwrc['interval'] = self._interval_labels(chemblwrc['pchembl_value'])
        # Plain attribute on the frame; it must be set after the last
        # operation that creates a new frame, or it would be lost.
        chemblwrc.name = chembl_id
        return chemblwrc

    def write(self, dataframe):
        """Write ``dataframe`` to ``<path_to_download>/<dataframe.name>.csv``.

        The row index is written as a column labelled 'id'.

        Args:
            dataframe: frame carrying a ``name`` attribute, as returned by
                ``acquire``.
        """
        # to_csv does not create missing directories.
        os.makedirs(path_to_download, exist_ok=True)
        target = os.path.join(path_to_download, '{}.csv'.format(str(dataframe.name)))
        dataframe.to_csv(target, sep=',', encoding='utf-8', index=True, index_label='id')
40 changes: 20 additions & 20 deletions src/api/data/make_dataset.py → backend/data/make_dataset.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
import load_chembl_data
import numpy as np
from rdkit import Chem
from rdkit.Chem import rdFingerprintGenerator


def main():
    """Write one .csv file per target, each containing both IC50 and Ki records.

    Both targets are downloaded before anything is written, so a failed
    download leaves no partial output behind.
    """
    data = load_chembl_data.ChemblDataExtraction()
    cathepsin = data.acquire('CHEMBL3837')
    cruzipain = data.acquire('CHEMBL3563')
    data.write(cathepsin)
    data.write(cruzipain)


# Run only when executed as a script, so importing this module has no side effects.
if __name__ == '__main__':
    main()

import load_chembl_data
import numpy as np
from rdkit import Chem
from rdkit.Chem import rdFingerprintGenerator


def main():
    """Write one .csv file per target, each containing both IC50 and Ki records.

    Both targets are downloaded before anything is written, so a failed
    download leaves no partial output behind.
    """
    data = load_chembl_data.ChemblDataExtraction()
    cathepsin = data.acquire('CHEMBL3837')
    cruzipain = data.acquire('CHEMBL3563')
    data.write(cathepsin)
    data.write(cruzipain)


# Run only when executed as a script, so importing this module has no side effects.
if __name__ == '__main__':
    main()
Loading

0 comments on commit 75e7b78

Please sign in to comment.