-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
18 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,8 @@ | |
import pandas as pd | ||
from Bio import Entrez, Medline | ||
|
||
Entrez.email = "[email protected]" # TODO: This should probably be changed to the email of the person installing ckg? | ||
# TODO: This should probably be changed to the email of the person installing ckg? | ||
Entrez.email = "[email protected]" | ||
|
||
|
||
def getMedlineAbstracts(idList): | ||
|
@@ -54,17 +55,23 @@ def get_publications_abstracts( | |
index="PMID", | ||
): | ||
""" | ||
Accesses NCBI PubMed over the WWW and retrieves the abstracts corresponding to a list of one or more PubMed IDs. | ||
Accesses NCBI PubMed over the WWW and retrieves the abstracts corresponding | ||
to a list of one or more PubMed IDs. | ||
:param data: pandas dataframe of diseases and publications linked to a list of proteins (columns: 'Diseases', 'Proteins', 'linkout' and 'publication'). | ||
:param data: pandas dataframe of diseases and publications linked to a list of | ||
proteins (columns: 'Diseases', 'Proteins', 'linkout' and 'publication'). | ||
:param str publication_col: column label containing PubMed ids. | ||
:param list join_by: column labels to be kept from the input dataframe. | ||
:param str index: column label containing PubMed ids from the NCBI retrieved data. | ||
:return: Pandas dataframe with publication information and columns 'PMID', 'abstract', 'authors', 'date', 'journal', 'keywords', 'title', 'url', 'Proteins' and 'Diseases'. | ||
:return: Pandas dataframe with publication information and columns 'PMID', 'abstract', | ||
'authors', 'date', 'journal', 'keywords', 'title', 'url', 'Proteins' and 'Diseases'. | ||
Example:: | ||
result = get_publications_abstracts(data, publication_col='publication', join_by=['publication','Proteins','Diseases'], index='PMID') | ||
result = get_publications_abstracts(data, | ||
publication_col='publication', | ||
join_by=['publication','Proteins','Diseases'], | ||
index='PMID') | ||
""" | ||
abstracts = pd.DataFrame() | ||
if not data.empty: | ||
|