-
Notifications
You must be signed in to change notification settings - Fork 53
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #50 from rmusser01/main
Semantic Scholar integration
- Loading branch information
Showing
3 changed files
with
348 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
184 changes: 184 additions & 0 deletions
184
App_Function_Libraries/Gradio_UI/Semantic_Scholar_tab.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,184 @@ | ||
# Semantic_Scholar_tab.py | ||
# Description: contains the code to create the Semantic Scholar tab in the Gradio UI. | ||
# | ||
# Imports | ||
# | ||
# External Libraries | ||
import gradio as gr | ||
# | ||
# Internal Libraries | ||
from App_Function_Libraries.Third_Party.Semantic_Scholar import search_and_display, FIELDS_OF_STUDY, PUBLICATION_TYPES | ||
|
||
|
||
# | ||
###################################################################################################################### | ||
# Functions | ||
def create_semantic_scholar_tab():
    """Create the Semantic Scholar tab for the Gradio UI.

    Builds a "Semantic Scholar Search" tab made of two help panels, the
    search form (query, year range, venue, minimum citations, fields of
    study, publication types, open-access flag), pagination controls and a
    Markdown results pane, then wires the Search / Previous / Next buttons
    to ``search_and_display``.
    """
    # Help text is assembled from flush-left lines so the Markdown never
    # carries the indentation of the surrounding code.
    overview_md = (
        "\n"
        "## Semantic Scholar Paper Search\n"
        "This interface allows you to search for academic papers using the "
        "Semantic Scholar API with advanced filtering options:\n"
        "### Search Options\n"
        "- **Keywords**: Search across titles, abstracts, and other paper content\n"
        '- **Year Range**: Filter papers by publication year (e.g., "2020-2023" or "2020")\n'
        "- **Venue**: Filter by publication venue (journal or conference)\n"
        "- **Minimum Citations**: Filter papers by minimum citation count\n"
        "- **Fields of Study**: Filter papers by academic field\n"
        "- **Publication Types**: Filter by type of publication\n"
        "- **Open Access**: Option to show only papers with free PDF access\n"
        "### Results Include\n"
        "- Paper title\n"
        "- Author list\n"
        "- Publication year and venue\n"
        "- Citation count\n"
        "- Publication types\n"
        "- Abstract\n"
        "- Links to PDF (when available) and Semantic Scholar page\n"
    )
    usage_md = (
        "\n"
        "### Pagination\n"
        "- 10 results per page\n"
        "- Navigate through results using Previous/Next buttons\n"
        "- Current page number and total results displayed\n"
        "### Usage Tips\n"
        "- Combine multiple filters for more specific results\n"
        "- Use specific terms for more focused results\n"
        "- Try different combinations of filters if you don't find what you're looking for\n"
    )

    with gr.Tab("Semantic Scholar Search"):
        with gr.Row():
            with gr.Column(scale=2):
                gr.Markdown(overview_md)
            with gr.Column(scale=2):
                gr.Markdown(usage_md)

        with gr.Row():
            with gr.Column(scale=2):
                search_input = gr.Textbox(
                    label="Search Query",
                    placeholder="Enter keywords to search for papers...",
                    lines=1,
                )

                # Advanced search options
                with gr.Row():
                    year_range = gr.Textbox(
                        label="Year Range",
                        placeholder="e.g., 2020-2023 or 2020",
                        lines=1,
                    )
                    venue = gr.Textbox(
                        label="Venue",
                        placeholder="e.g., Nature, Science",
                        lines=1,
                    )
                    # precision=0 so the handler receives a whole number
                    # rather than a float such as 5.0.
                    min_citations = gr.Number(
                        label="Minimum Citations",
                        value=0,
                        minimum=0,
                        step=1,
                        precision=0,
                    )

                with gr.Row():
                    fields_of_study = gr.Dropdown(
                        choices=FIELDS_OF_STUDY,
                        label="Fields of Study",
                        multiselect=True,
                        value=[],
                    )
                    publication_types = gr.Dropdown(
                        choices=PUBLICATION_TYPES,
                        label="Publication Types",
                        multiselect=True,
                        value=[],
                    )

                open_access_only = gr.Checkbox(
                    label="Open Access Only",
                    value=False,
                )

            with gr.Column(scale=1):
                search_button = gr.Button("Search", variant="primary")

                # Pagination controls
                with gr.Row():
                    prev_button = gr.Button("← Previous")
                    current_page = gr.Number(
                        value=0, label="Page", minimum=0, step=1, precision=0
                    )
                    # Hidden holder for the last page index of the current search.
                    max_page = gr.Number(
                        value=0, label="Max Page", visible=False, precision=0
                    )
                    next_button = gr.Button("Next →")

                total_results = gr.Textbox(
                    label="Total Results",
                    value="0",
                    interactive=False,
                )

        output_text = gr.Markdown(
            label="Results",
            value="Use the search options above to find papers.",
        )

        def update_page(direction, current, maximum):
            """Return ``current + direction`` clamped to ``[0, maximum]``.

            A cleared Number field arrives as ``None`` and values may be
            floats, so both operands are normalised to ints first.
            """
            current_index = int(current or 0)
            last_index = max(int(maximum or 0), 0)
            return min(max(current_index + direction, 0), last_index)

        # Handle search and pagination
        def search_from_button(query, fields, pub_types, years, venue_name,
                               citations, open_access):
            """Wrapper to always search from page 0 when search button is clicked"""
            return search_and_display(
                query=query,
                page=0,  # Force page 0 for new searches
                fields_of_study=fields,
                publication_types=pub_types,
                year_range=years,
                venue=venue_name,
                min_citations=citations,
                open_access_only=open_access,
            )

        def search_current_page(query, page, fields, pub_types, years,
                                venue_name, citations, open_access):
            """Re-run the search for the page selected by Previous/Next."""
            return search_and_display(
                query=query,
                page=int(page or 0),  # never forward None/float as the page index
                fields_of_study=fields,
                publication_types=pub_types,
                year_range=years,
                venue=venue_name,
                min_citations=citations,
                open_access_only=open_access,
            )

        # Component lists shared by all three button handlers.
        filter_inputs = [
            fields_of_study, publication_types,
            year_range, venue, min_citations, open_access_only,
        ]
        search_outputs = [output_text, current_page, max_page, total_results]

        search_button.click(
            fn=search_from_button,
            inputs=[search_input, *filter_inputs],
            outputs=search_outputs,
        )

        # Previous/Next first move the page counter, then refresh the results.
        prev_button.click(
            fn=lambda curr, max_p: update_page(-1, curr, max_p),
            inputs=[current_page, max_page],
            outputs=current_page,
        ).then(
            fn=search_current_page,
            inputs=[search_input, current_page, *filter_inputs],
            outputs=search_outputs,
        )

        next_button.click(
            fn=lambda curr, max_p: update_page(1, curr, max_p),
            inputs=[current_page, max_page],
            outputs=current_page,
        ).then(
            fn=search_current_page,
            inputs=[search_input, current_page, *filter_inputs],
            outputs=search_outputs,
        )
|
||
# | ||
# End of Semantic_Scholar_tab.py | ||
###################################################################################################################### |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
# Semantic_Scholar.py | ||
# Description: This file contains the functions to interact with the Semantic Scholar API | ||
# | ||
# Imports | ||
from typing import List, Dict, Any | ||
|
||
import requests | ||
# | ||
#################################################################################################### | ||
# | ||
# Functions | ||
|
||
# Constants | ||
# Choices offered in the "Fields of Study" filter; the selected values are
# sent as the `fieldsOfStudy` parameter of the paper search request.
FIELDS_OF_STUDY = [
    "Computer Science",
    "Medicine",
    "Chemistry",
    "Biology",
    "Materials Science",
    "Physics",
    "Geology",
    "Psychology",
    "Art",
    "History",
    "Geography",
    "Sociology",
    "Business",
    "Political Science",
    "Economics",
    "Philosophy",
    "Mathematics",
    "Engineering",
    "Environmental Science",
    "Agricultural and Food Sciences",
    "Education",
    "Law",
    "Linguistics",
]
|
||
# Choices offered in the "Publication Types" filter; the selected values are
# sent as the `publicationTypes` parameter of the paper search request.
PUBLICATION_TYPES = [
    "Review",
    "JournalArticle",
    "CaseReport",
    "ClinicalTrial",
    "Conference",
    "Dataset",
    "Editorial",
    "LettersAndComments",
    "MetaAnalysis",
    "News",
    "Study",
    "Book",
    "BookSection",
]
|
||
|
||
def search_papers(
    query: str,
    page: int,
    fields_of_study: List[str],
    publication_types: List[str],
    year_range: str,
    venue: str,
    min_citations: int,
    open_access_only: bool,
    limit: int = 10
) -> Dict[str, Any]:
    """Search for papers using the Semantic Scholar API with all available filters.

    Args:
        query: Free-text search terms. Empty, whitespace-only or ``None``
            short-circuits to an empty result without any network access.
        page: Zero-based page index; the request offset is ``page * limit``.
        fields_of_study: Field names, sent comma-joined as ``fieldsOfStudy``.
        publication_types: Type names, sent comma-joined as ``publicationTypes``.
        year_range: Either ``"YYYY"`` or ``"YYYY-YYYY"``; input with more
            than one ``-`` is ignored.
        venue: Venue name filter.
        min_citations: Minimum citation count; falsy values disable the filter.
        open_access_only: When true, adds the ``openAccessPdf`` flag.
        limit: Number of results per page.

    Returns:
        The decoded JSON response on success. On any request failure a dict
        with an ``"error"`` message plus empty ``total``/``offset``/``data``.
    """
    if not query or not query.strip():
        return {"total": 0, "offset": 0, "next": 0, "data": []}

    # UI number widgets may deliver None or floats (1.0); the API expects
    # plain integers in the query string.
    page_index = max(int(page or 0), 0)

    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {
        "query": query,
        "offset": page_index * limit,
        "limit": limit,
        "fields": "title,abstract,year,citationCount,authors,venue,openAccessPdf,url,publicationTypes,publicationDate"
    }

    # Add optional filters
    if fields_of_study:
        params["fieldsOfStudy"] = ",".join(fields_of_study)
    if publication_types:
        params["publicationTypes"] = ",".join(publication_types)
    venue_name = (venue or "").strip()
    if venue_name:
        params["venue"] = venue_name
    if min_citations:
        params["minCitationCount"] = str(int(min_citations))
    if open_access_only:
        # Presence of the (empty) parameter is what enables the filter.
        params["openAccessPdf"] = ""
    if year_range:
        bounds = [part.strip() for part in year_range.split("-")]
        # Best effort: anything that is not "YYYY" or "start-end" (or is
        # blank after stripping) is dropped rather than sent to the API.
        if len(bounds) <= 2 and any(bounds):
            params["year"] = "-".join(bounds)

    try:
        # A timeout keeps a stalled connection from hanging the caller.
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error is not a RequestException.
        return {"error": f"API Error: {str(e)}", "total": 0, "offset": 0, "data": []}
|
||
|
||
def get_paper_details(paper_id):
    """Get detailed information about a specific paper.

    Args:
        paper_id: Identifier placed in the ``/graph/v1/paper/{paper_id}`` URL.

    Returns:
        The decoded JSON response on success, otherwise a dict with a single
        ``"error"`` key describing the failure. An empty ``paper_id`` is
        reported as an error without contacting the API.
    """
    if paper_id is None or not str(paper_id).strip():
        return {"error": "API Error: paper_id must not be empty"}

    url = f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}"
    params = {
        "fields": "title,abstract,year,citationCount,authors,venue,openAccessPdf,url,references,citations"
    }
    try:
        # A timeout keeps a stalled connection from hanging the caller.
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error is not a RequestException.
        return {"error": f"API Error: {str(e)}"}
|
||
|
||
def format_paper_info(paper: Dict[str, Any]) -> str:
    """Format paper information for display.

    Args:
        paper: One paper record from the search response. Any field may be
            missing or ``None``; a placeholder is used in either case.

    Returns:
        A Markdown snippet: title heading, authors, year/venue/citation line,
        publication types, abstract and links, terminated by a newline.
    """
    # ``dict.get(key, default)`` only covers *missing* keys; a key that is
    # present with a null value must be handled with ``or`` instead.
    author_names = [
        str(author["name"])
        for author in (paper.get("authors") or [])
        if author and author.get("name")
    ]
    authors = ", ".join(author_names)
    year = f"Year: {paper.get('year') or 'N/A'}"
    venue = f"Venue: {paper.get('venue') or 'N/A'}"
    citations = f"Citations: {paper.get('citationCount') or 0}"
    type_names = [str(name) for name in (paper.get("publicationTypes") or ["N/A"])]
    pub_types = f"Types: {', '.join(type_names)}"

    # Only advertise a PDF when the record actually carries a URL for it.
    pdf_url = (paper.get("openAccessPdf") or {}).get("url")
    pdf_link = f"\nPDF: {pdf_url}" if pdf_url else ""

    s2_link = f"\nSemantic Scholar: {paper.get('url') or ''}"

    title = paper.get("title") or "No Title"
    abstract = paper.get("abstract") or "No abstract available"

    lines = [
        f"# {title}",
        f"Authors: {authors}",
        f"{year} | {venue} | {citations}",
        pub_types,
        "Abstract:",
        f"{abstract}",
        f"Links:{pdf_link}{s2_link}",
        "",  # keeps the trailing newline of the rendered block
    ]
    return "\n".join(lines)
|
||
|
||
def search_and_display(
    query: str,
    page: int,
    fields_of_study: List[str],
    publication_types: List[str],
    year_range: str,
    venue: str,
    min_citations: int,
    open_access_only: bool
) -> tuple[str, int, int, str]:
    """Search for papers and return formatted results with pagination info.

    Args:
        query: Free-text search terms.
        page: Zero-based page index to fetch.
        fields_of_study: Field-of-study filter values.
        publication_types: Publication-type filter values.
        year_range: ``"YYYY"`` or ``"YYYY-YYYY"`` filter.
        venue: Venue name filter.
        min_citations: Minimum citation count filter.
        open_access_only: Restrict to papers with an open-access PDF.

    Returns:
        ``(markdown_text, page, last_page_index, total_results_as_text)``.
        On an API error or an empty result the text carries the message and
        the remaining values are ``0, 0, "0"``.
    """
    # One page size drives both the request limit and the page-count maths.
    page_size = 10

    result = search_papers(
        query, page, fields_of_study, publication_types,
        year_range, venue, min_citations, open_access_only,
        limit=page_size
    )

    if "error" in result:
        return result["error"], 0, 0, "0"

    # A response without a "data" key is treated like an empty result.
    papers = result.get("data") or []
    if not papers:
        return "No results found.", 0, 0, "0"

    total_results = int(result.get("total") or 0)
    # Ceiling division; at least one page exists because papers is non-empty,
    # which keeps the last-page index from becoming -1 when "total" is absent.
    max_pages = max((total_results + page_size - 1) // page_size, 1)

    formatted_results = "\n\n---\n\n".join(
        format_paper_info(paper) for paper in papers
    )

    # Fall back to the requested offset if the response omits it.
    offset = result.get("offset")
    if offset is None:
        offset = int(page or 0) * page_size

    # Add pagination information
    pagination_info = (
        f"\n\n---\n\nShowing results {offset + 1}-{offset + len(papers)} of {total_results}"
    )

    return formatted_results + pagination_info, page, max_pages - 1, str(total_results)
|
||
# | ||
# End of Semantic_Scholar.py | ||
#################################################################################################### |