Skip to content

Commit

Permalink
Merge pull request #50 from rmusser01/main
Browse files Browse the repository at this point in the history
Semantic Scholar integration
  • Loading branch information
rmusser01 authored Oct 25, 2024
2 parents a828788 + 5ec7881 commit 05ed3e0
Show file tree
Hide file tree
Showing 3 changed files with 348 additions and 0 deletions.
2 changes: 2 additions & 0 deletions App_Function_Libraries/Gradio_Related.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
from App_Function_Libraries.Gradio_UI.RAG_Chat_tab import create_rag_tab
from App_Function_Libraries.Gradio_UI.Embeddings_tab import create_embeddings_tab, create_view_embeddings_tab, \
create_purge_embeddings_tab
from App_Function_Libraries.Gradio_UI.Semantic_Scholar_tab import create_semantic_scholar_tab
from App_Function_Libraries.Gradio_UI.Trash import create_view_trash_tab, create_empty_trash_tab, \
create_delete_trash_tab, create_search_and_mark_trash_tab
from App_Function_Libraries.Gradio_UI.Utilities import create_utilities_yt_timestamp_tab, create_utilities_yt_audio_tab, \
Expand Down Expand Up @@ -287,6 +288,7 @@ def launch_ui(share_public=None, server_mode=False):
create_summarize_explain_tab()
create_live_recording_tab()
create_arxiv_tab()
create_semantic_scholar_tab()

with gr.TabItem("Text Search", id="text search", visible=True):
create_search_tab()
Expand Down
184 changes: 184 additions & 0 deletions App_Function_Libraries/Gradio_UI/Semantic_Scholar_tab.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
# Semantic_Scholar_tab.py
# Description: contains the code to create the Semantic Scholar tab in the Gradio UI.
#
# Imports
#
# External Libraries
import gradio as gr
#
# Internal Libraries
from App_Function_Libraries.Third_Party.Semantic_Scholar import search_and_display, FIELDS_OF_STUDY, PUBLICATION_TYPES


#
######################################################################################################################
# Functions
def create_semantic_scholar_tab():
    """Create the Semantic Scholar tab for the Gradio UI.

    Builds a "Semantic Scholar Search" tab containing help text, the search
    form (query, year range, venue, minimum citations, fields of study,
    publication types, open-access toggle), pagination controls and a
    Markdown results pane, then wires the buttons to ``search_and_display``.

    Must be called inside an active ``gr.Blocks`` context. Returns nothing;
    the components are registered with the enclosing Blocks as a side effect.
    """
    with gr.Tab("Semantic Scholar Search"):
        with gr.Row():
            with gr.Column(scale=2):
                gr.Markdown("""
                ## Semantic Scholar Paper Search
                This interface allows you to search for academic papers using the Semantic Scholar API with advanced filtering options:
                ### Search Options
                - **Keywords**: Search across titles, abstracts, and other paper content
                - **Year Range**: Filter papers by publication year (e.g., "2020-2023" or "2020")
                - **Venue**: Filter by publication venue (journal or conference)
                - **Minimum Citations**: Filter papers by minimum citation count
                - **Fields of Study**: Filter papers by academic field
                - **Publication Types**: Filter by type of publication
                - **Open Access**: Option to show only papers with free PDF access
                ### Results Include
                - Paper title
                - Author list
                - Publication year and venue
                - Citation count
                - Publication types
                - Abstract
                - Links to PDF (when available) and Semantic Scholar page
                """)
            with gr.Column(scale=2):
                gr.Markdown("""
                ### Pagination
                - 10 results per page
                - Navigate through results using Previous/Next buttons
                - Current page number and total results displayed
                ### Usage Tips
                - Combine multiple filters for more specific results
                - Use specific terms for more focused results
                - Try different combinations of filters if you don't find what you're looking for
                """)
        with gr.Row():
            with gr.Column(scale=2):
                search_input = gr.Textbox(
                    label="Search Query",
                    placeholder="Enter keywords to search for papers...",
                    lines=1
                )

                # Advanced search options
                with gr.Row():
                    year_range = gr.Textbox(
                        label="Year Range",
                        placeholder="e.g., 2020-2023 or 2020",
                        lines=1
                    )
                    venue = gr.Textbox(
                        label="Venue",
                        placeholder="e.g., Nature, Science",
                        lines=1
                    )
                    # precision=0 makes Gradio hand the value over as an int,
                    # so it is serialised as "5" rather than "5.0".
                    min_citations = gr.Number(
                        label="Minimum Citations",
                        value=0,
                        minimum=0,
                        step=1,
                        precision=0
                    )

                with gr.Row():
                    fields_of_study = gr.Dropdown(
                        choices=FIELDS_OF_STUDY,
                        label="Fields of Study",
                        multiselect=True,
                        value=[]
                    )
                    publication_types = gr.Dropdown(
                        choices=PUBLICATION_TYPES,
                        label="Publication Types",
                        multiselect=True,
                        value=[]
                    )

                open_access_only = gr.Checkbox(
                    label="Open Access Only",
                    value=False
                )

            with gr.Column(scale=1):
                search_button = gr.Button("Search", variant="primary")

                # Pagination controls
                with gr.Row():
                    prev_button = gr.Button("← Previous")
                    # Page indices are zero-based; precision=0 keeps them ints
                    # so the API offset derived from them is a whole number.
                    current_page = gr.Number(
                        value=0, label="Page", minimum=0, step=1, precision=0
                    )
                    # Hidden state: highest valid page index of the last search.
                    max_page = gr.Number(
                        value=0, label="Max Page", visible=False, precision=0
                    )
                    next_button = gr.Button("Next →")

                total_results = gr.Textbox(
                    label="Total Results",
                    value="0",
                    interactive=False
                )

        output_text = gr.Markdown(
            label="Results",
            value="Use the search options above to find papers."
        )

        def update_page(direction, current, maximum):
            """Return ``current + direction`` clamped to ``[0, maximum]`` as an int."""
            # A cleared Number field arrives as None; treat it as page 0.
            target = int(current or 0) + direction
            return max(0, min(target, int(maximum or 0)))

        def search_from_button(query, fields_of_study, publication_types, year_range, venue, min_citations,
                               open_access_only):
            """Wrapper to always search from page 0 when search button is clicked"""
            return search_and_display(
                query=query,
                page=0,  # Force page 0 for new searches
                fields_of_study=fields_of_study,
                publication_types=publication_types,
                year_range=year_range,
                venue=venue,
                min_citations=min_citations,
                open_access_only=open_access_only
            )

        filter_inputs = [
            fields_of_study, publication_types,
            year_range, venue, min_citations, open_access_only
        ]
        result_outputs = [output_text, current_page, max_page, total_results]

        search_button.click(
            fn=search_from_button,
            inputs=[search_input, *filter_inputs],
            outputs=result_outputs
        )

        def wire_pager(button, step):
            """Make ``button`` move the page by ``step`` and then re-run the search."""
            def turn_page(curr, max_p):
                return update_page(step, curr, max_p)

            button.click(
                fn=turn_page,
                inputs=[current_page, max_page],
                outputs=current_page
            ).then(
                # Input order mirrors search_and_display's positional signature.
                fn=search_and_display,
                inputs=[search_input, current_page, *filter_inputs],
                outputs=result_outputs
            )

        wire_pager(prev_button, -1)
        wire_pager(next_button, 1)

#
# End of Semantic_Scholar_tab.py
######################################################################################################################
162 changes: 162 additions & 0 deletions App_Function_Libraries/Third_Party/Semantic_Scholar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
# Semantic_Scholar.py
# Description: This file contains the functions to interact with the Semantic Scholar API
#
# Imports
from typing import List, Dict, Any

import requests
#
####################################################################################################
#
# Functions

# Constants
# Values offered in the "Fields of Study" filter; joined with commas into the
# `fieldsOfStudy` query parameter by search_papers.
FIELDS_OF_STUDY = [
    "Computer Science",
    "Medicine",
    "Chemistry",
    "Biology",
    "Materials Science",
    "Physics",
    "Geology",
    "Psychology",
    "Art",
    "History",
    "Geography",
    "Sociology",
    "Business",
    "Political Science",
    "Economics",
    "Philosophy",
    "Mathematics",
    "Engineering",
    "Environmental Science",
    "Agricultural and Food Sciences",
    "Education",
    "Law",
    "Linguistics",
]

# Values offered in the "Publication Types" filter; joined with commas into
# the `publicationTypes` query parameter by search_papers.
PUBLICATION_TYPES = [
    "Review",
    "JournalArticle",
    "CaseReport",
    "ClinicalTrial",
    "Conference",
    "Dataset",
    "Editorial",
    "LettersAndComments",
    "MetaAnalysis",
    "News",
    "Study",
    "Book",
    "BookSection",
]


def search_papers(
    query: str,
    page: int,
    fields_of_study: List[str],
    publication_types: List[str],
    year_range: str,
    venue: str,
    min_citations: int,
    open_access_only: bool,
    limit: int = 10,
    timeout: float = 30.0,
) -> Dict[str, Any]:
    """Search for papers using the Semantic Scholar API with all available filters.

    Args:
        query: Free-text search terms. Empty, whitespace-only or ``None``
            short-circuits to an empty result without a network call.
        page: Zero-based page index; the request offset is ``page * limit``.
        fields_of_study: Field names to filter on (sent comma-joined).
        publication_types: Publication types to filter on (sent comma-joined).
        year_range: Either a single year (``"2020"``) or a ``start-end`` range.
            Input with more than one ``-`` is ignored rather than sent.
        venue: Venue filter, passed through as given.
        min_citations: Minimum citation count; ``0``/``None`` disables it.
        open_access_only: When true, restrict to papers with an open-access PDF.
        limit: Number of results per page.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON response on success. On any request failure, a dict
        with an ``"error"`` message plus empty ``total``/``offset``/``data``.
    """
    if not query or not query.strip():
        return {"total": 0, "offset": 0, "next": 0, "data": []}

    # Numeric values may arrive as floats (e.g. from a UI number widget);
    # coerce them so they are not serialised as "10.0" in the query string.
    page = max(int(page or 0), 0)
    limit = int(limit)

    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {
        "query": query,
        "offset": page * limit,
        "limit": limit,
        "fields": "title,abstract,year,citationCount,authors,venue,openAccessPdf,url,publicationTypes,publicationDate"
    }

    # Add optional filters
    if fields_of_study:
        params["fieldsOfStudy"] = ",".join(fields_of_study)
    if publication_types:
        params["publicationTypes"] = ",".join(publication_types)
    if venue:
        params["venue"] = venue
    if min_citations:
        params["minCitationCount"] = str(int(min_citations))
    if open_access_only:
        # Presence of the parameter is what enables the filter; it is sent
        # with an empty value.
        params["openAccessPdf"] = ""
    if year_range:
        year_parts = [part.strip() for part in year_range.split("-")]
        # One part is a single year, two parts are "start-end". Anything with
        # more dashes is malformed and the filter is dropped.
        if len(year_parts) <= 2:
            year = "-".join(year_parts)
            if year:
                params["year"] = year

    try:
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSONDecodeError is not a RequestException subclass.
        return {"error": f"API Error: {str(e)}", "total": 0, "offset": 0, "data": []}


def get_paper_details(paper_id, timeout: float = 30.0):
    """Get detailed information about a specific paper.

    Args:
        paper_id: Identifier interpolated into the ``/graph/v1/paper/{id}`` URL.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON response on success, otherwise a dict with a single
        ``"error"`` key describing what went wrong.
    """
    # An empty ID would only produce a request to the bare collection URL;
    # report it directly instead of spending a network round-trip.
    if not paper_id or not str(paper_id).strip():
        return {"error": "API Error: no paper ID provided"}

    url = f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}"
    params = {
        "fields": "title,abstract,year,citationCount,authors,venue,openAccessPdf,url,references,citations"
    }
    try:
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSONDecodeError is not a RequestException subclass.
        return {"error": f"API Error: {str(e)}"}


def format_paper_info(paper: Dict[str, Any]) -> str:
    """Format paper information for display.

    Args:
        paper: One paper record. Keys read are ``title``, ``authors`` (a list
            of dicts with ``name``), ``year``, ``venue``, ``citationCount``,
            ``publicationTypes``, ``abstract``, ``openAccessPdf`` (a dict with
            ``url``) and ``url``.

    Returns:
        A Markdown snippet with title heading, authors, year/venue/citations,
        publication types, abstract and links. Fields that are missing, null
        or empty are shown with a placeholder instead of raising.
    """
    # Keys can be present with a null value, which dict.get's default does
    # not cover -- hence `or` fallbacks throughout.
    authors = ", ".join(
        author.get("name") or "Unknown" for author in (paper.get("authors") or [])
    )
    title = paper.get("title") or "No Title"
    year = paper.get("year") or "N/A"
    venue = paper.get("venue") or "N/A"
    citation_count = paper.get("citationCount") or 0
    pub_types = ", ".join(paper.get("publicationTypes") or ["N/A"])
    abstract = paper.get("abstract") or "No abstract available"

    links = ""
    pdf_url = (paper.get("openAccessPdf") or {}).get("url")
    if pdf_url:
        links += f"\nPDF: {pdf_url}"
    links += f"\nSemantic Scholar: {paper.get('url') or ''}"

    return (
        f"# {title}\n"
        f"Authors: {authors}\n"
        f"Year: {year} | Venue: {venue} | Citations: {citation_count}\n"
        f"Types: {pub_types}\n"
        "Abstract:\n"
        f"{abstract}\n"
        f"Links:{links}\n"
    )


def search_and_display(
    query: str,
    page: int,
    fields_of_study: List[str],
    publication_types: List[str],
    year_range: str,
    venue: str,
    min_citations: int,
    open_access_only: bool
) -> tuple[str, int, int, str]:
    """Search for papers and return formatted results with pagination info.

    Args:
        query: Free-text search terms.
        page: Zero-based page index to fetch.
        fields_of_study: Field-of-study filter values.
        publication_types: Publication-type filter values.
        year_range: Single year or ``start-end`` range.
        venue: Venue filter.
        min_citations: Minimum citation count filter.
        open_access_only: Restrict to papers with an open-access PDF.

    Returns:
        ``(markdown, page, max_page, total)`` where ``markdown`` is the
        rendered result list (or an error / "No results found." message),
        ``page`` is the page that was fetched, ``max_page`` is the highest
        valid zero-based page index and ``total`` is the total hit count as a
        string. On error or when nothing is found the numeric fields are reset
        to ``0`` / ``"0"``.
    """
    page_size = 10  # results per page; also used for the max-page computation

    # The page may arrive as a float or None (e.g. from a UI number widget).
    page = max(int(page or 0), 0)

    result = search_papers(
        query, page, fields_of_study, publication_types,
        year_range, venue, min_citations, open_access_only,
        limit=page_size
    )

    if "error" in result:
        return result["error"], 0, 0, "0"

    papers = result.get("data") or []
    if not papers:
        return "No results found.", 0, 0, "0"

    total_results = int(result.get("total") or 0)
    max_pages = (total_results + page_size - 1) // page_size  # Ceiling division
    # Fall back to the requested offset if the response does not echo it.
    offset = int(result.get("offset", page * page_size))

    formatted_results = "\n\n---\n\n".join(format_paper_info(paper) for paper in papers)

    # Add pagination information
    pagination_info = f"\n\n---\n\nShowing results {offset + 1}-{offset + len(papers)} of {total_results}"

    return formatted_results + pagination_info, page, max(max_pages - 1, 0), str(total_results)

#
# End of Semantic_Scholar.py
####################################################################################################

0 comments on commit 05ed3e0

Please sign in to comment.