# streamlit_interactive.py -- interactive Streamlit front end for Resume Matcher.
#
# Recovered from a patch that adds this file. The same patch also adds the
# binary `Assets/img/favicon.ico` and appends a "# Processed or local files"
# section containing `/Data/Processed/*` to `.gitignore`.

# Import necessary libraries
import json
import os
from typing import List

import networkx as nx
import nltk
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from annotated_text import annotated_text, parameters
from streamlit_extras import add_vertical_space as avs
from streamlit_extras.badges import badge

from scripts import ResumeProcessor, JobDescriptionProcessor
from scripts.ReadPdf import read_single_pdf
from scripts.similarity import get_similarity_score, find_path, read_config
from scripts.parsers import ParseResume
from scripts.parsers import ParseJobDesc
from scripts.utils import get_filenames_from_dir

# Set page configuration
st.set_page_config(page_title='Resume Matcher', page_icon="Assets/img/favicon.ico",
                   initial_sidebar_state='auto', layout='wide')

# Find the current working directory and configuration path
cwd = find_path('Resume-Matcher')
config_path = os.path.join(cwd, "scripts", "similarity")

# Check if NLTK punkt data is available, if not, download it
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

# Set some visualization parameters using the annotated_text library
parameters.SHOW_LABEL_SEPARATOR = False
parameters.BORDER_RADIUS = 3
parameters.PADDING = "0.5 0.25rem"


# Function to set session state variables
def update_session_state(key, val):
    """
    Store a value in the Streamlit session state.

    Args:
        key (str): Session state key.
        val: Value to store under ``key``.

    Returns:
        None
    """
    st.session_state[key] = val


# Function to delete all files in a directory
def delete_from_dir(filepath: str) -> bool:
    """
    Delete every file directly inside a directory.

    Sub-directories are left untouched; previously the first sub-directory
    made ``os.remove`` fail and aborted the cleanup of the remaining files.

    Args:
        filepath (str): Directory whose files should be removed.

    Returns:
        bool: True when the directory was scanned and its files removed,
            False when an ``OSError`` occurred (e.g. the directory is missing).
    """
    try:
        # The context manager closes the scandir iterator deterministically.
        with os.scandir(filepath) as entries:
            for entry in entries:
                if not entry.is_dir(follow_symlinks=False):
                    os.remove(entry.path)
        return True
    except OSError as error:
        print(f"Exception: {error}")
        return False


# Function to create a star-shaped graph visualization
def create_star_graph(nodes_and_weights, title):
    """
    Create a star-shaped graph visualization and render it with Streamlit.

    A central "resume" node is connected to every node in
    ``nodes_and_weights``; each edge stores ``weight * 100`` as its weight.

    Args:
        nodes_and_weights (list): List of (node, weight) tuples.
        title (str): Title for the graph.

    Returns:
        None
    """
    # Build the graph: one central node plus one weighted edge per entry
    graph = nx.Graph()
    central_node = "resume"
    graph.add_node(central_node)
    for node, weight in nodes_and_weights:
        graph.add_node(node)
        graph.add_edge(central_node, node, weight=weight * 100)

    # Get position layout for nodes
    pos = nx.spring_layout(graph)

    # Edge trace: a None after each pair breaks the line between segments
    edge_x = []
    edge_y = []
    for start, end in graph.edges():
        x0, y0 = pos[start]
        x1, y1 = pos[end]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    edge_trace = go.Scatter(x=edge_x, y=edge_y, line=dict(width=0.5, color='#888'),
                            hoverinfo='none', mode='lines')

    # Node coordinates, colours (number of neighbours) and hover text
    nodes = list(graph.nodes())
    node_x = [pos[node][0] for node in nodes]
    node_y = [pos[node][1] for node in nodes]
    node_adjacencies = [len(graph.adj[node]) for node in nodes]
    # Plotly hover text is HTML-like, so <br> is the line separator
    node_text = [f'{node}<br># of connections: {count}'
                 for node, count in zip(nodes, node_adjacencies)]

    # `title=dict(text=..., side=...)` replaces the deprecated `titleside`
    node_trace = go.Scatter(
        x=node_x, y=node_y, mode='markers', hoverinfo='text', text=node_text,
        marker=dict(showscale=True, colorscale='Rainbow', reversescale=True,
                    color=node_adjacencies, size=10,
                    colorbar=dict(thickness=15, xanchor='left',
                                  title=dict(text='Node Connections', side='right')),
                    line=dict(width=2)))

    # `title=dict(text=..., font=...)` replaces the deprecated `titlefont`
    figure = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(title=dict(text=title, font=dict(size=16)), showlegend=False,
                         hovermode='closest', margin=dict(b=20, l=5, r=5, t=40),
                         xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                         yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))

    # Show the figure
    st.plotly_chart(figure, use_container_width=True)


# Function to create annotated text with highlighting
def create_annotated_text(input_string: str, word_list: List[str], annotation: str, color_code: str):
    """
    Create annotated text with highlighted keywords.

    Args:
        input_string (str): The input text.
        word_list (List[str]): List of keywords to be highlighted.
        annotation (str): Annotation label for highlighted keywords.
        color_code (str): Color code for highlighting.

    Returns:
        List: Tokens in order; tokens found in ``word_list`` are replaced by
            ``(token, annotation, color_code)`` tuples.
    """
    # A set gives constant-time membership tests for every token
    word_set = set(word_list)
    return [
        (token, annotation, color_code) if token in word_set else token
        for token in nltk.word_tokenize(input_string)
    ]


# Function to read JSON data from a file
def read_json(filename):
    """
    Read JSON data from a file.

    Args:
        filename (str): The path to the JSON file.

    Returns:
        dict: The JSON data.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default
    with open(filename, encoding="utf-8") as f:
        return json.load(f)


# Function to tokenize a string
def tokenize_string(input_string):
    """
    Tokenize a string into words.

    Args:
        input_string (str): The input string.

    Returns:
        List[str]: List of tokens.
    """
    return nltk.word_tokenize(input_string)


# Helper to persist one uploaded PDF and track it in the session state
def _handle_upload(uploaded_file, target_dir: str, state_key: str, path_key: str) -> None:
    """
    Save an uploaded file to ``target_dir`` and record the result.

    Args:
        uploaded_file: Value returned by ``st.file_uploader`` (None when empty).
        target_dir (str): Directory the file is written to.
        state_key (str): Session key holding "Pending" or "Uploaded".
        path_key (str): Session key holding the saved file path.

    Returns:
        None
    """
    if uploaded_file is None:
        # Uploader was cleared: forget the previous file so the next one is saved
        update_session_state(state_key, "Pending")
        update_session_state(path_key, "")
        return

    # basename() drops directory components from the client-supplied name
    save_path = os.path.join(target_dir, os.path.basename(uploaded_file.name))
    if st.session_state[state_key] == "Uploaded" and st.session_state[path_key] == save_path:
        # Already written during an earlier rerun of the script
        return

    os.makedirs(target_dir, exist_ok=True)
    with open(save_path, mode='wb') as w:
        w.write(uploaded_file.getvalue())

    if os.path.exists(save_path):
        st.toast(f'File {uploaded_file.name} is successfully saved!', icon="✔️")
        update_session_state(state_key, "Uploaded")
        update_session_state(path_key, save_path)
    else:
        update_session_state(state_key, "Pending")
        update_session_state(path_key, "")


# Helper to render the "Keywords & Values" table
def _render_keyword_table(keyterms) -> None:
    """
    Render (keyword, value) pairs as a two-column Plotly table.

    Args:
        keyterms: Iterable of (keyword, value) pairs; values are shown
            multiplied by 100 and a repeated keyword keeps its last value.

    Returns:
        None
    """
    keyword_dict = {keyword: value * 100 for keyword, value in keyterms}
    fig = go.Figure(data=[go.Table(
        header=dict(values=["Keyword", "Value"],
                    font=dict(size=12, color="white"),
                    fill_color='#1d2078'),
        cells=dict(values=[list(keyword_dict.keys()), list(keyword_dict.values())],
                   line_color='darkslategray',
                   fill_color='#6DA9E4'))])
    st.plotly_chart(fig, use_container_width=True)


# Cleanup processed resume / job descriptions
delete_from_dir(os.path.join(cwd, "Data", "Processed", "Resumes"))
delete_from_dir(os.path.join(cwd, "Data", "Processed", "JobDescription"))

# Set default session states for first run
if "resumeUploaded" not in st.session_state:
    update_session_state("resumeUploaded", "Pending")
    update_session_state("resumePath", "")
if "jobDescriptionUploaded" not in st.session_state:
    update_session_state("jobDescriptionUploaded", "Pending")
    update_session_state("jobDescriptionPath", "")

# Display the main title and sub-headers
st.title(':blue[Resume Matcher]')
with st.sidebar:
    st.image('Assets/img/header_image.png')
    st.subheader('Free and Open Source ATS to help your resume pass the screening stage.')
    st.markdown('Check the website [www.resumematcher.fyi](https://www.resumematcher.fyi/)')
    st.markdown('Give Resume Matcher a ⭐ on [GitHub](https://github.com/srbhr/resume-matcher)')
    badge(type="github", name="srbhr/Resume-Matcher")
    st.markdown('For updates follow me on Twitter.')
    badge(type="twitter", name="_srbhr_")
    st.markdown('If you like the project and would like to further help in development please consider 👇')
    badge(type="buymeacoffee", name="srbhr")

st.divider()
avs.add_vertical_space(1)

# Upload widgets: one column per document
with st.container():
    resumeCol, jobDescriptionCol = st.columns(2)
    with resumeCol:
        uploaded_Resume = st.file_uploader("Choose a Resume", type="pdf")
        _handle_upload(uploaded_Resume, os.path.join(cwd, "Data", "Resumes"),
                       "resumeUploaded", "resumePath")

    with jobDescriptionCol:
        uploaded_JobDescription = st.file_uploader("Choose a Job Description", type="pdf")
        _handle_upload(uploaded_JobDescription, os.path.join(cwd, "Data", "JobDescription"),
                       "jobDescriptionUploaded", "jobDescriptionPath")

with st.spinner('Please wait...'):
    # Both documents must be present AND saved; the resume state used to be
    # skipped because the job-description state was checked twice.
    if (uploaded_Resume is not None and
            st.session_state["resumeUploaded"] == "Uploaded" and
            uploaded_JobDescription is not None and
            st.session_state["jobDescriptionUploaded"] == "Uploaded"):

        resumeProcessor = ParseResume(read_single_pdf(st.session_state["resumePath"]))
        jobDescriptionProcessor = ParseJobDesc(read_single_pdf(st.session_state["jobDescriptionPath"]))

        # Resume / JD output
        selected_file = resumeProcessor.get_JSON()
        selected_jd = jobDescriptionProcessor.get_JSON()

        # Keyterm frames feed the treemaps in the "Key Topics" row
        df1 = pd.DataFrame(selected_file['keyterms'], columns=["keyword", "value"])
        df2 = pd.DataFrame(selected_jd['keyterms'], columns=["keyword", "value"])

        # Add containers for each row to avoid overlap
        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Parsed Resume Data"):
                    st.caption(
                        "This text is parsed from your resume. This is how it'll look like after getting parsed by an "
                        "ATS.")
                    st.caption("Utilize this to understand how to make your resume ATS friendly.")
                    avs.add_vertical_space(3)
                    st.write(selected_file["clean_data"])

            with jobDescriptionCol:
                with st.expander("Parsed Job Description"):
                    st.caption(
                        "Currently in the pipeline I'm parsing this from PDF but it'll be from txt or copy paste.")
                    avs.add_vertical_space(3)
                    st.write(selected_jd["clean_data"])

        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Extracted Keywords"):
                    st.write("Now let's take a look at the extracted keywords from the resume.")
                    annotated_text(create_annotated_text(
                        selected_file["clean_data"], selected_file["extracted_keywords"],
                        "KW", "#0B666A"))
            with jobDescriptionCol:
                with st.expander("Extracted Keywords"):
                    st.write("Now let's take a look at the extracted keywords from the job description.")
                    annotated_text(create_annotated_text(
                        selected_jd["clean_data"], selected_jd["extracted_keywords"],
                        "KW", "#0B666A"))

        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Extracted Entities"):
                    st.write("Now let's take a look at the extracted entities from the resume.")
                    create_star_graph(selected_file['keyterms'], "Entities from Resume")
            with jobDescriptionCol:
                with st.expander("Extracted Entities"):
                    st.write("Now let's take a look at the extracted entities from the job description.")
                    create_star_graph(selected_jd['keyterms'], "Entities from Job Description")

        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Keywords & Values"):
                    _render_keyword_table(selected_file['keyterms'])
            with jobDescriptionCol:
                with st.expander("Keywords & Values"):
                    _render_keyword_table(selected_jd['keyterms'])

        with st.container():
            resumeCol, jobDescriptionCol = st.columns(2)
            with resumeCol:
                with st.expander("Key Topics"):
                    fig = px.treemap(df1, path=['keyword'], values='value',
                                     color_continuous_scale='Rainbow',
                                     title='Key Terms/Topics Extracted from your Resume')
                    st.plotly_chart(fig, use_container_width=True)

            with jobDescriptionCol:
                with st.expander("Key Topics"):
                    fig = px.treemap(df2, path=['keyword'], values='value',
                                     color_continuous_scale='Rainbow',
                                     title='Key Terms/Topics Extracted from Job Description')
                    st.plotly_chart(fig, use_container_width=True)

        avs.add_vertical_space(2)
        config_file_path = os.path.join(config_path, "config.yml")
        if os.path.exists(config_file_path):
            config_data = read_config(config_file_path)
            if config_data:
                print("Config file parsed successfully:")
                resume_string = ' '.join(selected_file["extracted_keywords"])
                jd_string = ' '.join(selected_jd["extracted_keywords"])
                result = get_similarity_score(resume_string, jd_string)
                similarity_score = round(result[0]["score"] * 100, 2)

                # Default color to green
                score_color = "green"
                if similarity_score < 60:
                    score_color = "red"
                elif 60 <= similarity_score < 75:
                    score_color = "orange"

                # The span applies score_color; this is why unsafe_allow_html is on
                st.markdown(f'Similarity Score obtained for the resume and job description is '
                            f'<span style="color:{score_color};font-size:24px; font-weight:Bold">'
                            f'{similarity_score}</span>',
                            unsafe_allow_html=True)
        else:
            print("Config file does not exist.")

        avs.add_vertical_space(2)
        with st.expander("Common words between Resume and Job Description:"):
            annotated_text(create_annotated_text(
                selected_file["clean_data"], selected_jd["extracted_keywords"],
                "JD", "#F24C3D"))

st.divider()

# Go back to top
st.markdown('[:arrow_up: Back to Top](#resume-matcher)')