From e359e0adaf44d1f582e75d933db252c3692b0598 Mon Sep 17 00:00:00 2001
From: Abhishek Mathur <37048469+imhalcyon@users.noreply.github.com>
Date: Sun, 22 Oct 2023 15:17:43 +0530
Subject: [PATCH] Feature: Streamlit Interactive v1.0 (#184)

* Initial commit

* Docs: ChatGPTd for comments and docstrings

* Fix: Type warning for title-font in create_star_graph()

* Format: Re-formatted as per PEP 8

* UI: Move intro to sidebar

* UI: Remove footer links, add Back to Top link

* UI: Add columns for resume and job description upload

* Fix: header_image extension updated in script

* Update: streamlit upgraded to 1.27.0, streamlit-extras upgraded to 0.3.2, htbuilder upgraded to 0.6.2

* Misc: Formatting

* UI: Add components for each column and add cleanup of processed files

* UI: Add Favicon file

* Fix: Updated syntax for string comparison

* Cleanup: Delete unnecessary files

* Fix: Remove dependency on run_first. Add dir delete function. Include icon in st.toast implementation.

* Ignore: Add /Data/Processed/* in gitignore

* UI: Add containers to fix columns together for each row

* Fix: Reference issue for resume key topics. Also add wide layout as default.

* Revert "Cleanup: Delete unnecessary files"

This reverts commit cf3f1c73d8fa91f48b50eef2f669e68e9cbe80fa.

---------

Co-authored-by: imhalcyon
---
 .gitignore               |   3 +
 Assets/img/favicon.ico   | Bin 0 -> 15406 bytes
 streamlit_interactive.py | 414 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 417 insertions(+)
 create mode 100644 Assets/img/favicon.ico
 create mode 100644 streamlit_interactive.py

diff --git a/.gitignore b/.gitignore
index c31ce338..eb392320 100644
--- a/.gitignore
+++ b/.gitignore
@@ -141,4 +141,7 @@ scripts/similarity/config.yml
 
 # Personal Data / Secrets
 *.local.yml
+
+# Processed or local files
+/Data/Processed/*
 *.local.pdf
diff --git a/Assets/img/favicon.ico b/Assets/img/favicon.ico
new file mode 100644
index 0000000000000000000000000000000000000000..bc066646ca8e1dcc232d5957460c312b5a488d8a
GIT binary patch
literal 15406
[15406 bytes of binary icon data omitted]

diff --git a/streamlit_interactive.py b/streamlit_interactive.py
new file mode 100644
--- /dev/null
+++ b/streamlit_interactive.py
@@ -0,0 +1,414 @@
+import json
+import os
+from typing import List
+
+import networkx as nx
+import nltk
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+import streamlit as st
+import yaml
+from annotated_text import annotated_text
+from streamlit_extras import add_vertical_space as avs
+from streamlit_extras.badges import badge
+
+# NOTE: the internal imports below assume the repository's scripts/ package layout.
+from scripts.parsers.ParseJobDescToJson import ParseJobDesc
+from scripts.parsers.ParseResumeToJson import ParseResume
+from scripts.ReadPdf import read_single_pdf
+from scripts.similarity.get_similarity_score import get_similarity_score
+
+# Make sure the tokenizer model is available (replaces the old run_first step)
+nltk.download('punkt', quiet=True)
+
+
+# Function to locate the project root directory
+def find_path(folder_name):
+    """
+    Walk up from the current working directory until folder_name is found.
+
+    Args:
+        folder_name (str): Name of the directory to locate.
+
+    Returns:
+        str: Absolute path to the directory.
+    """
+    curr_dir = os.getcwd()
+    while True:
+        if folder_name in os.listdir(curr_dir):
+            return os.path.join(curr_dir, folder_name)
+        parent_dir = os.path.dirname(curr_dir)
+        if parent_dir == curr_dir:
+            raise ValueError(f"Folder '{folder_name}' not found.")
+        curr_dir = parent_dir
+
+
+cwd = find_path('Resume-Matcher')
+config_path = os.path.join(cwd, "scripts", "similarity")
+
+# Wide layout by default, with the favicon added in this change
+st.set_page_config(page_title="Resume Matcher", page_icon="Assets/img/favicon.ico", layout="wide")
+
+
+# Function to update a value in the Streamlit session state
+def update_session_state(key, value):
+    """
+    Store a value in Streamlit's session state under the given key.
+
+    Args:
+        key (str): Session state key.
+        value: Value to store.
+
+    Returns:
+        None
+    """
+    st.session_state[key] = value
+
+
+# Function to read the similarity scorer configuration
+def read_config(filepath):
+    """
+    Read a YAML config file.
+
+    Args:
+        filepath (str): Path to the YAML file.
+
+    Returns:
+        dict: Parsed configuration, or None if parsing fails.
+    """
+    try:
+        with open(filepath) as f:
+            return yaml.safe_load(f)
+    except yaml.YAMLError as error:
+        print(f"Exception: {error}")
+        return None
+
+
+# Function to delete all files from a directory
+def delete_from_dir(filepath) -> bool:
+    """
+    Delete every file in the given directory.
+
+    Args:
+        filepath (str): Path of the directory to clean up.
+
+    Returns:
+        bool: True if the cleanup succeeded, False otherwise.
+    """
+    try:
+        for file in os.scandir(filepath):
+            os.remove(file.path)
+
+        return True
+    except OSError as error:
+        print(f"Exception: {error}")
+        return False
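+
+# Usage sketch: calling delete_from_dir(os.path.join(cwd, "Data", "Processed",
+# "Resumes")), as the cleanup section below does, returns True once every file
+# in that directory has been removed, and False as soon as any os.remove()
+# call fails; subdirectories are not descended into.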
+
+
+# Function to create a star-shaped graph visualization
+def create_star_graph(nodes_and_weights, title):
+    """
+    Create a star-shaped graph visualization.
+
+    Args:
+        nodes_and_weights (list): List of tuples containing nodes and their weights.
+        title (str): Title for the graph.
+
+    Returns:
+        None
+    """
+    # Create an empty graph
+    graph = nx.Graph()
+
+    # Add the central node
+    central_node = "resume"
+    graph.add_node(central_node)
+
+    # Add nodes and edges with weights to the graph
+    for node, weight in nodes_and_weights:
+        graph.add_node(node)
+        graph.add_edge(central_node, node, weight=weight * 100)
+
+    # Get position layout for nodes
+    pos = nx.spring_layout(graph)
+
+    # Create edge trace
+    edge_x = []
+    edge_y = []
+    for edge in graph.edges():
+        x0, y0 = pos[edge[0]]
+        x1, y1 = pos[edge[1]]
+        edge_x.extend([x0, x1, None])
+        edge_y.extend([y0, y1, None])
+
+    edge_trace = go.Scatter(x=edge_x, y=edge_y, line=dict(
+        width=0.5, color='#888'), hoverinfo='none', mode='lines')
+
+    # Create node trace
+    node_x = []
+    node_y = []
+    for node in graph.nodes():
+        x, y = pos[node]
+        node_x.append(x)
+        node_y.append(y)
+
+    node_trace = go.Scatter(x=node_x, y=node_y, mode='markers', hoverinfo='text',
+                            marker=dict(showscale=True, colorscale='Rainbow', reversescale=True, color=[], size=10,
+                                        colorbar=dict(thickness=15, title='Node Connections', xanchor='left',
+                                                      titleside='right'), line_width=2))
+
+    # Color node points by number of connections
+    node_adjacencies = []
+    node_text = []
+    for node in graph.nodes():
+        adjacencies = list(graph.adj[node])
+        node_adjacencies.append(len(adjacencies))
+        node_text.append(f'{node}<br># of connections: {len(adjacencies)}')
+
+    node_trace.marker.color = node_adjacencies
+    node_trace.text = node_text
+
+    # Create the figure
+    figure = go.Figure(data=[edge_trace, node_trace],
+                       layout=go.Layout(title=title, titlefont=dict(size=16), showlegend=False,
+                                        hovermode='closest', margin=dict(b=20, l=5, r=5, t=40),
+                                        xaxis=dict(
+                                            showgrid=False, zeroline=False, showticklabels=False),
+                                        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))
+
+    # Show the figure
+    st.plotly_chart(figure, use_container_width=True)
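+
+# Illustrative call (made-up data): each tuple is (term, weight) with weight
+# in [0, 1], matching the keyterms produced by the parsers below, e.g.
+#   create_star_graph([("python", 0.9), ("nlp", 0.4)], "Entities from Resume")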
+
+
+# Function to create annotated text with highlighting
+def create_annotated_text(input_string: str, word_list: List[str], annotation: str, color_code: str):
+    """
+    Create annotated text with highlighted keywords.
+
+    Args:
+        input_string (str): The input text.
+        word_list (List[str]): List of keywords to be highlighted.
+        annotation (str): Annotation label for highlighted keywords.
+        color_code (str): Color code for highlighting.
+
+    Returns:
+        List: Annotated text with highlighted keywords.
+    """
+    # Tokenize the input string
+    tokens = nltk.word_tokenize(input_string)
+
+    # Convert the list to a set for quick lookups
+    word_set = set(word_list)
+
+    # Initialize an empty list to hold the annotated text
+    ret_annotated_text = []
+
+    for token in tokens:
+        # Check if the token is in the set
+        if token in word_set:
+            # If it is, append a tuple with the token, annotation, and color code
+            ret_annotated_text.append((token, annotation, color_code))
+        else:
+            # If it's not, just append the token as a string
+            ret_annotated_text.append(token)
+
+    return ret_annotated_text
+
+
+# Function to read JSON data from a file
+def read_json(filename):
+    """
+    Read JSON data from a file.
+
+    Args:
+        filename (str): The path to the JSON file.
+
+    Returns:
+        dict: The JSON data.
+    """
+    with open(filename) as f:
+        data = json.load(f)
+    return data
+
+
+# Function to tokenize a string
+def tokenize_string(input_string):
+    """
+    Tokenize a string into words.
+
+    Args:
+        input_string (str): The input string.
+
+    Returns:
+        List[str]: List of tokens.
+    """
+    tokens = nltk.word_tokenize(input_string)
+    return tokens
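+
+# Shape sketch for create_annotated_text (invented sample strings): for the
+# text "knows Python well" and word_list ["Python"], the return value is
+# ['knows', ('Python', 'KW', '#0B666A'), 'well'] -- plain strings pass
+# through untouched, while matches become tuples that annotated_text()
+# renders as highlighted chips.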
+ """ + tokens = nltk.word_tokenize(input_string) + return tokens + + +# Cleanup processed resume / job descriptions +delete_from_dir(os.path.join(cwd, "Data", "Processed", "Resumes")) +delete_from_dir(os.path.join(cwd, "Data", "Processed", "JobDescription")) + +# Set default session states for first run +if "resumeUploaded" not in st.session_state.keys(): + update_session_state("resumeUploaded", "Pending") + update_session_state("resumePath", "") +if "jobDescriptionUploaded" not in st.session_state.keys(): + update_session_state("jobDescriptionUploaded", "Pending") + update_session_state("jobDescriptionPath", "") + +# Display the main title and sub-headers +st.title(':blue[Resume Matcher]') +with st.sidebar: + st.image('Assets/img/header_image.png') + st.subheader('Free and Open Source ATS to help your resume pass the screening stage.') + st.markdown('Check the website [www.resumematcher.fyi](https://www.resumematcher.fyi/)') + st.markdown('Give Resume Matcher a ⭐ on [GitHub](https://github.com/srbhr/resume-matcher)') + badge(type="github", name="srbhr/Resume-Matcher") + st.markdown('For updates follow me on Twitter.') + badge(type="twitter", name="_srbhr_") + st.markdown('If you like the project and would like to further help in development please consider 👇') + badge(type="buymeacoffee", name="srbhr") + +st.divider() +avs.add_vertical_space(1) + +with st.container(): + resumeCol, jobDescriptionCol = st.columns(2) + with resumeCol: + uploaded_Resume = st.file_uploader("Choose a Resume", type="pdf") + if uploaded_Resume is not None: + if st.session_state["resumeUploaded"] == "Pending": + save_path_resume = os.path.join(cwd, "Data", "Resumes", uploaded_Resume.name) + + with open(save_path_resume, mode='wb') as w: + w.write(uploaded_Resume.getvalue()) + + if os.path.exists(save_path_resume): + st.toast(f'File {uploaded_Resume.name} is successfully saved!', icon="✔️") + update_session_state("resumeUploaded", "Uploaded") + update_session_state("resumePath", save_path_resume) + else: + update_session_state("resumeUploaded", "Pending") + update_session_state("resumePath", "") + + with jobDescriptionCol: + uploaded_JobDescription = st.file_uploader("Choose a Job Description", type="pdf") + if uploaded_JobDescription is not None: + if st.session_state["jobDescriptionUploaded"] == "Pending": + save_path_jobDescription = os.path.join(cwd, "Data", "JobDescription", uploaded_JobDescription.name) + + with open(save_path_jobDescription, mode='wb') as w: + w.write(uploaded_JobDescription.getvalue()) + + if os.path.exists(save_path_jobDescription): + st.toast(f'File {uploaded_JobDescription.name} is successfully saved!', icon="✔️") + update_session_state("jobDescriptionUploaded", "Uploaded") + update_session_state("jobDescriptionPath", save_path_jobDescription) + else: + update_session_state("jobDescriptionUploaded", "Pending") + update_session_state("jobDescriptionPath", "") + +with st.spinner('Please wait...'): + if (uploaded_Resume is not None and + st.session_state["jobDescriptionUploaded"] == "Uploaded" and + uploaded_JobDescription is not None and + st.session_state["jobDescriptionUploaded"] == "Uploaded"): + + resumeProcessor = ParseResume(read_single_pdf(st.session_state["resumePath"])) + jobDescriptionProcessor = ParseJobDesc(read_single_pdf(st.session_state["jobDescriptionPath"])) + + # Resume / JD output + selected_file = resumeProcessor.get_JSON() + selected_jd = jobDescriptionProcessor.get_JSON() + + # Add containers for each row to avoid overlap + with st.container(): + resumeCol, 
+
+with st.spinner('Please wait...'):
+    if (uploaded_Resume is not None and
+            st.session_state["resumeUploaded"] == "Uploaded" and
+            uploaded_JobDescription is not None and
+            st.session_state["jobDescriptionUploaded"] == "Uploaded"):
+
+        resumeProcessor = ParseResume(read_single_pdf(st.session_state["resumePath"]))
+        jobDescriptionProcessor = ParseJobDesc(read_single_pdf(st.session_state["jobDescriptionPath"]))
+
+        # Resume / JD output
+        selected_file = resumeProcessor.get_JSON()
+        selected_jd = jobDescriptionProcessor.get_JSON()
+
+        # Add containers for each row to avoid overlap
+        with st.container():
+            resumeCol, jobDescriptionCol = st.columns(2)
+            with resumeCol:
+                with st.expander("Parsed Resume Data"):
+                    st.caption(
+                        "This text is parsed from your resume. This is how it will look after being parsed by an "
+                        "ATS.")
+                    st.caption("Use this to understand how to make your resume ATS-friendly.")
+                    avs.add_vertical_space(3)
+                    st.write(selected_file["clean_data"])
+
+            with jobDescriptionCol:
+                with st.expander("Parsed Job Description"):
+                    st.caption(
+                        "The pipeline currently parses this from a PDF, but it will accept txt or copy-paste input.")
+                    avs.add_vertical_space(3)
+                    st.write(selected_jd["clean_data"])
+
+        with st.container():
+            resumeCol, jobDescriptionCol = st.columns(2)
+            with resumeCol:
+                with st.expander("Extracted Keywords"):
+                    st.write("Now let's take a look at the extracted keywords from the resume.")
+                    annotated_text(create_annotated_text(
+                        selected_file["clean_data"], selected_file["extracted_keywords"],
+                        "KW", "#0B666A"))
+            with jobDescriptionCol:
+                with st.expander("Extracted Keywords"):
+                    st.write("Now let's take a look at the extracted keywords from the job description.")
+                    annotated_text(create_annotated_text(
+                        selected_jd["clean_data"], selected_jd["extracted_keywords"],
+                        "KW", "#0B666A"))
+
+        with st.container():
+            resumeCol, jobDescriptionCol = st.columns(2)
+            with resumeCol:
+                with st.expander("Extracted Entities"):
+                    st.write("Now let's take a look at the extracted entities from the resume.")
+
+                    # Call the function with your data
+                    create_star_graph(selected_file['keyterms'], "Entities from Resume")
+            with jobDescriptionCol:
+                with st.expander("Extracted Entities"):
+                    st.write("Now let's take a look at the extracted entities from the job description.")
+
+                    # Call the function with your data
+                    create_star_graph(selected_jd['keyterms'], "Entities from Job Description")
+
+        with st.container():
+            resumeCol, jobDescriptionCol = st.columns(2)
+            with resumeCol:
+                with st.expander("Keywords & Values"):
+                    df1 = pd.DataFrame(selected_file['keyterms'], columns=["keyword", "value"])
+
+                    # Create the dictionary
+                    keyword_dict = {}
+                    for keyword, value in selected_file['keyterms']:
+                        keyword_dict[keyword] = value * 100
+
+                    fig = go.Figure(data=[go.Table(header=dict(values=["Keyword", "Value"],
+                                                               font=dict(size=12, color="white"),
+                                                               fill_color='#1d2078'),
+                                                   cells=dict(values=[list(keyword_dict.keys()),
+                                                                      list(keyword_dict.values())],
+                                                              line_color='darkslategray',
+                                                              fill_color='#6DA9E4'))
+                                          ])
+                    st.plotly_chart(fig, use_container_width=True)
+            with jobDescriptionCol:
+                with st.expander("Keywords & Values"):
+                    df2 = pd.DataFrame(selected_jd['keyterms'], columns=["keyword", "value"])
+
+                    # Create the dictionary
+                    keyword_dict = {}
+                    for keyword, value in selected_jd['keyterms']:
+                        keyword_dict[keyword] = value * 100
+
+                    fig = go.Figure(data=[go.Table(header=dict(values=["Keyword", "Value"],
+                                                               font=dict(size=12, color="white"),
+                                                               fill_color='#1d2078'),
+                                                   cells=dict(values=[list(keyword_dict.keys()),
+                                                                      list(keyword_dict.values())],
+                                                              line_color='darkslategray',
+                                                              fill_color='#6DA9E4'))
+                                          ])
+                    st.plotly_chart(fig, use_container_width=True)
+
+        with st.container():
+            resumeCol, jobDescriptionCol = st.columns(2)
+            with resumeCol:
+                with st.expander("Key Topics"):
+                    fig = px.treemap(df1, path=['keyword'], values='value',
+                                     color_continuous_scale='Rainbow',
+                                     title='Key Terms/Topics Extracted from your Resume')
+                    st.plotly_chart(fig, use_container_width=True)
+
+            with jobDescriptionCol:
+                with st.expander("Key Topics"):
+                    fig = px.treemap(df2, path=['keyword'], values='value',
+                                     color_continuous_scale='Rainbow',
+                                     title='Key Terms/Topics Extracted from Job Description')
+                    st.plotly_chart(fig, use_container_width=True)
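+
+        # Assumption on the scorer's contract: get_similarity_score is expected
+        # to return a list of result dicts whose first entry holds a "score" in
+        # [0, 1]; the block below rescales it to 0-100 and color-codes it by
+        # threshold (red below 60, orange from 60 to below 75, green otherwise).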
+
+        avs.add_vertical_space(2)
+        config_file_path = config_path + "/config.yml"
+        if os.path.exists(config_file_path):
+            config_data = read_config(config_file_path)
+            if config_data:
+                print("Config file parsed successfully:")
+                resume_string = ' '.join(selected_file["extracted_keywords"])
+                jd_string = ' '.join(selected_jd["extracted_keywords"])
+                result = get_similarity_score(resume_string, jd_string)
+                similarity_score = round(result[0]["score"] * 100, 2)
+
+                # Default color to green
+                score_color = "green"
+                if similarity_score < 60:
+                    score_color = "red"
+                elif 60 <= similarity_score < 75:
+                    score_color = "orange"
+
+                st.markdown(f'Similarity Score obtained for the resume and job description is '
+                            f'<span style="color:{score_color}">{similarity_score}</span>',
+                            unsafe_allow_html=True)
+        else:
+            print("Config file does not exist.")
+
+        avs.add_vertical_space(2)
+        with st.expander("Common words between Resume and Job Description:"):
+            annotated_text(create_annotated_text(
+                selected_file["clean_data"], selected_jd["extracted_keywords"],
+                "JD", "#F24C3D"))
+
+st.divider()
+
+# Go back to top
+st.markdown('[:arrow_up: Back to Top](#resume-matcher)')