|
| 1 | +import streamlit as st |
| 2 | +import pandas as pd |
| 3 | +from functools import reduce |
| 4 | +from collections import Counter |
| 5 | +from io import BytesIO |
| 6 | + |
st.header('Spreadsheet Query UI')

# Uploaded workbooks are kept in session state (file name -> pd.ExcelFile);
# create the empty mapping the first time the script runs for this session.
if 'uploaded_files' not in st.session_state:
    st.session_state.uploaded_files = {}
| 12 | + |
# Counter baked into the uploader's widget key. Changing the key makes
# Streamlit treat the uploader as a new, empty widget on the next run.
if 'uploader_generation' not in st.session_state:
    st.session_state['uploader_generation'] = 0


def _remove_uploaded_file(file_name):
    """Button callback: forget ``file_name`` and reset the uploader widget.

    The uploader keeps returning its current file on every rerun, so simply
    popping the entry would see the file added straight back the next time
    the script runs. Bumping the generation counter clears the uploader so
    the removal sticks.
    """
    st.session_state['uploaded_files'].pop(file_name, None)
    st.session_state['uploader_generation'] += 1


# Use st.expander for file management section
with st.expander('File Management'):
    uploaded_file = st.file_uploader(
        "Choose an Excel file",
        type=['xls', 'xlsx', 'xlsm', 'xlsb'],
        key=f"excel_uploader_{st.session_state['uploader_generation']}",
    )

    # Parse the uploaded workbook and remember it under its file name
    if uploaded_file is not None:
        # getvalue() returns the whole payload regardless of the current
        # stream position, whereas read() returns nothing once consumed.
        xls = pd.ExcelFile(BytesIO(uploaded_file.getvalue()))
        st.session_state['uploaded_files'][uploaded_file.name] = xls

    # Allow user to remove files
    if st.session_state['uploaded_files']:
        remove_file = st.selectbox('Select a file to remove', list(st.session_state['uploaded_files']))
        # The callback runs before the next script run, so the file list and
        # the uploader are already up to date when the page is redrawn.
        st.button('Remove File', on_click=_remove_uploaded_file, args=(remove_file,))
| 28 | + |
# data: "<file name>_<sheet name>" -> DataFrame with normalised column names
# original_column_names: normalised column name -> header text as found in the sheet
data = {}
original_column_names = {}

for path, xls in st.session_state['uploaded_files'].items():
    for sheet_name in xls.sheet_names:
        df = xls.parse(sheet_name, dtype=str)  # Ensure all data is loaded as strings

        # Headers may be non-strings (e.g. numbers), so convert before
        # applying string operations.
        display_names = [str(col) for col in df.columns]

        # Normalise once and reuse the result for both the lookup table and
        # the DataFrame so their keys can never disagree. The regex flag is
        # explicit: plain str.replace never interprets r'\W', and pandas >= 2.0
        # treats patterns literally unless regex=True is passed.
        normalized = (
            pd.Index(display_names, dtype=object)
            .str.replace(' ', '_', regex=False)   # spaces -> underscores
            .str.replace(r'\W', '', regex=True)   # drop non-word characters
            .str.lower()
        )
        original_column_names.update(zip(normalized, display_names))
        df.columns = normalized

        data[f'{path}_{sheet_name}'] = df
| 43 | + |
# Collect every column name from every loaded sheet
column_names = []
for frame in data.values():
    column_names.extend(frame.columns)

# Tally how often each name occurs, then bucket the names by that tally
counter = Counter(column_names)
common_columns, rare_columns, unique_columns = [], [], []
for name, count in counter.items():
    if count > 2:
        common_columns.append(name)
    elif count == 2:
        rare_columns.append(name)
    else:  # a Counter built from a list never holds counts below 1
        unique_columns.append(name)
| 52 | + |
st.subheader('Pick Terms to Search For')


def _checked_columns(columns, key_prefix):
    """Render one checkbox per column and return the columns that are ticked.

    Each checkbox is labelled with the column's original header text and
    keyed as ``<key_prefix>_<column>``.
    """
    ticked = []
    for column in columns:
        if st.checkbox(original_column_names[column], key=f'{key_prefix}_{column}'):
            ticked.append(column)
    return ticked


# Common fields are shown directly; rare and unique ones sit in expanders
st.markdown('### Common Fields')
selected_common_columns = _checked_columns(common_columns, 'common')

with st.expander("Rare Fields"):
    selected_rare_columns = _checked_columns(rare_columns, 'rare')

with st.expander("Unique Fields"):
    selected_unique_columns = _checked_columns(unique_columns, 'unique')

# Combine all selected columns
selected_columns = [*selected_common_columns, *selected_rare_columns, *selected_unique_columns]
| 67 | + |
# Generate one dropdown per selected column: normalised column -> chosen value
selected_values = {}
for column in selected_columns:
    # Distinct non-null values of this column across every sheet that has it
    distinct_values = {
        str(val)
        for df in data.values()
        if column in df.columns
        for val in df[column].dropna().unique()
    }
    # A set has no meaningful order; sort so the dropdown is stable between
    # runs and easy to scan.
    options = sorted(distinct_values)
    selected_values[column] = st.selectbox(f"{original_column_names[column]} values", options, key=f'selectbox_{column}')

# Select logical operator used to combine the per-column conditions
logical_operator = st.selectbox("Logical Operator", options=["And", "Or"], index=0)
view_all_columns = st.checkbox('View all columns in results')
| 77 | + |
if st.button("🔍 Search 🔎"):

    def _either(left, right):
        """Combine two boolean masks so a row matching either one is kept."""
        return left | right

    def _both(left, right):
        """Combine two boolean masks so only rows matching both are kept."""
        return left & right

    # "Or" keeps rows matching any chosen value; otherwise ("And") all must match
    combine = _either if logical_operator == "Or" else _both

    results = []
    # Filter every sheet using only the selected columns it actually contains
    for name, df in data.items():
        conditions = [
            df[column] == wanted
            for column, wanted in selected_values.items()
            if column in df.columns
        ]
        if not conditions:
            continue  # none of the selected columns exist in this sheet
        matched_df = df[reduce(combine, conditions)]
        if matched_df.empty:
            continue
        results.append({'name': name, 'data': matched_df})

    if not results:
        st.write('No results found')
    else:
        st.subheader('Global Search Results')
        for result in results:
            st.markdown(f'### {result["name"]}')
            # Show every column when requested, otherwise only the first three
            if view_all_columns:
                shown = result['data']
            else:
                shown = result['data'].iloc[:, :3]
            st.dataframe(shown)
| 104 | + |
0 commit comments