Skip to content

Commit 5d8caa8

Browse files
authored
App.py and Requirements
0 parents  commit 5d8caa8

File tree

2 files changed

+107
-0
lines changed

2 files changed

+107
-0
lines changed

app.py

+104
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import streamlit as st
2+
import pandas as pd
3+
from functools import reduce
4+
from collections import Counter
5+
from io import BytesIO
6+
7+
# Page title.
st.header('Spreadsheet Query UI')

# First run of a session: create the store that maps an uploaded file's
# name to its opened workbook, so uploads persist across script reruns.
if 'uploaded_files' not in st.session_state:
    st.session_state.uploaded_files = {}
12+
13+
# File management lives in a collapsible section: upload one workbook at a
# time, or drop a workbook that was registered earlier in this session.
with st.expander('File Management'):
    workbooks = st.session_state['uploaded_files']

    # NOTE(review): requirements.txt lists only openpyxl; reading .xls and
    # .xlsb presumably needs the xlrd / pyxlsb engines as well - confirm.
    uploaded_file = st.file_uploader(
        "Choose an Excel file",
        type=['xls', 'xlsx', 'xlsm', 'xlsb'],
    )

    # Register the uploaded workbook under its file name. A later upload
    # with the same name replaces the earlier entry.
    if uploaded_file is not None:
        xls = pd.ExcelFile(BytesIO(uploaded_file.read()))
        workbooks[uploaded_file.name] = xls

    # Offer removal only while at least one workbook is registered.
    # NOTE(review): the registration step above runs again on every rerun
    # while the uploader still holds a file, so a removed workbook looks
    # like it would be re-registered on the next interaction - confirm.
    if workbooks:
        remove_file = st.selectbox('Select a file to remove', list(workbooks))
        if st.button('Remove File'):
            workbooks.pop(remove_file)
28+
29+
# Parse every sheet of every registered workbook into a DataFrame.
#   data:                  "<file name>_<sheet name>" -> DataFrame
#   original_column_names: normalised column name -> header text to display
#                          (when several headers normalise to the same name,
#                          the last one seen wins)
data = {}
original_column_names = {}

for path, xls in st.session_state['uploaded_files'].items():
    for sheet_name in xls.sheet_names:
        df = xls.parse(sheet_name, dtype=str)  # Ensure all data is loaded as strings

        # Header cells may come back as non-strings (e.g. numeric headers),
        # which would break the string operations below, so coerce first.
        headers = df.columns.astype(str)

        # Replace spaces with underscores, remove special characters and
        # lowercase. regex=True is required: without it pandas 2.x treats
        # r'\W' as the literal two characters backslash + W and nothing is
        # stripped. Underscores are word characters, so they survive.
        normalized = (
            headers.str.replace(' ', '_')
            .str.replace(r'\W', '', regex=True)
            .str.lower()
        )

        # Build the display-name lookup from the very same normalised names
        # that become the DataFrame's columns, so the keys always match.
        original_column_names.update(zip(normalized, headers))
        df.columns = normalized

        data[f'{path}_{sheet_name}'] = df
43+
44+
# Flatten the column names of every loaded sheet into one list.
column_names = []
for frame in data.values():
    column_names.extend(frame.columns)

# Tally how often each name occurs, then bucket the names:
# more than two occurrences -> common, exactly two -> rare, one -> unique.
counter = Counter(column_names)
common_columns = []
rare_columns = []
unique_columns = []
for name, count in counter.items():
    if count > 2:
        common_columns.append(name)
    elif count == 2:
        rare_columns.append(name)
    elif count == 1:
        unique_columns.append(name)
52+
53+
st.subheader('Pick Terms to Search For')

# One checkbox per column, labelled with the original header text. Each
# group uses its own key prefix so widget keys never clash between groups.

# Common fields are rendered directly on the page.
st.markdown('### Common Fields')
selected_common_columns = []
for column in common_columns:
    if st.checkbox(original_column_names[column], key=f'common_{column}'):
        selected_common_columns.append(column)

# Rare and unique fields are tucked into collapsible sections.
with st.expander("Rare Fields"):
    selected_rare_columns = []
    for column in rare_columns:
        if st.checkbox(original_column_names[column], key=f'rare_{column}'):
            selected_rare_columns.append(column)

with st.expander("Unique Fields"):
    selected_unique_columns = []
    for column in unique_columns:
        if st.checkbox(original_column_names[column], key=f'unique_{column}'):
            selected_unique_columns.append(column)

# Every ticked column, in group order: common, then rare, then unique.
selected_columns = [
    *selected_common_columns,
    *selected_rare_columns,
    *selected_unique_columns,
]
67+
68+
# For every ticked column, offer a dropdown of the distinct non-null values
# found in that column across all sheets that contain it.
# selected_values maps normalised column name -> the value chosen to match.
selected_values = {}
for column in selected_columns:
    # Collect into a set to de-duplicate across sheets, then sort so the
    # dropdown order (and therefore the default first choice) is stable
    # instead of following arbitrary set iteration order.
    options = sorted({
        str(val)
        for df in data.values()
        if column in df.columns
        for val in df[column].dropna().unique()
    })
    selected_values[column] = st.selectbox(
        f"{original_column_names[column]} values",
        options,
        key=f'selectbox_{column}',
    )

# How the per-column conditions are combined when searching.
logical_operator = st.selectbox("Logical Operator", options=["And", "Or"], index=0)
# Whether results show every column or only a short preview.
view_all_columns = st.checkbox('View all columns in results')
77+
78+
# Run the search only on the rerun triggered by the Search button.
if st.button("🔍 Search 🔎"):
    # "Or" keeps rows matching any chosen value; otherwise ("And") a row
    # must match every chosen value that applies to its sheet.
    match_any = logical_operator == "Or"

    results = []
    for name, df in data.items():
        # One boolean mask per chosen column that this sheet actually has.
        conditions = [
            df[column] == selected_values[column]
            for column in selected_values
            if column in df.columns
        ]
        if not conditions:
            # None of the chosen columns exist in this sheet.
            continue

        # Fold the masks together pairwise, left to right.
        mask = conditions[0]
        for extra in conditions[1:]:
            mask = (mask | extra) if match_any else (mask & extra)

        matched_df = df[mask]
        if matched_df.empty:
            continue
        results.append({'name': name, 'data': matched_df})

    # Show one table per sheet that produced matches, or say there were none.
    if not results:
        st.write('No results found')
    else:
        st.subheader('Global Search Results')
        for result in results:
            st.markdown(f'### {result["name"]}')
            # Full table when requested, otherwise only the first three columns.
            shown = result['data']
            if not view_all_columns:
                shown = shown.iloc[:, :3]
            st.dataframe(shown)
104+

requirements.txt

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
streamlit~=1.24.1
2+
pandas~=2.0.3
3+
openpyxl~=3.1.2

0 commit comments

Comments
 (0)