Skip to content

Commit 698cf2e

Browse files
enryH and sayalaruano authored
Use multiple sheets from an Excel file in reports (#115)
* 🎨 table_utils module, textwrapping and naming vars - clean-up and understanding the workflow * 📝 where should the caption come from? * ✨🚧 first draft to use multiple sheets from excel file in quarto reports * 🚧 add streamlit support using selection dropdown menu * 📝 To discuss * Merge branch 'main' into xlsx_with_multiple_sheets 🐛 quarto report is broken atm, relative paths do not work * 🐛 using the dataframe during qmd generation, need to redefine path - loading sheets on report generation makes it necessary to have the path w.r.t. the folder on report generation - qmd notebook is moved to quarto_report folder in outfolder, where the path is needed To check if this works fine with output folder definitions... * ✅ add excel table with two sheets - seems to be not working in docx properly (sheet is not changed) * ✅ add an xlsx example * 🐛 Fix(quarto_reportview.py): avoid showing duplicated sheets and remove IPython prompts in static reports --------- Co-authored-by: sayalaruano <[email protected]>
1 parent 7c41955 commit 698cf2e

File tree

6 files changed

+162
-51
lines changed

6 files changed

+162
-51
lines changed

src/vuegen/config_manager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def _create_component_config_fromfile(self, file_path: Path) -> Dict[str, str]:
7474
file_path.resolve().as_posix()
7575
) # ! needs to be posix for all OS support
7676
component_config["description"] = ""
77-
component_config["caption"] = ""
77+
component_config["caption"] = "" # ? It is not populated here
7878

7979
# Infer component config
8080
if file_ext in [

src/vuegen/quarto_reportview.py

Lines changed: 94 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
import os
22
import subprocess
33
import sys
4+
import textwrap
45
from pathlib import Path
5-
from typing import List
6+
from typing import List, Optional
67

78
import networkx as nx
8-
import pandas as pd
99

1010
from . import report as r
11+
from . import table_utils
1112
from .utils import create_folder, get_relative_file_path, is_url, sort_imports
1213

1314

@@ -271,10 +272,6 @@ def run_report(self, output_dir: str = BASE_DIR) -> None:
271272
[self.quarto_path, "install", "tinytex", "--no-prompt"],
272273
check=True,
273274
)
274-
subprocess.run(
275-
[self.quarto_path, "install", "chromium", "--no-prompt"],
276-
check=True,
277-
)
278275
try:
279276
subprocess.run(
280277
args,
@@ -712,19 +709,16 @@ def _generate_dataframe_content(self, dataframe) -> List[str]:
712709

713710
# Append header for DataFrame loading
714711
dataframe_content.append(
715-
f"""```{{python}}
716-
#| label: '{dataframe.title} {dataframe.id}'
717-
#| fig-cap: ""
718-
"""
712+
textwrap.dedent(
713+
f"""\
714+
```{{python}}
715+
#| label: '{dataframe.title} {dataframe.id}'
716+
#| fig-cap: ""
717+
"""
718+
)
719719
)
720720
# Mapping of file extensions to read functions
721-
read_function_mapping = {
722-
r.DataFrameFormat.CSV.value_with_dot: pd.read_csv,
723-
r.DataFrameFormat.PARQUET.value_with_dot: pd.read_parquet,
724-
r.DataFrameFormat.TXT.value_with_dot: pd.read_table,
725-
r.DataFrameFormat.XLS.value_with_dot: pd.read_excel,
726-
r.DataFrameFormat.XLSX.value_with_dot: pd.read_excel,
727-
}
721+
read_function_mapping = table_utils.read_function_mapping
728722
try:
729723
# Check if the file extension matches any DataFrameFormat value
730724
file_extension = Path(dataframe.file_path).suffix.lower()
@@ -740,24 +734,68 @@ def _generate_dataframe_content(self, dataframe) -> List[str]:
740734
df_file_path = dataframe.file_path
741735
else:
742736
df_file_path = get_relative_file_path(
743-
dataframe.file_path, base_path=".."
737+
dataframe.file_path,
744738
)
739+
sheet_names = None
740+
# If the file is an Excel file, get the sheet names
741+
if file_extension in [
742+
r.DataFrameFormat.XLS.value_with_dot,
743+
r.DataFrameFormat.XLSX.value_with_dot,
744+
]:
745+
sheet_names = table_utils.get_sheet_names(df_file_path)
746+
if len(sheet_names) > 1:
747+
# If there are multiple sheets, use the first one
748+
self.report.logger.info(
749+
f"Multiple sheets found in the Excel file: {df_file_path}. "
750+
f"Sheets: {sheet_names}"
751+
)
752+
else:
753+
sheet_names = None
745754

755+
# Build the file path (URL or local file)
756+
if is_url(dataframe.file_path):
757+
df_file_path = dataframe.file_path
758+
else:
759+
df_file_path = get_relative_file_path(
760+
dataframe.file_path, base_path=".."
761+
)
746762
# Load the DataFrame using the correct function
747763
read_function = read_function_mapping[file_extension]
748764
dataframe_content.append(
749765
f"""df = pd.{read_function.__name__}('{df_file_path.as_posix()}')\n"""
750766
)
751-
752767
# Display the dataframe
753768
dataframe_content.extend(self._show_dataframe(dataframe))
754769

770+
# Add further sheets
771+
if sheet_names:
772+
for sheet_name in sheet_names[1:]:
773+
dataframe_content.append(f"#### {sheet_name}")
774+
dataframe_content.append(
775+
textwrap.dedent(
776+
f"""\
777+
```{{python}}
778+
#| label: '{dataframe.title} {dataframe.id} {sheet_name}'
779+
#| fig-cap: ""
780+
"""
781+
)
782+
)
783+
dataframe_content.append(
784+
f"df = pd.{read_function.__name__}('{df_file_path.as_posix()}', "
785+
f"sheet_name='{sheet_name}')\n"
786+
)
787+
# Display the dataframe
788+
dataframe_content.extend(
789+
self._show_dataframe(dataframe, suffix=sheet_name)
790+
)
791+
755792
except Exception as e:
756793
self.report.logger.error(
757794
f"Error generating content for DataFrame: {dataframe.title}. Error: {str(e)}"
758795
)
759796
raise
760797
# Add caption if available
798+
# ? Where should this come from?
761799
if dataframe.caption:
762800
dataframe_content.append(f">{dataframe.caption}\n")
763801

@@ -787,18 +825,24 @@ def _generate_markdown_content(self, markdown) -> List[str]:
787825
try:
788826
# Initialize md code with common structure
789827
markdown_content.append(
790-
f"""
791-
```{{python}}
792-
#| label: '{markdown.title} {markdown.id}'
793-
#| fig-cap: ""\n"""
828+
textwrap.dedent(
829+
f"""
830+
```{{python}}
831+
#| label: '{markdown.title} {markdown.id}'
832+
#| fig-cap: ""
833+
"""
834+
)
794835
)
795836
# If the file path is a URL, generate code to fetch content via requests
796837
if is_url(markdown.file_path):
797838
markdown_content.append(
798-
f"""
799-
response = requests.get('{markdown.file_path}')
800-
response.raise_for_status()
801-
markdown_content = response.text\n"""
839+
textwrap.dedent(
840+
f"""\
841+
response = requests.get('{markdown.file_path}')
842+
response.raise_for_status()
843+
markdown_content = response.text
844+
"""
845+
)
802846
)
803847
else: # If it's a local file
804848
md_rel_path = get_relative_file_path(markdown.file_path, base_path="..")
@@ -826,14 +870,17 @@ def _generate_markdown_content(self, markdown) -> List[str]:
826870
)
827871
return markdown_content
828872

829-
def _show_dataframe(self, dataframe) -> List[str]:
873+
def _show_dataframe(self, dataframe, suffix: Optional[str] = None) -> List[str]:
830874
"""
831875
Appends either a static image or an interactive representation of a DataFrame to the content list.
832876
833877
Parameters
834878
----------
835879
dataframe : DataFrame
836880
The DataFrame object containing the data to display.
881+
suffix : str, optional
882+
A suffix to append to the DataFrame image file name like a sheet name
883+
or another identifier (default is None).
837884
838885
Returns
839886
-------
@@ -843,14 +890,19 @@ def _show_dataframe(self, dataframe) -> List[str]:
843890
dataframe_content = []
844891
if self.is_report_static:
845892
# Generate path for the DataFrame image
846-
df_image = (
847-
Path(self.static_dir) / f"{dataframe.title.replace(' ', '_')}.png"
848-
)
893+
fpath_df_image = Path(self.static_dir) / dataframe.title.replace(" ", "_")
894+
if suffix:
895+
fpath_df_image = fpath_df_image.with_stem(
896+
fpath_df_image.stem + f"_{suffix.replace(' ', '_')}"
897+
)
898+
fpath_df_image = fpath_df_image.with_suffix(".png")
899+
849900
dataframe_content.append(
850-
f"df.dfi.export('{Path(df_image).relative_to('quarto_report').as_posix()}', max_rows=10, max_cols=5, table_conversion='matplotlib')\n```\n"
901+
f"df.dfi.export('{Path(fpath_df_image).relative_to('quarto_report').as_posix()}',"
902+
" max_rows=10, max_cols=5, table_conversion='matplotlib')\n```\n"
851903
)
852904
# Use helper method to add centered image content
853-
dataframe_content.append(self._generate_image_content(df_image))
905+
dataframe_content.append(self._generate_image_content(fpath_df_image))
854906
else:
855907
# Append code to display the DataFrame interactively
856908
dataframe_content.append(
@@ -961,10 +1013,13 @@ def _generate_component_imports(self, component: r.Component) -> List[str]:
9611013
"import json",
9621014
],
9631015
},
964-
"dataframe": [
1016+
"static_dataframe": [
9651017
"import pandas as pd",
966-
"from itables import show, init_notebook_mode",
9671018
"import dataframe_image as dfi",
1019+
],
1020+
"interactive_dataframe": [
1021+
"import pandas as pd",
1022+
"from itables import show, init_notebook_mode",
9681023
"init_notebook_mode(all_interactive=True)",
9691024
],
9701025
"markdown": ["import IPython.display as display", "import requests"],
@@ -980,7 +1035,10 @@ def _generate_component_imports(self, component: r.Component) -> List[str]:
9801035
if plot_type in components_imports["plot"]:
9811036
component_imports.extend(components_imports["plot"][plot_type])
9821037
elif component_type == r.ComponentType.DATAFRAME:
983-
component_imports.extend(components_imports["dataframe"])
1038+
if self.is_report_static:
1039+
component_imports.extend(components_imports["static_dataframe"])
1040+
else:
1041+
component_imports.extend(components_imports["interactive_dataframe"])
9841042
elif component_type == r.ComponentType.MARKDOWN:
9851043
component_imports.extend(components_imports["markdown"])
9861044

src/vuegen/streamlit_reportview.py

Lines changed: 37 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@
55
from pathlib import Path
66
from typing import List
77

8-
import pandas as pd
98
from streamlit.web import cli as stcli
109

1110
from . import report as r
11+
from . import table_utils
1212
from .utils import create_folder, generate_footer, get_relative_file_path, is_url
1313
from .utils.variables import make_valid_identifier
1414

@@ -721,13 +721,7 @@ def _generate_dataframe_content(self, dataframe) -> List[str]:
721721
)
722722

723723
# Mapping of file extensions to read functions
724-
read_function_mapping = {
725-
r.DataFrameFormat.CSV.value_with_dot: pd.read_csv,
726-
r.DataFrameFormat.PARQUET.value_with_dot: pd.read_parquet,
727-
r.DataFrameFormat.TXT.value_with_dot: pd.read_table,
728-
r.DataFrameFormat.XLS.value_with_dot: pd.read_excel,
729-
r.DataFrameFormat.XLSX.value_with_dot: pd.read_excel,
730-
}
724+
read_function_mapping = table_utils.read_function_mapping
731725

732726
try:
733727
# Check if the file extension matches any DataFrameFormat value
@@ -738,19 +732,47 @@ def _generate_dataframe_content(self, dataframe) -> List[str]:
738732
self.report.logger.error(
739733
f"Unsupported file extension: {file_extension}. Supported extensions are: {', '.join(fmt.value for fmt in r.DataFrameFormat)}."
740734
)
741-
742-
# Load the DataFrame using the correct function
743-
read_function = read_function_mapping[file_extension]
735+
# return [] # Skip execution if unsupported file extension
736+
# Should it not return here? Can we even call the method with an unsupported file extension?
744737

745738
# Build the file path (URL or local file)
746739
if is_url(dataframe.file_path):
747740
df_file_path = dataframe.file_path
748741
else:
749742
df_file_path = get_relative_file_path(dataframe.file_path)
750-
dataframe_content.append(
751-
f"""df = pd.{read_function.__name__}('{df_file_path.as_posix()}')\n"""
752-
)
753743

744+
if file_extension in [
745+
r.DataFrameFormat.XLS.value_with_dot,
746+
r.DataFrameFormat.XLSX.value_with_dot,
747+
]:
748+
dataframe_content.append("selected_sheet = 0")
749+
sheet_names = table_utils.get_sheet_names(dataframe.file_path)
750+
if len(sheet_names) > 1:
751+
# If there are multiple sheets, ask the user to select one
752+
753+
dataframe_content.append(
754+
textwrap.dedent(
755+
f"""\
756+
sheet_names = table_utils.get_sheet_names("{dataframe.file_path}")
757+
selected_sheet = st.selectbox("Select a sheet to display", options=sheet_names)
758+
"""
759+
)
760+
)
761+
762+
# Load the DataFrame using the correct function
763+
read_function = read_function_mapping[file_extension]
764+
if file_extension in [
765+
r.DataFrameFormat.XLS.value_with_dot,
766+
r.DataFrameFormat.XLSX.value_with_dot,
767+
]:
768+
dataframe_content.append(
769+
f"""df = pd.{read_function.__name__}('{dataframe.file_path}', sheet_name=selected_sheet)\n"""
770+
)
771+
else:
772+
dataframe_content.append(
773+
f"""df = pd.{read_function.__name__}('{df_file_path.as_posix()}')\n"""
774+
)
775+
# ! Alternative to select box: iterate over sheets in DataFrame
754776
# Displays a DataFrame using AgGrid with configurable options.
755777
dataframe_content.append(
756778
"""
@@ -1169,6 +1191,7 @@ def _generate_component_imports(self, component: r.Component) -> List[str]:
11691191
"dataframe": [
11701192
"import pandas as pd",
11711193
"from st_aggrid import AgGrid, GridOptionsBuilder",
1194+
"from vuegen import table_utils",
11721195
],
11731196
"markdown": ["import requests"],
11741197
"chatbot": ["import time", "import json", "import requests"],

src/vuegen/table_utils.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import pandas as pd
2+
3+
from . import report as r
4+
5+
# Mapping of file extensions to read functions
6+
read_function_mapping = {
7+
r.DataFrameFormat.CSV.value_with_dot: pd.read_csv,
8+
r.DataFrameFormat.PARQUET.value_with_dot: pd.read_parquet,
9+
r.DataFrameFormat.TXT.value_with_dot: pd.read_table,
10+
r.DataFrameFormat.XLS.value_with_dot: pd.read_excel,
11+
r.DataFrameFormat.XLSX.value_with_dot: pd.read_excel,
12+
}
13+
14+
15+
def get_sheet_names(
    file_path: str,
) -> list[str]:
    """Get the sheet names of an Excel file.

    The workbook is opened only long enough to read its sheet names and is
    closed again before returning, so no file handle is left open.

    Parameters
    ----------
    file_path : str
        Path to the Excel file. It is passed unchanged to ``pd.ExcelFile``.

    Returns
    -------
    list[str]
        List of sheet names, as reported by ``pd.ExcelFile.sheet_names``.

    Raises
    ------
    Exception
        Any error raised by ``pd.ExcelFile`` while opening the file (for
        example a missing file) is propagated unchanged.
    """
    # Use the ExcelFile as a context manager so the underlying workbook handle
    # is released deterministically instead of waiting for garbage collection.
    with pd.ExcelFile(file_path) as excel_file:
        return excel_file.sheet_names

0 commit comments

Comments
 (0)