Skip to content

Use multiple sheets from an Excel file in reports #115

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
May 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion src/vuegen/config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def _create_component_config_fromfile(self, file_path: Path) -> Dict[str, str]:
file_path.resolve().as_posix()
) # ! needs to be posix for all OS support
component_config["description"] = ""
component_config["caption"] = ""
component_config["caption"] = "" # ? It is not populated here

# Infer component config
if file_ext in [
Expand Down
130 changes: 94 additions & 36 deletions src/vuegen/quarto_reportview.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import os
import subprocess
import sys
import textwrap
from pathlib import Path
from typing import List
from typing import List, Optional

import networkx as nx
import pandas as pd

from . import report as r
from . import table_utils
from .utils import create_folder, get_relative_file_path, is_url, sort_imports


Expand Down Expand Up @@ -271,10 +272,6 @@ def run_report(self, output_dir: str = BASE_DIR) -> None:
[self.quarto_path, "install", "tinytex", "--no-prompt"],
check=True,
)
subprocess.run(
[self.quarto_path, "install", "chromium", "--no-prompt"],
check=True,
)
try:
subprocess.run(
args,
Expand Down Expand Up @@ -712,19 +709,16 @@ def _generate_dataframe_content(self, dataframe) -> List[str]:

# Append header for DataFrame loading
dataframe_content.append(
f"""```{{python}}
#| label: '{dataframe.title} {dataframe.id}'
#| fig-cap: ""
"""
textwrap.dedent(
f"""\
```{{python}}
#| label: '{dataframe.title} {dataframe.id}'
#| fig-cap: ""
"""
)
)
# Mapping of file extensions to read functions
read_function_mapping = {
r.DataFrameFormat.CSV.value_with_dot: pd.read_csv,
r.DataFrameFormat.PARQUET.value_with_dot: pd.read_parquet,
r.DataFrameFormat.TXT.value_with_dot: pd.read_table,
r.DataFrameFormat.XLS.value_with_dot: pd.read_excel,
r.DataFrameFormat.XLSX.value_with_dot: pd.read_excel,
}
read_function_mapping = table_utils.read_function_mapping
try:
# Check if the file extension matches any DataFrameFormat value
file_extension = Path(dataframe.file_path).suffix.lower()
Expand All @@ -740,24 +734,68 @@ def _generate_dataframe_content(self, dataframe) -> List[str]:
df_file_path = dataframe.file_path
else:
df_file_path = get_relative_file_path(
dataframe.file_path, base_path=".."
dataframe.file_path,
)
sheet_names = None
# If the file is an Excel file, get the sheet names
if file_extension in [
r.DataFrameFormat.XLS.value_with_dot,
r.DataFrameFormat.XLSX.value_with_dot,
]:
sheet_names = table_utils.get_sheet_names(df_file_path)
if len(sheet_names) > 1:
# If there are multiple sheets, use the first one
self.report.logger.info(
f"Multiple sheets found in the Excel file: {df_file_path}. "
f"Sheets: {sheet_names}"
)
else:
sheet_names = None

# Build the file path (URL or local file)
if is_url(dataframe.file_path):
df_file_path = dataframe.file_path
else:
df_file_path = get_relative_file_path(
dataframe.file_path, base_path=".."
)
# Load the DataFrame using the correct function
read_function = read_function_mapping[file_extension]
dataframe_content.append(
f"""df = pd.{read_function.__name__}('{df_file_path.as_posix()}')\n"""
)

# Display the dataframe
dataframe_content.extend(self._show_dataframe(dataframe))

# Add further sheets
if sheet_names:
for sheet_name in sheet_names[1:]:
dataframe_content.append(f"#### {sheet_name}")
dataframe_content.append(
textwrap.dedent(
f"""\
```{{python}}
#| label: '{dataframe.title} {dataframe.id} {sheet_name}'
#| fig-cap: ""
"""
)
)
dataframe_content.append(
f"df = pd.{read_function.__name__}('{df_file_path.as_posix()}', "
f"sheet_name='{sheet_name}')\n"
)
# Display the dataframe
dataframe_content.extend(
self._show_dataframe(dataframe, suffix=sheet_name)
)

except Exception as e:
self.report.logger.error(
f"Error generating content for DataFrame: {dataframe.title}. Error: {str(e)}"
)
raise
# Add caption if available
# ? Where should this come from?
if dataframe.caption:
dataframe_content.append(f">{dataframe.caption}\n")

Expand Down Expand Up @@ -787,18 +825,24 @@ def _generate_markdown_content(self, markdown) -> List[str]:
try:
# Initialize md code with common structure
markdown_content.append(
f"""
```{{python}}
#| label: '{markdown.title} {markdown.id}'
#| fig-cap: ""\n"""
textwrap.dedent(
f"""
```{{python}}
#| label: '{markdown.title} {markdown.id}'
#| fig-cap: ""
"""
)
)
# If the file path is a URL, generate code to fetch content via requests
if is_url(markdown.file_path):
markdown_content.append(
f"""
response = requests.get('{markdown.file_path}')
response.raise_for_status()
markdown_content = response.text\n"""
textwrap.dedent(
f"""\
response = requests.get('{markdown.file_path}')
response.raise_for_status()
markdown_content = response.text
"""
)
)
else: # If it's a local file
md_rel_path = get_relative_file_path(markdown.file_path, base_path="..")
Expand Down Expand Up @@ -826,14 +870,17 @@ def _generate_markdown_content(self, markdown) -> List[str]:
)
return markdown_content

def _show_dataframe(self, dataframe) -> List[str]:
def _show_dataframe(self, dataframe, suffix: Optional[str] = None) -> List[str]:
"""
Appends either a static image or an interactive representation of a DataFrame to the content list.

Parameters
----------
dataframe : DataFrame
The DataFrame object containing the data to display.
suffix : str, optional
A suffix to append to the DataFrame image file name like a sheet name
or another identifier (default is None).

Returns
-------
Expand All @@ -843,14 +890,19 @@ def _show_dataframe(self, dataframe) -> List[str]:
dataframe_content = []
if self.is_report_static:
# Generate path for the DataFrame image
df_image = (
Path(self.static_dir) / f"{dataframe.title.replace(' ', '_')}.png"
)
fpath_df_image = Path(self.static_dir) / dataframe.title.replace(" ", "_")
if suffix:
fpath_df_image = fpath_df_image.with_stem(
fpath_df_image.stem + f"_{suffix.replace(' ', '_')}"
)
fpath_df_image = fpath_df_image.with_suffix(".png")

dataframe_content.append(
f"df.dfi.export('{Path(df_image).relative_to('quarto_report').as_posix()}', max_rows=10, max_cols=5, table_conversion='matplotlib')\n```\n"
f"df.dfi.export('{Path(fpath_df_image).relative_to('quarto_report').as_posix()}',"
" max_rows=10, max_cols=5, table_conversion='matplotlib')\n```\n"
)
# Use helper method to add centered image content
dataframe_content.append(self._generate_image_content(df_image))
dataframe_content.append(self._generate_image_content(fpath_df_image))
else:
# Append code to display the DataFrame interactively
dataframe_content.append(
Expand Down Expand Up @@ -961,10 +1013,13 @@ def _generate_component_imports(self, component: r.Component) -> List[str]:
"import json",
],
},
"dataframe": [
"static_dataframe": [
"import pandas as pd",
"from itables import show, init_notebook_mode",
"import dataframe_image as dfi",
],
"interactive_dataframe": [
"import pandas as pd",
"from itables import show, init_notebook_mode",
"init_notebook_mode(all_interactive=True)",
],
"markdown": ["import IPython.display as display", "import requests"],
Expand All @@ -980,7 +1035,10 @@ def _generate_component_imports(self, component: r.Component) -> List[str]:
if plot_type in components_imports["plot"]:
component_imports.extend(components_imports["plot"][plot_type])
elif component_type == r.ComponentType.DATAFRAME:
component_imports.extend(components_imports["dataframe"])
if self.is_report_static:
component_imports.extend(components_imports["static_dataframe"])
else:
component_imports.extend(components_imports["interactive_dataframe"])
elif component_type == r.ComponentType.MARKDOWN:
component_imports.extend(components_imports["markdown"])

Expand Down
51 changes: 37 additions & 14 deletions src/vuegen/streamlit_reportview.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
from pathlib import Path
from typing import List

import pandas as pd
from streamlit.web import cli as stcli

from . import report as r
from . import table_utils
from .utils import create_folder, generate_footer, get_relative_file_path, is_url
from .utils.variables import make_valid_identifier

Expand Down Expand Up @@ -721,13 +721,7 @@ def _generate_dataframe_content(self, dataframe) -> List[str]:
)

# Mapping of file extensions to read functions
read_function_mapping = {
r.DataFrameFormat.CSV.value_with_dot: pd.read_csv,
r.DataFrameFormat.PARQUET.value_with_dot: pd.read_parquet,
r.DataFrameFormat.TXT.value_with_dot: pd.read_table,
r.DataFrameFormat.XLS.value_with_dot: pd.read_excel,
r.DataFrameFormat.XLSX.value_with_dot: pd.read_excel,
}
read_function_mapping = table_utils.read_function_mapping

try:
# Check if the file extension matches any DataFrameFormat value
Expand All @@ -738,19 +732,47 @@ def _generate_dataframe_content(self, dataframe) -> List[str]:
self.report.logger.error(
f"Unsupported file extension: {file_extension}. Supported extensions are: {', '.join(fmt.value for fmt in r.DataFrameFormat)}."
)

# Load the DataFrame using the correct function
read_function = read_function_mapping[file_extension]
# return [] # Skip execution if unsupported file extension
# Should it not return here? Can we even call the method with an unsupported file extension?

# Build the file path (URL or local file)
if is_url(dataframe.file_path):
df_file_path = dataframe.file_path
else:
df_file_path = get_relative_file_path(dataframe.file_path)
dataframe_content.append(
f"""df = pd.{read_function.__name__}('{df_file_path.as_posix()}')\n"""
)

if file_extension in [
r.DataFrameFormat.XLS.value_with_dot,
r.DataFrameFormat.XLSX.value_with_dot,
]:
dataframe_content.append("selected_sheet = 0")
sheet_names = table_utils.get_sheet_names(dataframe.file_path)
if len(sheet_names) > 1:
# If there are multiple sheets, ask the user to select one

dataframe_content.append(
textwrap.dedent(
f"""\
sheet_names = table_utils.get_sheet_names("{dataframe.file_path}")
selected_sheet = st.selectbox("Select a sheet to display", options=sheet_names)
"""
)
)

# Load the DataFrame using the correct function
read_function = read_function_mapping[file_extension]
if file_extension in [
r.DataFrameFormat.XLS.value_with_dot,
r.DataFrameFormat.XLSX.value_with_dot,
]:
dataframe_content.append(
f"""df = pd.{read_function.__name__}('{dataframe.file_path}', sheet_name=selected_sheet)\n"""
)
else:
dataframe_content.append(
f"""df = pd.{read_function.__name__}('{df_file_path.as_posix()}')\n"""
)
# ! Alternative to select box: iterate over sheets in DataFrame
# Displays a DataFrame using AgGrid with configurable options.
dataframe_content.append(
"""
Expand Down Expand Up @@ -1169,6 +1191,7 @@ def _generate_component_imports(self, component: r.Component) -> List[str]:
"dataframe": [
"import pandas as pd",
"from st_aggrid import AgGrid, GridOptionsBuilder",
"from vuegen import table_utils",
],
"markdown": ["import requests"],
"chatbot": ["import time", "import json", "import requests"],
Expand Down
30 changes: 30 additions & 0 deletions src/vuegen/table_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import pandas as pd

from . import report as r

# Lookup table: dotted file extension (e.g. the value of
# ``DataFrameFormat.CSV.value_with_dot``) -> pandas reader function.
# Both Excel formats share ``pd.read_excel``.
read_function_mapping = {
    fmt.value_with_dot: reader
    for fmt, reader in (
        (r.DataFrameFormat.CSV, pd.read_csv),
        (r.DataFrameFormat.PARQUET, pd.read_parquet),
        (r.DataFrameFormat.TXT, pd.read_table),
        (r.DataFrameFormat.XLS, pd.read_excel),
        (r.DataFrameFormat.XLSX, pd.read_excel),
    )
}


def get_sheet_names(
    file_path: str,
) -> list[str]:
    """Get the sheet names of an Excel file.

    The workbook is opened only long enough to read its sheet names and is
    closed again before returning, so no file handle is left open.

    Parameters
    ----------
    file_path : str
        Path to the Excel file.

    Returns
    -------
    list[str]
        List of sheet names, as reported by ``pandas.ExcelFile.sheet_names``.
    """
    # ExcelFile keeps the underlying workbook open until it is closed;
    # the context manager releases the handle as soon as the names are read.
    with pd.ExcelFile(file_path) as excel_file:
        return list(excel_file.sheet_names)