Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from __future__ import annotations

import io
from enum import Enum
from pathlib import Path
from typing import TYPE_CHECKING
Expand Down Expand Up @@ -144,19 +145,20 @@ def generate_analysis_report(

render_list.append(Rule(style=RULE_STYLE))

console = Console(record=save_path is not None)
console.print(Group(*render_list), markup=False)

if save_path is not None:
recording_console = Console(record=True, file=io.StringIO())
recording_console.print(Group(*render_list), markup=False)
save_path = str(save_path)
Comment on lines 148 to 151
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't print to the terminal when `--save-results` is passed.

if save_path.endswith(".html"):
console.save_html(save_path)
recording_console.save_html(save_path)
elif save_path.endswith(".svg"):
console.save_svg(save_path, title="")
recording_console.save_svg(save_path, title="")
else:
raise AnalysisReportError(
f"🛑 The extension of the save path must be either .html or .svg. You provided {save_path}."
)
else:
Console().print(Group(*render_list), markup=False)


def create_judge_score_summary_table(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from __future__ import annotations

import html
import io
import json
import logging
import os
Expand Down Expand Up @@ -435,11 +436,14 @@ def display_sample_record(
render_list.append(index_label)

if save_path is not None:
recording_console = Console(record=True, width=display_width)
recording_console = Console(record=True, width=display_width, file=io.StringIO())
recording_console.print(Group(*render_list), markup=False)
_save_console_output(recording_console, save_path, theme=theme)
else:
console.print(Group(*render_list), markup=False)
terminal_width = console.width
capped_width = min(terminal_width, display_width)
display_console = Console(width=capped_width)
display_console.print(Group(*render_list), markup=False)

# Display images at the bottom with captions (only in notebook)
if len(images_to_display_later) > 0:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ def validation_output() -> dict:
}


@pytest.fixture
def record_series(validation_output: dict) -> pd.Series:
    """Build the pandas Series used as the sample record in the display tests.

    Args:
        validation_output: Dict stored under the ``code_validation_result`` key.

    Returns:
        A Series with a ``code`` entry and a ``code_validation_result`` entry.
    """
    sample_record = {
        "code": "print('hello world')",
        "code_validation_result": validation_output,
    }
    return lazy.pd.Series(sample_record)


@pytest.fixture
def config_builder_with_validation(stub_model_configs: list) -> DataDesignerConfigBuilder:
"""Fixture providing a DataDesignerConfigBuilder with a validation column."""
Expand All @@ -54,14 +60,9 @@ def config_builder_with_validation(stub_model_configs: list) -> DataDesignerConf


def test_display_sample_record_twice_no_errors(
validation_output: dict, config_builder_with_validation: DataDesignerConfigBuilder
record_series: pd.Series, config_builder_with_validation: DataDesignerConfigBuilder
) -> None:
"""Test that calling display_sample_record twice on validation output produces no errors."""
sample_record = {"code": "print('hello world')", "code_validation_result": validation_output}

# Convert to pandas Series to match expected input format
record_series = lazy.pd.Series(sample_record)

display_sample_record(record_series, config_builder_with_validation)
display_sample_record(record_series, config_builder_with_validation)

Expand Down Expand Up @@ -98,11 +99,9 @@ def test_get_truncated_list_as_string() -> None:


def test_display_sample_record_save_html(
validation_output: dict, config_builder_with_validation: DataDesignerConfigBuilder, tmp_path: Path
record_series: pd.Series, config_builder_with_validation: DataDesignerConfigBuilder, tmp_path: Path
) -> None:
"""Test that display_sample_record saves HTML with dark-mode style block injected."""
sample_record = {"code": "print('hello world')", "code_validation_result": validation_output}
record_series = lazy.pd.Series(sample_record)
save_path = tmp_path / "output.html"

display_sample_record(record_series, config_builder_with_validation, save_path=save_path)
Expand All @@ -115,11 +114,9 @@ def test_display_sample_record_save_html(


def test_display_sample_record_save_svg(
validation_output: dict, config_builder_with_validation: DataDesignerConfigBuilder, tmp_path: Path
record_series: pd.Series, config_builder_with_validation: DataDesignerConfigBuilder, tmp_path: Path
) -> None:
"""Test that display_sample_record can save output as an SVG file."""
sample_record = {"code": "print('hello world')", "code_validation_result": validation_output}
record_series = lazy.pd.Series(sample_record)
save_path = tmp_path / "output.svg"

display_sample_record(record_series, config_builder_with_validation, save_path=save_path)
Expand All @@ -130,24 +127,19 @@ def test_display_sample_record_save_svg(


def test_display_sample_record_save_invalid_extension(
validation_output: dict, config_builder_with_validation: DataDesignerConfigBuilder, tmp_path: Path
record_series: pd.Series, config_builder_with_validation: DataDesignerConfigBuilder, tmp_path: Path
) -> None:
"""Test that display_sample_record raises an error for unsupported file extensions."""
sample_record = {"code": "print('hello world')", "code_validation_result": validation_output}
record_series = lazy.pd.Series(sample_record)
save_path = tmp_path / "output.txt"

with pytest.raises(DatasetSampleDisplayError, match="must be either .html or .svg"):
display_sample_record(record_series, config_builder_with_validation, save_path=save_path)


def test_display_sample_record_save_path_none_default(
validation_output: dict, config_builder_with_validation: DataDesignerConfigBuilder, tmp_path: Path
record_series: pd.Series, config_builder_with_validation: DataDesignerConfigBuilder, tmp_path: Path
) -> None:
"""Test that display_sample_record with save_path=None prints to console without creating files."""
sample_record = {"code": "print('hello world')", "code_validation_result": validation_output}
record_series = lazy.pd.Series(sample_record)

display_sample_record(record_series, config_builder_with_validation, save_path=None)

assert list(tmp_path.iterdir()) == []
Expand Down Expand Up @@ -222,11 +214,9 @@ def testapply_html_post_processing_always_injects_viewport(tmp_path: Path) -> No


def test_save_console_output_svg_no_dark_mode(
validation_output: dict, config_builder_with_validation: DataDesignerConfigBuilder, tmp_path: Path
record_series: pd.Series, config_builder_with_validation: DataDesignerConfigBuilder, tmp_path: Path
) -> None:
"""Test that SVG files do not receive dark mode CSS injection."""
sample_record = {"code": "print('hello world')", "code_validation_result": validation_output}
record_series = lazy.pd.Series(sample_record)
save_path = tmp_path / "output.svg"

display_sample_record(record_series, config_builder_with_validation, save_path=save_path)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ def preview_command(
non_interactive: bool = typer.Option(
False,
"--non-interactive",
help="Display all records at once instead of browsing interactively.",
help="Display all records at once instead of browsing interactively. Ignored when --save-results is used.",
),
save_results: bool = typer.Option(
False,
"--save-results",
help="Save preview results (dataset, report, and sample records) to the artifact path.",
help="Save results to disk without displaying sample records in the terminal.",
),
artifact_path: str | None = typer.Option(
None,
Expand All @@ -44,49 +44,16 @@ def preview_command(
"dark",
"--theme",
click_type=click.Choice(["dark", "light"], case_sensitive=False),
help="Color theme for saved HTML files (dark or light). Only applies when --save-results is used.",
help="Color theme for HTML output (dark or light). Only applies when --save-results is used.",
),
display_width: int = typer.Option(
DEFAULT_DISPLAY_WIDTH,
"--display-width",
help="Width of the rendered record output in characters.",
help="Maximum width of the rendered record output in characters.",
min=40,
),
) -> None:
"""Generate a preview dataset for fast iteration on your configuration.

Preview results are displayed in the terminal. Use this to quickly validate
your configuration before running a full dataset creation.

By default, records are displayed one at a time in interactive mode. Use
--non-interactive to display all records at once (also used automatically
Comment on lines -56 to -62
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't realize that the entire docstring is printed in the `--help` output.

when output is piped).

Examples:
# Preview from a YAML config
data-designer preview my_config.yaml

# Preview from a Python module
data-designer preview my_config.py

# Preview with custom number of records
data-designer preview my_config.yaml --num-records 5

# Preview from a remote config URL
data-designer preview https://example.com/my_config.yaml

# Display all records without interactive browsing
data-designer preview my_config.yaml --non-interactive

# Save preview results to disk
data-designer preview my_config.yaml --save-results

# Save results to a custom path
data-designer preview my_config.yaml --save-results --artifact-path ./output

# Save results with light theme
data-designer preview my_config.yaml --save-results --theme light
"""
"""Generate a preview dataset for fast iteration on your configuration."""
controller = GenerationController()
controller.run_preview(
config_source=config_source,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from __future__ import annotations

import logging
import sys
from datetime import datetime
from pathlib import Path
Expand All @@ -16,6 +17,9 @@
from data_designer.config.errors import InvalidConfigError
from data_designer.config.utils.constants import DEFAULT_DISPLAY_WIDTH
from data_designer.interface import DataDesigner
from data_designer.logging import LOG_INDENT

logger = logging.getLogger(__name__)

if TYPE_CHECKING:
from data_designer.config.config_builder import DataDesignerConfigBuilder
Expand Down Expand Up @@ -43,8 +47,8 @@ def run_preview(
non_interactive: If True, display all records at once instead of browsing.
save_results: If True, save all preview artifacts to the artifact path.
artifact_path: Directory to save results in, or None for ./artifacts.
theme: Color theme for saved HTML files (dark or light).
display_width: Width of the rendered record output in characters.
theme: Color theme for HTML output (dark or light).
display_width: Maximum width of the rendered record output in characters.
"""
config_builder = self._load_config(config_source)

Expand All @@ -65,50 +69,19 @@ def run_preview(
raise typer.Exit(code=1)

total = len(results.dataset)
use_interactive = not non_interactive and sys.stdin.isatty() and sys.stdout.isatty() and total > 1

if use_interactive:
self._browse_records_interactively(results, total)
else:
self._display_all_records(results, total)

if results.analysis is not None:
console.print()
results.analysis.to_report()

# Save artifacts when requested
if save_results:
try:
resolved_artifact_path = Path(artifact_path) if artifact_path else Path.cwd() / "artifacts"
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
results_dir = resolved_artifact_path / f"preview_results_{timestamp}"
results_dir.mkdir(parents=True, exist_ok=True)

if results.analysis is not None:
results.analysis.to_report(save_path=results_dir / "report.html")

results.dataset.to_parquet(results_dir / "dataset.parquet")

sample_records_dir = results_dir / "sample_records"
sample_records_dir.mkdir(parents=True, exist_ok=True)
for i in range(total):
results.display_sample_record(
index=i,
save_path=sample_records_dir / f"record_{i}.html",
theme=theme,
display_width=display_width,
)
create_sample_records_pager(
sample_records_dir=sample_records_dir,
num_records=total,
num_columns=len(results.dataset.columns),
)
self._save_preview_results(results, total, artifact_path, theme, display_width)
else:
use_interactive = not non_interactive and sys.stdin.isatty() and sys.stdout.isatty() and total > 1
if use_interactive:
self._browse_records_interactively(results, total, display_width)
else:
self._display_all_records(results, total, display_width)

console.print(f" Results saved to: [bold]{results_dir}[/bold]")
console.print(f" Browse records: [bold]{sample_records_dir / PAGER_FILENAME}[/bold]")
except OSError as e:
print_error(f"Failed to save preview results: {e}")
raise typer.Exit(code=1)
if results.analysis is not None:
console.print()
results.analysis.to_report()

console.print()
print_success(f"Preview complete — {total} record(s) generated")
Expand Down Expand Up @@ -204,20 +177,66 @@ def _load_config(self, config_source: str) -> DataDesignerConfigBuilder:
print_error(str(e))
raise typer.Exit(code=1)

def _display_record_with_header(self, results: PreviewResults, index: int, total: int) -> None:
def _save_preview_results(
    self,
    results: PreviewResults,
    total: int,
    artifact_path: str | None,
    theme: Literal["dark", "light"],
    display_width: int,
) -> None:
    """Write the preview report, dataset, and per-record HTML pages to disk.

    All output goes into a timestamped ``preview_results_<timestamp>``
    directory created under ``artifact_path`` (or ``./artifacts`` when no
    path is given).

    Args:
        results: Preview results providing the dataset, the optional analysis, and record rendering.
        total: Number of records to render as individual HTML files.
        artifact_path: Directory to save results in, or None for ./artifacts.
        theme: Color theme for HTML output (dark or light).
        display_width: Maximum width of the rendered record output in characters.

    Raises:
        typer.Exit: With exit code 1 when an OSError occurs while saving.
    """
    try:
        # Falsy values (None or an empty string) fall back to ./artifacts.
        if artifact_path:
            base_dir = Path(artifact_path)
        else:
            base_dir = Path.cwd() / "artifacts"
        run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        results_dir = base_dir / f"preview_results_{run_stamp}"
        results_dir.mkdir(parents=True, exist_ok=True)

        # The analysis report is optional; only write it when one exists.
        if results.analysis is not None:
            report_path = results_dir / "report.html"
            results.analysis.to_report(save_path=report_path)

        parquet_path = results_dir / "dataset.parquet"
        results.dataset.to_parquet(parquet_path)

        sample_records_dir = results_dir / "sample_records"
        sample_records_dir.mkdir(parents=True, exist_ok=True)
        # One HTML page per record, named by its zero-based index.
        for record_index in range(total):
            record_path = sample_records_dir / f"record_{record_index}.html"
            results.display_sample_record(
                index=record_index,
                save_path=record_path,
                theme=theme,
                display_width=display_width,
            )
        column_count = len(results.dataset.columns)
        create_sample_records_pager(
            sample_records_dir=sample_records_dir,
            num_records=total,
            num_columns=column_count,
            theme=theme,
        )

        logger.info(f"{LOG_INDENT}Results path: {results_dir}")
        pager_path = sample_records_dir / PAGER_FILENAME
        logger.info(f"{LOG_INDENT}Browser path: {pager_path}")
    except OSError as save_error:
        print_error(f"Failed to save preview results: {save_error}")
        raise typer.Exit(code=1)

def _display_record_with_header(
self, results: PreviewResults, index: int, total: int, display_width: int = DEFAULT_DISPLAY_WIDTH
) -> None:
"""Display a single record with a record number header."""
console.print(f" [bold]Record {index + 1} of {total}[/bold]")
results.display_sample_record(index=index)
results.display_sample_record(index=index, display_width=display_width)

def _browse_records_interactively(self, results: PreviewResults, total: int) -> None:
def _browse_records_interactively(
self, results: PreviewResults, total: int, display_width: int = DEFAULT_DISPLAY_WIDTH
) -> None:
"""Interactively browse records with single-keypress navigation.

Shows the first record immediately, then waits for navigation keys.
Controls: n/enter=next, p=previous, q/Escape/Ctrl+C=quit.
Navigation wraps around at both ends.
"""
current_index = 0
self._display_record_with_header(results, current_index, total)
self._display_record_with_header(results, current_index, total, display_width)

while True:
console.print()
Expand All @@ -231,9 +250,11 @@ def _browse_records_interactively(self, results: PreviewResults, total: int) ->
else:
current_index = (current_index + 1) % total

self._display_record_with_header(results, current_index, total)
self._display_record_with_header(results, current_index, total, display_width)

def _display_all_records(self, results: PreviewResults, total: int) -> None:
def _display_all_records(
self, results: PreviewResults, total: int, display_width: int = DEFAULT_DISPLAY_WIDTH
) -> None:
"""Display all records without interactive prompts."""
for i in range(total):
self._display_record_with_header(results, i, total)
self._display_record_with_header(results, i, total, display_width)
Loading