Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
b6dc03d
feat: add agent-context CLI introspection
johnnygreco Feb 13, 2026
093c6ad
fix: correct agent-context field descriptions in column configs
johnnygreco Feb 15, 2026
1803015
feat: enhance pydantic and method inspectors with richer field details
johnnygreco Feb 16, 2026
9b4b55f
feat: add Field descriptions and docstrings to config models
johnnygreco Feb 16, 2026
d148d97
feat: enhance formatters with rich field display, dedup, and new form…
johnnygreco Feb 16, 2026
b8255cb
feat: add discovery for namespace tree, interface classes, and imports
johnnygreco Feb 16, 2026
e116352
refactor: rename agent-context CLI to introspect and add new subcommands
johnnygreco Feb 16, 2026
27c7a2d
test: add CLI usage scenario integration tests
johnnygreco Feb 16, 2026
c4ba7ca
refactor: replace introspect command with types and reference command…
johnnygreco Feb 16, 2026
6f05aa2
refactor: update formatters and tests for new types/reference CLI str…
johnnygreco Feb 16, 2026
8370b2d
drop stale review
johnnygreco Feb 16, 2026
8bdfec4
refactor: replace hardcoded discovery functions with introspection-ba…
johnnygreco Feb 16, 2026
e5b9a27
fix: improve introspection defaults and depth checks
johnnygreco Feb 16, 2026
8d6a75e
fix: align enum output across text/json and remove dead try/except
johnnygreco Feb 16, 2026
f1d7157
fix: surface namespace import failures in debug logs
johnnygreco Feb 16, 2026
89a09b3
sort
johnnygreco Feb 16, 2026
db2d471
refactor introspection discovery and normalize typed schema output
johnnygreco Feb 16, 2026
e60a8c2
feat: add data-designer list-assets agent-helper command
johnnygreco Feb 17, 2026
1cabcd0
refactor: replace types/reference commands with inspect agent-helper
johnnygreco Feb 18, 2026
c181f91
feat: add list agent-helper command group
johnnygreco Feb 18, 2026
7aa0e2d
docs: clarify that constraints apply only to sampler columns
johnnygreco Feb 18, 2026
21c2dfb
refactor: rename inspect "builder" subcommand to "config_builder"
johnnygreco Feb 18, 2026
76337af
docs: improve agent-helper CLI help descriptions for agent consumption
johnnygreco Feb 18, 2026
79ad56a
fix: use hyphenated config-builder for CLI subcommand name
johnnygreco Feb 18, 2026
f4bc2b1
docs: tighten agent-helper CLI help descriptions
johnnygreco Feb 18, 2026
3ecf38a
docs: use column header names in list command tips for clarity
johnnygreco Feb 18, 2026
8127348
docs: sharpen inspect and list group-level help descriptions
johnnygreco Feb 18, 2026
aa1d0ad
refactor: remove related_inspect_tip from inspect command output
johnnygreco Feb 18, 2026
f1f076d
refactor: remove dead code from introspection services
johnnygreco Feb 18, 2026
9c8dc76
fix: harden introspection service layer
johnnygreco Feb 18, 2026
d173066
refactor: clean up IntrospectionController
johnnygreco Feb 18, 2026
2c96e92
fix: harden ListController and eliminate DRY violation
johnnygreco Feb 18, 2026
bcd0ca9
docs: polish help text and field description consistency
johnnygreco Feb 18, 2026
b9ce2a6
test: add coverage for introspection edge cases and crash paths
johnnygreco Feb 18, 2026
1f8e3fc
refactor: simplify introspection inspectors without changing output
johnnygreco Feb 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions packages/data-designer-config/src/data_designer/config/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from abc import ABC, abstractmethod

from pydantic import BaseModel, ConfigDict
from pydantic import BaseModel, ConfigDict, Field


class ConfigBase(BaseModel):
Expand All @@ -35,9 +35,11 @@ class SingleColumnConfig(ConfigBase, ABC):
Subclasses must override this field to specify the column type with a `Literal` value.
"""

name: str
drop: bool = False
column_type: str
name: str = Field(description="Unique name of the column to be generated")
drop: bool = Field(
default=False, description="If True, the column will be generated but removed from the final dataset"
)
column_type: str = Field(description="Discriminator field that identifies the specific column type")

@staticmethod
def get_column_emoji() -> str:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,22 @@ class SamplerColumnConfig(SingleColumnConfig):
```
"""

sampler_type: SamplerType
params: Annotated[SamplerParamsT, Discriminator("sampler_type")]
conditional_params: dict[str, Annotated[SamplerParamsT, Discriminator("sampler_type")]] = {}
convert_to: str | None = None
column_type: Literal["sampler"] = "sampler"
sampler_type: SamplerType = Field(
description="Type of sampler to use (e.g., uuid, category, uniform, gaussian, person, datetime)"
)
params: Annotated[SamplerParamsT, Discriminator("sampler_type")] = Field(
description="Parameters specific to the chosen sampler type"
)
conditional_params: dict[str, Annotated[SamplerParamsT, Discriminator("sampler_type")]] = Field(
default_factory=dict,
description="Optional dictionary for conditional parameters; keys are conditions, values are params to use when met",
)
convert_to: str | None = Field(
default=None, description="Optional type conversion after sampling: 'float', 'int', or 'str'"
)
column_type: Literal["sampler"] = Field(
default="sampler", description="Discriminator field, always 'sampler' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand Down Expand Up @@ -136,14 +147,28 @@ class LLMTextColumnConfig(SingleColumnConfig):
column_type: Discriminator field, always "llm-text" for this configuration type.
"""

prompt: str
model_alias: str
system_prompt: str | None = None
multi_modal_context: list[ImageContext] | None = None
tool_alias: str | None = None
with_trace: TraceType = TraceType.NONE
extract_reasoning_content: bool = False
column_type: Literal["llm-text"] = "llm-text"
prompt: str = Field(
description="Jinja2 template for the LLM prompt; can reference other columns via {{ column_name }}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth warning here, or somewhere else, about providing f-strings that could mess up the Jinja template? I've found that Cursor likes to auto-convert this to `f""`.

)
model_alias: str = Field(description="Alias of the model configuration to use for generation")
system_prompt: str | None = Field(
default=None, description="Optional system prompt to set model behavior and constraints"
)
multi_modal_context: list[ImageContext] | None = Field(
default=None, description="Optional list of ImageContext for vision model inputs"
)
tool_alias: str | None = Field(
default=None, description="Optional alias of the tool configuration to use for MCP tool calls"
)
with_trace: TraceType = Field(
default=TraceType.NONE, description="Trace capture mode: NONE, LAST_MESSAGE, or ALL_MESSAGES"
)
extract_reasoning_content: bool = Field(
default=False, description="If True, capture chain-of-thought in {name}__reasoning_content column"
)
column_type: Literal["llm-text"] = Field(
default="llm-text", description="Discriminator field, always 'llm-text' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand Down Expand Up @@ -219,8 +244,12 @@ class LLMCodeColumnConfig(LLMTextColumnConfig):
column containing the reasoning content from the final assistant response.
"""

code_lang: CodeLang
column_type: Literal["llm-code"] = "llm-code"
code_lang: CodeLang = Field(
description="Target programming language or SQL dialect for code extraction from LLM response"
)
column_type: Literal["llm-code"] = Field(
default="llm-code", description="Discriminator field, always 'llm-code' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand Down Expand Up @@ -252,8 +281,12 @@ class LLMStructuredColumnConfig(LLMTextColumnConfig):
column containing the reasoning content from the final assistant response.
"""

output_format: dict | type[BaseModel]
column_type: Literal["llm-structured"] = "llm-structured"
output_format: dict | type[BaseModel] = Field(
description="Pydantic model or JSON schema dict defining the expected structured output shape"
)
column_type: Literal["llm-structured"] = Field(
default="llm-structured", description="Discriminator field, always 'llm-structured' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand Down Expand Up @@ -317,8 +350,12 @@ class LLMJudgeColumnConfig(LLMTextColumnConfig):
column containing the reasoning content from the final assistant response.
"""

scores: list[Score] = Field(..., min_length=1)
column_type: Literal["llm-judge"] = "llm-judge"
scores: list[Score] = Field(
..., min_length=1, description="List of Score objects defining rubric criteria for LLM judge evaluation"
)
column_type: Literal["llm-judge"] = Field(
default="llm-judge", description="Discriminator field, always 'llm-judge' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand All @@ -341,10 +378,13 @@ class ExpressionColumnConfig(SingleColumnConfig):
column_type: Discriminator field, always "expression" for this configuration type.
"""

name: str
expr: str
dtype: Literal["int", "float", "str", "bool"] = "str"
column_type: Literal["expression"] = "expression"
expr: str = Field(description="Jinja2 expression to compute the column value from other columns")
dtype: Literal["int", "float", "str", "bool"] = Field(
default="str", description="Data type for expression result: 'int', 'float', 'str', or 'bool'"
)
column_type: Literal["expression"] = Field(
default="expression", description="Discriminator field, always 'expression' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand Down Expand Up @@ -410,11 +450,13 @@ class ValidationColumnConfig(SingleColumnConfig):
column_type: Discriminator field, always "validation" for this configuration type.
"""

target_columns: list[str]
validator_type: ValidatorType
validator_params: ValidatorParamsT
target_columns: list[str] = Field(description="List of column names to validate")
validator_type: ValidatorType = Field(description="Validation method: 'code', 'local_callable', or 'remote'")
Copy link
Contributor

@nabinchha nabinchha Feb 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is listing `'code'`, `'local_callable'`, or `'remote'` necessary since the field is already strongly typed? Same comment for other similar changes.

validator_params: ValidatorParamsT = Field(description="Validator-specific parameters (e.g., CodeValidatorParams)")
batch_size: int = Field(default=10, ge=1, description="Number of records to process in each batch")
column_type: Literal["validation"] = "validation"
column_type: Literal["validation"] = Field(
default="validation", description="Discriminator field, always 'validation' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand All @@ -441,7 +483,9 @@ class SeedDatasetColumnConfig(SingleColumnConfig):
column_type: Discriminator field, always "seed-dataset" for this configuration type.
"""

column_type: Literal["seed-dataset"] = "seed-dataset"
column_type: Literal["seed-dataset"] = Field(
default="seed-dataset", description="Discriminator field, always 'seed-dataset' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand All @@ -468,9 +512,11 @@ class EmbeddingColumnConfig(SingleColumnConfig):
column_type: Discriminator field, always "embedding" for this configuration type.
"""

target_column: str
model_alias: str
column_type: Literal["embedding"] = "embedding"
target_column: str = Field(description="Name of the text column to generate embeddings for")
model_alias: str = Field(description="Alias of the model to use for embedding generation")
column_type: Literal["embedding"] = Field(
default="embedding", description="Discriminator field, always 'embedding' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand Down Expand Up @@ -502,10 +548,16 @@ class ImageColumnConfig(SingleColumnConfig):
column_type: Discriminator field, always "image" for this configuration type.
"""

prompt: str
model_alias: str
multi_modal_context: list[ImageContext] | None = None
column_type: Literal["image"] = "image"
prompt: str = Field(
description="Jinja2 template for the image generation prompt; can reference other columns via {{ column_name }}"
)
model_alias: str = Field(description="Alias of the model to use for image generation")
multi_modal_context: list[ImageContext] | None = Field(
default=None, description="Optional list of ImageContext for image-to-image generation inputs"
)
column_type: Literal["image"] = Field(
default="image", description="Discriminator field, always 'image' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand Down Expand Up @@ -562,7 +614,9 @@ class CustomColumnConfig(SingleColumnConfig):
default=None,
description="Optional typed configuration object passed as second argument to generator function",
)
column_type: Literal["custom"] = "custom"
column_type: Literal["custom"] = Field(
default="custom", description="Discriminator field, always 'custom' for this configuration type"
)

@field_validator("generator_function")
@classmethod
Expand Down
38 changes: 24 additions & 14 deletions packages/data-designer-config/src/data_designer/config/mcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@ class MCPProvider(ConfigBase):
... )
"""

provider_type: Literal["sse"] = "sse"
name: str
endpoint: str
api_key: str | None = None
provider_type: Literal["sse"] = Field(
default="sse", description="Transport type discriminator, always 'sse' for remote MCP providers"
)
name: str = Field(description="Unique name used to reference this MCP provider")
endpoint: str = Field(description="SSE endpoint URL for connecting to the remote MCP server")
api_key: str | None = Field(default=None, description="Optional API key for authentication")


class LocalStdioMCPProvider(ConfigBase):
Expand All @@ -63,11 +65,15 @@ class LocalStdioMCPProvider(ConfigBase):
... )
"""

provider_type: Literal["stdio"] = "stdio"
name: str
command: str
args: list[str] = Field(default_factory=list)
env: dict[str, str] = Field(default_factory=dict)
provider_type: Literal["stdio"] = Field(
default="stdio", description="Transport type discriminator, always 'stdio' for local subprocess MCP providers"
)
name: str = Field(description="Unique name used to reference this MCP provider")
command: str = Field(description="Executable to launch the MCP server via stdio transport")
args: list[str] = Field(default_factory=list, description="Arguments passed to the MCP server executable")
env: dict[str, str] = Field(
default_factory=dict, description="Environment variables passed to the MCP server subprocess"
)


MCPProviderT: TypeAlias = Annotated[MCPProvider | LocalStdioMCPProvider, Field(discriminator="provider_type")]
Expand Down Expand Up @@ -102,8 +108,12 @@ class ToolConfig(ConfigBase):
... )
"""

tool_alias: str
providers: list[str]
allow_tools: list[str] | None = None
max_tool_call_turns: int = Field(default=5, ge=1)
timeout_sec: float | None = Field(default=None, gt=0)
tool_alias: str = Field(description="User-defined alias to reference this tool configuration in column configs")
providers: list[str] = Field(description="Names of the MCP providers to use for tool calls")
allow_tools: list[str] | None = Field(
default=None, description="Optional allowlist of tool names that restricts which tools are permitted"
)
max_tool_call_turns: int = Field(
default=5, ge=1, description="Maximum number of tool-calling turns permitted in a single generation"
)
timeout_sec: float | None = Field(default=None, gt=0, description="Timeout in seconds for MCP tool calls")
Loading