Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/guidellm/benchmark/entrypoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ async def benchmark_generative_text(
else "infinite" # default to infinite so we don't run out of data
),
random_seed=random_seed,
max_requests=max_requests,
)
unique_requests = request_loader.num_unique_items(raise_err=False)
console.print_line(
Expand Down
4 changes: 3 additions & 1 deletion src/guidellm/dataset/creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,15 @@ def create(
processor_args: Optional[dict[str, Any]],
random_seed: int = 42,
split_pref_order: Optional[list[str]] = None,
max_requests: Optional[int] = None,
) -> tuple[Union[Dataset, IterableDataset], dict[ColumnInputTypes, str]]:
if not cls.is_supported(data, data_args):
raise ValueError(f"Unsupported data type: {type(data)} given for {data}. ")

split = cls.extract_args_split(data_args)
column_mappings = cls.extract_args_column_mappings(data_args)
dataset = cls.handle_create(
data, data_args, processor, processor_args, random_seed
data, data_args, processor, processor_args, random_seed, max_requests
)

if isinstance(dataset, (DatasetDict, IterableDatasetDict)):
Expand Down Expand Up @@ -210,4 +211,5 @@ def handle_create(
processor: Optional[Union[str, Path, PreTrainedTokenizerBase]],
processor_args: Optional[dict[str, Any]],
random_seed: int,
max_requests: Optional[int] = None,
) -> Union[Dataset, DatasetDict, IterableDataset, IterableDatasetDict]: ...
2 changes: 2 additions & 0 deletions src/guidellm/dataset/entrypoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def load_dataset(
processor_args: Optional[dict[str, Any]],
random_seed: int = 42,
split_pref_order: Optional[list[str]] = None,
max_requests: Optional[int] = None,
) -> tuple[Union[Dataset, IterableDataset], dict[ColumnInputTypes, str]]:
creators = [
InMemoryDatasetCreator,
Expand All @@ -37,6 +38,7 @@ def load_dataset(
processor_args,
random_seed,
split_pref_order,
max_requests,
)

raise ValueError(f"Unsupported data type: {type(data)} given for {data}. ")
1 change: 1 addition & 0 deletions src/guidellm/dataset/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def handle_create(
processor: Optional[Union[str, Path, PreTrainedTokenizerBase]], # noqa: ARG003
processor_args: Optional[dict[str, Any]], # noqa: ARG003
random_seed: int, # noqa: ARG003
max_requests: Optional[int] = None, # noqa: ARG003
) -> Union[Dataset, DatasetDict, IterableDataset, IterableDatasetDict]:
if not isinstance(data, (str, Path)):
raise ValueError(f"Unsupported data type: {type(data)} given for {data}. ")
Expand Down
1 change: 1 addition & 0 deletions src/guidellm/dataset/hf_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def handle_create(
processor: Optional[Union[str, Path, PreTrainedTokenizerBase]], # noqa: ARG003
processor_args: Optional[dict[str, Any]], # noqa: ARG003
random_seed: int, # noqa: ARG003
max_requests: Optional[int] = None, # noqa: ARG003
) -> Union[Dataset, DatasetDict, IterableDataset, IterableDatasetDict]:
if isinstance(data, (str, Path)):
data = load_dataset(data, **(data_args or {}))
Expand Down
1 change: 1 addition & 0 deletions src/guidellm/dataset/in_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def handle_create(
processor: Optional[Union[str, Path, PreTrainedTokenizerBase]], # noqa: ARG003
processor_args: Optional[dict[str, Any]], # noqa: ARG003
random_seed: int, # noqa: ARG003
max_requests: Optional[int] = None, # noqa: ARG003
) -> Union[Dataset, DatasetDict, IterableDataset, IterableDatasetDict]:
if not isinstance(data, Iterable):
raise TypeError(
Expand Down
3 changes: 3 additions & 0 deletions src/guidellm/dataset/synthetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ def handle_create(
processor: Optional[Union[str, Path, PreTrainedTokenizerBase]],
processor_args: Optional[dict[str, Any]],
random_seed: int,
max_requests: Optional[int] = None,
) -> Union[Dataset, DatasetDict, IterableDataset, IterableDatasetDict]:
processor = check_load_processor(
processor,
Expand All @@ -262,6 +263,8 @@ def handle_create(
)

config = SyntheticDatasetConfig.parse_str(data)
if "samples=" not in str(data) and max_requests is not None:
config.samples = max_requests
generator = SyntheticTextItemsGenerator(config, processor, random_seed)
items = list(generator)

Expand Down
4 changes: 3 additions & 1 deletion src/guidellm/request/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def __init__(
shuffle: bool = True,
iter_type: Literal["finite", "infinite"] = "finite",
random_seed: int = 42,
max_requests: Optional[int] = None,
):
self.data = data
self.data_args = data_args
Expand All @@ -93,6 +94,7 @@ def __init__(
processor,
processor_args,
random_seed,
max_requests=max_requests,
)
self.dataset = dataset
self.processor = processor
Expand Down Expand Up @@ -281,4 +283,4 @@ def _create_request(self, item: dict[str, Any]) -> GenerationRequest:
constraints=(
{"output_tokens": output_tokens} if output_tokens is not None else {}
),
)
)