Skip to content

Commit 9f037e9

Browse files
author
The TensorFlow Datasets Authors
committed
Add nondeterministic_order to dataset_info.py
PiperOrigin-RevId: 691798490
1 parent 7104383 commit 9f037e9

File tree

2 files changed

+19
-1
lines changed

2 files changed

+19
-1
lines changed

tensorflow_datasets/core/dataset_info.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ def __init__(
186186
features: feature_lib.FeatureConnector | None = None,
187187
supervised_keys: SupervisedKeysType | None = None,
188188
disable_shuffling: bool = False,
189+
nondeterministic_order: bool = False,
189190
homepage: str | None = None,
190191
citation: str | None = None,
191192
metadata: Metadata | None = None,
@@ -228,7 +229,11 @@ def __init__(
228229
229230
Note that selecting features in nested `tfds.features.FeaturesDict`
230231
objects is not supported.
231-
disable_shuffling: `bool`, specify whether to shuffle the examples.
232+
disable_shuffling: `bool`, specifies whether to shuffle the examples.
233+
nondeterministic_order: `bool`, if True and the dataset uses beam, it will
234+
use `NoShuffleBeamWriter` which does not guarantee deterministic
235+
shuffling when writing examples to disk. This might result in quicker
236+
dataset preparation.
232237
homepage: `str`, optional, the homepage for this dataset.
233238
citation: `str`, optional, the citation to use for this dataset.
234239
metadata: `tfds.core.Metadata`, additional object which will be
@@ -268,6 +273,7 @@ def __init__(
268273
version=str(self._identity.version),
269274
release_notes=self._identity.release_notes,
270275
disable_shuffling=disable_shuffling,
276+
nondeterministic_order=nondeterministic_order,
271277
config_name=self._identity.config_name,
272278
config_description=self._identity.config_description,
273279
config_tags=self._identity.config_tags,
@@ -342,6 +348,7 @@ def from_proto(
342348
features=features,
343349
supervised_keys=supervised_keys,
344350
disable_shuffling=proto.disable_shuffling,
351+
nondeterministic_order=proto.nondeterministic_order,
345352
citation=proto.citation,
346353
license=proto.redistribution_info.license,
347354
split_dict=splits_lib.SplitDict.from_proto(
@@ -400,6 +407,13 @@ def release_notes(self) -> dict[str, str] | None:
400407
def disable_shuffling(self) -> bool:
401408
return self.as_proto.disable_shuffling
402409

410+
@property
411+
def nondeterministic_order(self) -> bool:
412+
return self._info_proto.nondeterministic_order
413+
414+
def set_nondeterministic_order(self, nondeterministic_order: bool) -> None:
415+
self._info_proto.nondeterministic_order = nondeterministic_order
416+
403417
@property
404418
def homepage(self) -> str:
405419
urls = self.as_proto.location.urls
@@ -923,6 +937,7 @@ def __repr__(self):
923937
("features", _indent(repr(self.features))),
924938
("supervised_keys", self.supervised_keys),
925939
("disable_shuffling", self.disable_shuffling),
940+
("nondeterministic_order", self.nondeterministic_order),
926941
("splits", splits),
927942
("citation", _indent(f'"""{self.citation}"""')),
928943
# Proto add a \n that we strip.
@@ -940,6 +955,7 @@ def __getstate__(self):
940955
"features": self.features,
941956
"supervised_keys": self.supervised_keys,
942957
"disable_shuffling": self.disable_shuffling,
958+
"nondeterministic_order": self.nondeterministic_order,
943959
"homepage": self.homepage,
944960
"citation": self.citation,
945961
"metadata": self.metadata,
@@ -956,6 +972,7 @@ def __setstate__(self, state):
956972
features=state["features"],
957973
supervised_keys=state["supervised_keys"],
958974
disable_shuffling=state["disable_shuffling"],
975+
nondeterministic_order=state["nondeterministic_order"],
959976
homepage=state["homepage"],
960977
citation=state["citation"],
961978
metadata=state["metadata"],

tensorflow_datasets/core/dataset_info_test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -818,6 +818,7 @@ def test_get_split_info_from_proto_unavailable_format(self):
818818
}),
819819
supervised_keys=('image', 'label'),
820820
disable_shuffling=False,
821+
nondeterministic_order=False,
821822
splits={
822823
'test': <SplitInfo num_examples=20, num_shards=1>,
823824
'train': <SplitInfo num_examples=20, num_shards=1>,

0 commit comments

Comments
 (0)