@@ -186,6 +186,7 @@ def __init__(
186
186
features : feature_lib .FeatureConnector | None = None ,
187
187
supervised_keys : SupervisedKeysType | None = None ,
188
188
disable_shuffling : bool = False ,
189
+ nondeterministic_order : bool = False ,
189
190
homepage : str | None = None ,
190
191
citation : str | None = None ,
191
192
metadata : Metadata | None = None ,
@@ -228,7 +229,11 @@ def __init__(
228
229
229
230
Note that selecting features in nested `tfds.features.FeaturesDict`
230
231
objects is not supported.
231
- disable_shuffling: `bool`, specify whether to shuffle the examples.
232
+ disable_shuffling: `bool`, if True, the examples are not shuffled.
233
+ nondeterministic_order: `bool`, if True and the dataset uses beam, it will
234
+ use `NoShuffleBeamWriter` which does not assure deterministic
235
+ shuffling when writing examples to disk. This might result in quicker
236
+ dataset preparation.
232
237
homepage: `str`, optional, the homepage for this dataset.
233
238
citation: `str`, optional, the citation to use for this dataset.
234
239
metadata: `tfds.core.Metadata`, additional object which will be
@@ -268,6 +273,7 @@ def __init__(
268
273
version = str (self ._identity .version ),
269
274
release_notes = self ._identity .release_notes ,
270
275
disable_shuffling = disable_shuffling ,
276
+ nondeterministic_order = nondeterministic_order ,
271
277
config_name = self ._identity .config_name ,
272
278
config_description = self ._identity .config_description ,
273
279
config_tags = self ._identity .config_tags ,
@@ -342,6 +348,7 @@ def from_proto(
342
348
features = features ,
343
349
supervised_keys = supervised_keys ,
344
350
disable_shuffling = proto .disable_shuffling ,
351
+ nondeterministic_order = proto .nondeterministic_order ,
345
352
citation = proto .citation ,
346
353
license = proto .redistribution_info .license ,
347
354
split_dict = splits_lib .SplitDict .from_proto (
@@ -400,6 +407,13 @@ def release_notes(self) -> dict[str, str] | None:
400
407
def disable_shuffling (self ) -> bool :
401
408
return self .as_proto .disable_shuffling
402
409
410
+ @property
411
+ def nondeterministic_order (self ) -> bool :
412
+ return self ._info_proto .nondeterministic_order
413
+
414
+ def set_nondeterministic_order (self , nondeterministic_order : bool ) -> None :
415
+ self ._info_proto .nondeterministic_order = nondeterministic_order
416
+
403
417
@property
404
418
def homepage (self ) -> str :
405
419
urls = self .as_proto .location .urls
@@ -923,6 +937,7 @@ def __repr__(self):
923
937
("features" , _indent (repr (self .features ))),
924
938
("supervised_keys" , self .supervised_keys ),
925
939
("disable_shuffling" , self .disable_shuffling ),
940
+ ("nondeterministic_order" , self .nondeterministic_order ),
926
941
("splits" , splits ),
927
942
("citation" , _indent (f'"""{ self .citation } """' )),
928
943
# Proto adds a \n that we strip.
@@ -940,6 +955,7 @@ def __getstate__(self):
940
955
"features" : self .features ,
941
956
"supervised_keys" : self .supervised_keys ,
942
957
"disable_shuffling" : self .disable_shuffling ,
958
+ "nondeterministic_order" : self .nondeterministic_order ,
943
959
"homepage" : self .homepage ,
944
960
"citation" : self .citation ,
945
961
"metadata" : self .metadata ,
@@ -956,6 +972,7 @@ def __setstate__(self, state):
956
972
features = state ["features" ],
957
973
supervised_keys = state ["supervised_keys" ],
958
974
disable_shuffling = state ["disable_shuffling" ],
975
+ nondeterministic_order = state ["nondeterministic_order" ],
959
976
homepage = state ["homepage" ],
960
977
citation = state ["citation" ],
961
978
metadata = state ["metadata" ],
0 commit comments