17
17
18
18
import abc
19
19
import collections
20
- from collections .abc import Iterator
21
20
import contextlib
22
21
import functools
23
22
import importlib
24
23
import inspect
25
24
import os .path
26
- import time
27
- from typing import ClassVar , Type
25
+ from typing import ClassVar , Dict , Iterator , List , Type , Text , Tuple
28
26
29
- from absl import logging
30
27
from etils import epath
31
28
from tensorflow_datasets .core import constants
32
29
from tensorflow_datasets .core import naming
33
30
from tensorflow_datasets .core import visibility
34
- import tensorflow_datasets .core .logging as _tfds_logging
35
- from tensorflow_datasets .core .logging import call_metadata as _call_metadata
36
31
from tensorflow_datasets .core .utils import py_utils
37
32
from tensorflow_datasets .core .utils import resource_utils
38
33
43
38
# <str snake_cased_name, abstract DatasetBuilder subclass>
44
39
_ABSTRACT_DATASET_REGISTRY = {}
45
40
46
- # Keep track of dict [str (module name), list [DatasetBuilder]]
41
+ # Keep track of Dict [str (module name), List [DatasetBuilder]]
47
42
# This is directly accessed by `tfds.community.builder_cls_from_module` when
48
43
# importing community packages.
49
44
_MODULE_TO_DATASETS = collections .defaultdict (list )
56
51
# <str snake_cased_name, abstract DatasetCollectionBuilder subclass>
57
52
_ABSTRACT_DATASET_COLLECTION_REGISTRY = {}
58
53
59
- # Keep track of dict [str (module name), list [DatasetCollectionBuilder]]
54
+ # Keep track of Dict [str (module name), List [DatasetCollectionBuilder]]
60
55
_MODULE_TO_DATASET_COLLECTIONS = collections .defaultdict (list )
61
56
62
57
# eg for dataset "foo": "tensorflow_datasets.datasets.foo.foo_dataset_builder".
@@ -85,70 +80,6 @@ def skip_registration() -> Iterator[None]:
85
80
_skip_registration = False
86
81
87
82
88
@functools.cache
def _import_legacy_builders() -> None:
  """Imports all legacy dataset-builder packages, exactly once per process.

  Importing a builder module registers its `DatasetBuilder` subclasses as a
  side effect (via `__init_subclass__` registration elsewhere in this file),
  so calling this populates the dataset registry. `functools.cache` makes the
  slow imports run only on the first call; later calls are no-ops.
  """
  # Sub-packages of `tensorflow_datasets` that contain legacy builders.
  modules_to_import = [
      'audio',
      'graphs',
      'image',
      'image_classification',
      'object_detection',
      'nearest_neighbors',
      'question_answering',
      'd4rl',
      'ranking',
      'recommendation',
      'rl_unplugged',
      'rlds.datasets',
      'robotics',
      'robomimic',
      'structured',
      'summarization',
      'text',
      'text_simplification',
      'time_series',
      'translate',
      'video',
      'vision_language',
  ]

  # Capture start time both as a float (for the duration below) and as
  # microseconds on the call metadata that gets logged in `finally`.
  before_dataset_imports = time.time()
  metadata = _call_metadata.CallMetadata()
  metadata.start_time_micros = int(before_dataset_imports * 1e6)
  try:
    # For builds that don't include all dataset builders, we don't want to fail
    # on import errors of dataset builders.
    try:
      for module in modules_to_import:
        importlib.import_module(f'tensorflow_datasets.{module}')
    except (ImportError, ModuleNotFoundError):
      pass

  except Exception as exception:  # pylint: disable=broad-except
    # Any non-import failure is recorded on the metadata and logged, but NOT
    # re-raised: builder registration stays best-effort.
    metadata.mark_error()
    logging.exception(exception)
  finally:
    # Always emit the import-timing log entry, even when an error occurred.
    import_time_ms_dataset_builders = int(
        (time.time() - before_dataset_imports) * 1000
    )
    metadata.mark_end()
    _tfds_logging.tfds_import(
        metadata=metadata,
        import_time_ms_tensorflow=0,
        import_time_ms_dataset_builders=import_time_ms_dataset_builders,
    )
142
-
143
- @functools .cache
144
- def _import_dataset_collections () -> None :
145
- """Imports dataset collections."""
146
- try :
147
- importlib .import_module ('tensorflow_datasets.dataset_collections' )
148
- except (ImportError , ModuleNotFoundError ):
149
- pass
150
-
151
-
152
83
# The implementation of this class follows closely RegisteredDataset.
153
84
class RegisteredDatasetCollection (abc .ABC ):
154
85
"""Subclasses will be registered and given a `name` property."""
@@ -198,24 +129,23 @@ def __init_subclass__(cls, skip_registration=False, **kwargs): # pylint: disabl
198
129
_DATASET_COLLECTION_REGISTRY [cls .name ] = cls
199
130
200
131
201
def list_imported_dataset_collections() -> List[str]:
  """Returns the string names of all `tfds.core.DatasetCollection`s."""
  # The registry maps name -> class; iterating the dict yields the names
  # directly, so there is no need to walk `.items()` and discard the values.
  return sorted(_DATASET_COLLECTION_REGISTRY)
206
139
207
140
208
141
def is_dataset_collection(name: str) -> bool:
  """Returns whether `name` is a registered `tfds.core.DatasetCollection`."""
  return name in _DATASET_COLLECTION_REGISTRY
211
143
212
144
213
145
def imported_dataset_collection_cls (
214
146
name : str ,
215
147
) -> Type [RegisteredDatasetCollection ]:
216
148
"""Returns the Registered dataset class."""
217
- _import_dataset_collections ()
218
-
219
149
if name in _ABSTRACT_DATASET_COLLECTION_REGISTRY :
220
150
raise AssertionError (f'DatasetCollection { name } is an abstract class.' )
221
151
@@ -294,9 +224,8 @@ def _is_builder_available(builder_cls: Type[RegisteredDataset]) -> bool:
294
224
return visibility .DatasetType .TFDS_PUBLIC .is_available ()
295
225
296
226
297
- def list_imported_builders () -> list [str ]:
227
+ def list_imported_builders () -> List [str ]:
298
228
"""Returns the string names of all `tfds.core.DatasetBuilder`s."""
299
- _import_legacy_builders ()
300
229
all_builders = [
301
230
builder_name
302
231
for builder_name , builder_cls in _DATASET_REGISTRY .items ()
@@ -307,8 +236,8 @@ def list_imported_builders() -> list[str]:
307
236
308
237
@functools .lru_cache (maxsize = None )
309
238
def _get_existing_dataset_packages (
310
- datasets_dir : str ,
311
- ) -> dict [ str , tuple [epath .Path , str ]]:
239
+ datasets_dir : Text ,
240
+ ) -> Dict [ Text , Tuple [epath .Path , Text ]]:
312
241
"""Returns existing datasets.
313
242
314
243
Args:
@@ -364,12 +293,7 @@ def imported_builder_cls(name: str) -> Type[RegisteredDataset]:
364
293
raise AssertionError (f'Dataset { name } is an abstract class.' )
365
294
366
295
if name not in _DATASET_REGISTRY :
367
- # Dataset not found in the registry, try to import legacy builders.
368
- # Dataset builders are imported lazily to avoid slowing down the startup
369
- # of the binary.
370
- _import_legacy_builders ()
371
- if name not in _DATASET_REGISTRY :
372
- raise DatasetNotFoundError (f'Dataset { name } not found.' )
296
+ raise DatasetNotFoundError (f'Dataset { name } not found.' )
373
297
374
298
builder_cls = _DATASET_REGISTRY [name ]
375
299
if not _is_builder_available (builder_cls ):
0 commit comments