Skip to content

Commit 3147adf

Browse files
zoyahavtfx-copybara
authored and committed
Remove is_vocabulary_tfrecord_supported and remove instances of raising an error when vocabulary is called with 'tfrecord_gzip' format when it's not supported.
PiperOrigin-RevId: 493447110
1 parent 520c1de commit 3147adf

File tree

6 files changed

+0
-60
lines changed

6 files changed

+0
-60
lines changed

tensorflow_transform/analyzers.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2010,11 +2010,6 @@ def _vocabulary_analyzer_nodes(
20102010
vocabulary_key: Optional[str] = None
20112011
) -> common_types.TemporaryAnalyzerOutputType:
20122012
"""Internal helper for analyzing vocab. See `vocabulary` doc string."""
2013-
if (file_format == 'tfrecord_gzip' and
2014-
not tf_utils.is_vocabulary_tfrecord_supported()):
2015-
raise ValueError(
2016-
'Vocabulary file_format "tfrecord_gzip" not yet supported for '
2017-
f'{tf.version.VERSION}.')
20182013

20192014
input_values_node = analyzer_nodes.get_input_tensors_value_nodes(
20202015
analyzer_inputs)

tensorflow_transform/beam/vocabulary_tfrecord_gzip_integration_test.py

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -14,40 +14,13 @@
1414
# limitations under the License.
1515
"""Tests for tfrecord_gzip tft.vocabulary and tft.compute_and_apply_vocabulary."""
1616

17-
import tensorflow as tf
18-
from tensorflow_transform import tf2_utils
19-
from tensorflow_transform import tf_utils
2017
from tensorflow_transform.beam import tft_unit
2118
from tensorflow_transform.beam import vocabulary_integration_test
2219

23-
import unittest
24-
25-
mock = tf.compat.v1.test.mock
26-
2720

2821
class TFRecordVocabularyIntegrationTest(
2922
vocabulary_integration_test.VocabularyIntegrationTest):
3023

31-
def setUp(self):
32-
# TODO(b/164921571): Remove mock once tfrecord vocabularies are supported in
33-
# all TF versions.
34-
if not tf2_utils.use_tf_compat_v1(force_tf_compat_v1=False):
35-
self.is_vocabulary_tfrecord_supported_patch = mock.patch(
36-
'tensorflow_transform.tf_utils.is_vocabulary_tfrecord_supported')
37-
mock_is_vocabulary_tfrecord_supported = (
38-
self.is_vocabulary_tfrecord_supported_patch.start())
39-
mock_is_vocabulary_tfrecord_supported.side_effect = lambda: True
40-
41-
if (tft_unit.is_external_environment() and
42-
not tf_utils.is_vocabulary_tfrecord_supported()):
43-
raise unittest.SkipTest('Test requires async DatasetInitializer')
44-
super().setUp()
45-
46-
def tearDown(self):
47-
if not tf2_utils.use_tf_compat_v1(force_tf_compat_v1=False):
48-
self.is_vocabulary_tfrecord_supported_patch.stop()
49-
super().tearDown()
50-
5124
def _VocabFormat(self):
5225
return 'tfrecord_gzip'
5326

tensorflow_transform/experimental/analyzers.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -431,11 +431,6 @@ def _approximate_vocabulary_analyzer_nodes(
431431
file_format: common_types.VocabularyFileFormatType,
432432
vocabulary_key: str) -> common_types.TemporaryAnalyzerOutputType:
433433
"""Internal helper for analyzing vocab. See `vocabulary` doc string."""
434-
if (file_format == 'tfrecord_gzip' and
435-
not tf_utils.is_vocabulary_tfrecord_supported()):
436-
raise ValueError(
437-
'Vocabulary file_format "tfrecord_gzip" requires TF version >= 2.4')
438-
439434
# TODO(b/208879020): Add vocabulary size annotation for this analyzer.
440435
analyzers.register_vocab(
441436
vocab_filename, vocabulary_key=vocabulary_key, file_format=file_format)

tensorflow_transform/mappers.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1088,11 +1088,6 @@ def apply_vocabulary(
10881088
starting from zero, and string value not in the vocabulary is
10891089
assigned default_value.
10901090
"""
1091-
if (file_format == 'tfrecord_gzip' and
1092-
not tf_utils.is_vocabulary_tfrecord_supported()):
1093-
raise ValueError(
1094-
'Vocabulary file_format "tfrecord_gzip" not yet supported for '
1095-
f'{tf.version.VERSION}.')
10961091
with tf.compat.v1.name_scope(name, 'apply_vocab'):
10971092
if x.dtype != tf.string and not x.dtype.is_integer:
10981093
raise ValueError('expected tf.string or tf.int[8|16|32|64] but got %r' %

tensorflow_transform/tf_utils.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import enum
1818
from typing import Callable, Optional, Tuple, Union
1919

20-
from packaging import version
2120
import tensorflow as tf
2221
from tensorflow_transform import annotators
2322
from tensorflow_transform import common_types
@@ -536,16 +535,6 @@ def reorder_histogram(bucket_vocab: tf.Tensor, counts: tf.Tensor,
536535
return tf.gather(counts, ordering)
537536

538537

539-
# TODO(b/62379925): Remove this once all supported TF versions have
540-
# tf.data.experimental.DatasetInitializer.
541-
def is_vocabulary_tfrecord_supported() -> bool:
542-
if isinstance(ops.get_default_graph(), func_graph.FuncGraph):
543-
return False
544-
return ((hasattr(tf.data.experimental, 'DatasetInitializer') or
545-
hasattr(tf.lookup.experimental, 'DatasetInitializer')) and
546-
version.parse(tf.version.VERSION) >= version.parse('2.4'))
547-
548-
549538
# Used to decide which bucket boundary index to assign to a value.
550539
class Side(enum.Enum):
551540
RIGHT = 'right'

tensorflow_transform/tf_utils_test.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
from tensorflow_transform import tf_utils
2424
from tensorflow_transform import test_case
2525

26-
import unittest
2726
from tensorflow.python.framework import composite_tensor # pylint: disable=g-direct-tensorflow-import
2827

2928
_CONSTRUCT_TABLE_PARAMETERS = [
@@ -2273,12 +2272,6 @@ def _to_idf(df, corpus_size):
22732272

22742273
class VocabTFUtilsTest(test_case.TransformTestCase):
22752274

2276-
def setUp(self):
2277-
if (not tf_utils.is_vocabulary_tfrecord_supported() and
2278-
test_case.is_external_environment()):
2279-
raise unittest.SkipTest('Test requires DatasetInitializer')
2280-
super().setUp()
2281-
22822275
def _write_tfrecords(self, path, bytes_records):
22832276
with tf.io.TFRecordWriter(path, 'GZIP') as writer:
22842277
for record in bytes_records:

0 commit comments

Comments (0)