Support creating text-mode detectors

tyler-romero · tyler-romero · commit b364c6f371a9 · 2025-05-14T17:37:24.000-07:00
diff --git a/src/groundlight/experimental_api.py b/src/groundlight/experimental_api.py
@@ -30,6 +30,7 @@
 from groundlight_openapi_client.model.payload_template_request import PayloadTemplateRequest
 from groundlight_openapi_client.model.rule_request import RuleRequest
 from groundlight_openapi_client.model.status_enum import StatusEnum
+from groundlight_openapi_client.model.text_mode_configuration import TextModeConfiguration
 from groundlight_openapi_client.model.webhook_action_request import WebhookActionRequest
 from model import (
     ROI,
@@ -1053,6 +1054,60 @@ def create_bounding_box_detector(  # noqa: PLR0913 # pylint: disable=too-many-ar
         obj = self.detectors_api.create_detector(detector_creation_input, _request_timeout=DEFAULT_REQUEST_TIMEOUT)
         return Detector.parse_obj(obj.to_dict())
 
+    def create_text_recognition_detector(
+        self,
+        name: str,
+        query: str,
+        *,
+        group_name: Optional[str] = None,
+        confidence_threshold: Optional[float] = None,
+        patience_time: Optional[float] = None,
+        pipeline_config: Optional[str] = None,
+        metadata: Union[dict, str, None] = None,
+    ) -> Detector:
+        """
+        Creates a text recognition detector that can read specified spans of text from images.
+
+        **Example usage**::
+
+            gl = ExperimentalApi()
+
+            # Create a text recognition detector
+            detector = gl.create_text_recognition_detector(
+                name="date_and_time_detector",
+                query="Read the date and time from the bottom left corner of the image.",
+            )
+
+        :param name: A short, descriptive name for the detector.
+        :param query: A question or instruction describing the text to read from the image.
+        :param group_name: Optional name of a group to organize related detectors together.
+        :param confidence_threshold: A value that sets the minimum confidence level required for the ML model's
+                            predictions. If confidence is below this threshold, the query may be sent for human review.
+        :param patience_time: The maximum time in seconds that Groundlight will attempt to generate a
+                            confident prediction before falling back to human review. Defaults to 30 seconds.
+        :param pipeline_config: Advanced usage only. Configuration string needed to instantiate a specific
+                              prediction pipeline for this detector.
+        :param metadata: A dictionary or JSON string containing custom key/value pairs to associate with the detector.
+
+        :return: The created Detector object
+        """
+
+        detector_creation_input = self._prep_create_detector(
+            name=name,
+            query=query,
+            group_name=group_name,
+            confidence_threshold=confidence_threshold,
+            patience_time=patience_time,
+            pipeline_config=pipeline_config,
+            metadata=metadata,
+        )
+        detector_creation_input.mode = ModeEnum.TEXT
+        mode_config = TextModeConfiguration()
+
+        detector_creation_input.mode_configuration = mode_config
+        obj = self.detectors_api.create_detector(detector_creation_input, _request_timeout=DEFAULT_REQUEST_TIMEOUT)
+        return Detector.parse_obj(obj.to_dict())
+
     def _download_mlbinary_url(self, detector: Union[str, Detector]) -> EdgeModelInfo:
         """
         Gets a temporary presigned URL to download the model binaries for the given detector, along
diff --git a/test/unit/test_experimental.py b/test/unit/test_experimental.py
@@ -147,6 +147,23 @@ def test_multiclass_detector(gl_experimental: ExperimentalApi):
     assert mc_iq.result.label in class_names
 
 
+@pytest.mark.skip(
+    reason=(
+        "General users currently currently can't use text recognition detectors. If you have questions, reach out"
+        " to Groundlight support, or upgrade your plan."
+    )
+)
+def test_text_recognition_detector(gl_experimental: ExperimentalApi):
+    """
+    verify that we can create and submit to a text recognition detector
+    """
+    name = f"Test {datetime.utcnow()}"
+    created_detector = gl_experimental.create_text_recognition_detector(name, "What is the date and time?")
+    assert created_detector is not None
+    mc_iq = gl_experimental.submit_image_query(created_detector, "test/assets/dog.jpeg")
+    assert mc_iq.result.label is not None
+
+
 @pytest.mark.skip(
     reason=(
         "General users currently currently can't use bounding box detectors. If you have questions, reach out"
diff --git a/test/unit/test_labels.py b/test/unit/test_labels.py
@@ -64,3 +64,21 @@ def test_multiclass_labels(gl_experimental: ExperimentalApi):
     assert iq1.result.label == "cherry"
     with pytest.raises(ApiException) as _:
         gl_experimental.add_label(iq1, "MAYBE")
+
+
+def test_text_recognition_labels(gl_experimental: ExperimentalApi):
+    name = f"Test text recognition labels{datetime.utcnow()}"
+    det = gl_experimental.create_text_recognition_detector(name, "test_query")
+    iq1 = gl_experimental.submit_image_query(det, "test/assets/cat.jpeg")
+    gl_experimental.add_label(iq1, "apple text")
+    iq1 = gl_experimental.get_image_query(iq1.id)
+    assert iq1.result.label == "apple text"
+    gl_experimental.add_label(iq1, "banana text")
+    iq1 = gl_experimental.get_image_query(iq1.id)
+    assert iq1.result.label == "banana text"
+    gl_experimental.add_label(iq1, "")
+    iq1 = gl_experimental.get_image_query(iq1.id)
+    assert iq1.result.label == ""
+
+    with pytest.raises(ApiException) as _:
+        gl_experimental.add_label(iq1, "MAYBE")