Use MappedItem in ML processors

digitalmethodsinitiative · Feb 20, 2024 · 7119862
1 parent 32b8790
commit 7119862
Show file tree

Hide file tree

Showing 3 changed files with 14 additions and 5 deletions.
diff --git a/processors/machine-learning/clip_categorize_images.py b/processors/machine-learning/clip_categorize_images.py
@@ -10,6 +10,7 @@
 from common.lib.exceptions import ProcessorInterruptedException
 from common.lib.user_input import UserInput
 from common.config_manager import config
+from common.lib.item_mapping import MappedItem
 
 __author__ = "Dale Wahl"
 __credits__ = ["Dale Wahl"]
@@ -246,15 +247,15 @@ def map_item(item):
             top_cats.append(cat)
             percent += cat[1]
         all_cats = {cat[0]: cat[1] for cat in item.get("categories", [])}
-        return {
+        return MappedItem({
             "id": item.get("id"),
             "top_categories": ", ".join([f"{cat[0]}: {100* cat[1]:.2f}%" for cat in top_cats]),
             "original_url": image_metadata.get("url", ""),
             "image_filename": image_metadata.get("filename", ""),
             "post_ids": ", ".join([str(post_id) for post_id in image_metadata.get("post_ids", [])]),
             "from_dataset": image_metadata.get("from_dataset", ""),
             **all_cats
-        }
+        })
 
     @staticmethod
     def count_result_files(directory):

diff --git a/processors/machine-learning/text_from_image.py b/processors/machine-learning/text_from_image.py
@@ -14,6 +14,7 @@
 from common.lib.helpers import UserInput, convert_to_int
 from backend.lib.processor import BasicProcessor
 from common.lib.exceptions import ProcessorInterruptedException, ProcessorException
+from common.lib.item_mapping import MappedItem
 
 __author__ = "Dale Wahl"
 __credits__ = ["Dale Wahl"]
@@ -258,4 +259,10 @@ def map_item(item):
         """
         For preview frontend
         """
-        return {"filename": item.get("filename"), "model_type": item.get("model_type"), "text": item.get("simplified_text", {}).get("raw_text"), "post_ids": ", ".join([str(post_id) for post_id in item.get("image_metadata", {}).get("post_ids", [])]), "image_url": item.get("image_metadata", {}).get("url")}
+        return MappedItem({
+            "filename": item.get("filename"),
+            "model_type": item.get("model_type"),
+            "text": item.get("simplified_text", {}).get("raw_text"),
+            "post_ids": ", ".join([str(post_id) for post_id in item.get("image_metadata", {}).get("post_ids", [])]),
+            "image_url": item.get("image_metadata", {}).get("url")
+        })
diff --git a/processors/machine-learning/whisper_speech_to_text.py b/processors/machine-learning/whisper_speech_to_text.py
@@ -9,6 +9,7 @@
 from common.lib.exceptions import ProcessorException, ProcessorInterruptedException
 from common.lib.user_input import UserInput
 from common.config_manager import config
+from common.lib.item_mapping import MappedItem
 
 __author__ = "Dale Wahl"
 __credits__ = ["Dale Wahl"]
@@ -251,7 +252,7 @@ def map_item(item):
         """
         fourcat_metadata = item.get("4CAT_metadata")
         audio_metadata = fourcat_metadata.get("audio_metadata")
-        return {
+        return MappedItem({
             "id": fourcat_metadata.get("audio_id"),
             "body": item.get("text", ""),
             "language": item.get("language", ""),
@@ -261,4 +262,4 @@ def map_item(item):
             "original_video_url": audio_metadata.get("url", ""),
             "post_ids": ", ".join(audio_metadata.get("post_ids", [])),
             "from_dataset": audio_metadata.get("from_dataset", "")
-        }
+        })