Skip to content

Commit

Permalink
Use MappedItem in ML processors
Browse files (browse the repository at this point in the history)
  • Loading branch information
stijn-uva committed Feb 20, 2024
1 parent 32b8790 commit 7119862
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 5 deletions.
5 changes: 3 additions & 2 deletions processors/machine-learning/clip_categorize_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from common.lib.exceptions import ProcessorInterruptedException
from common.lib.user_input import UserInput
from common.config_manager import config
from common.lib.item_mapping import MappedItem

__author__ = "Dale Wahl"
__credits__ = ["Dale Wahl"]
Expand Down Expand Up @@ -246,15 +247,15 @@ def map_item(item):
top_cats.append(cat)
percent += cat[1]
all_cats = {cat[0]: cat[1] for cat in item.get("categories", [])}
return {
return MappedItem({
"id": item.get("id"),
"top_categories": ", ".join([f"{cat[0]}: {100* cat[1]:.2f}%" for cat in top_cats]),
"original_url": image_metadata.get("url", ""),
"image_filename": image_metadata.get("filename", ""),
"post_ids": ", ".join([str(post_id) for post_id in image_metadata.get("post_ids", [])]),
"from_dataset": image_metadata.get("from_dataset", ""),
**all_cats
}
})

@staticmethod
def count_result_files(directory):
Expand Down
9 changes: 8 additions & 1 deletion processors/machine-learning/text_from_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from common.lib.helpers import UserInput, convert_to_int
from backend.lib.processor import BasicProcessor
from common.lib.exceptions import ProcessorInterruptedException, ProcessorException
from common.lib.item_mapping import MappedItem

__author__ = "Dale Wahl"
__credits__ = ["Dale Wahl"]
Expand Down Expand Up @@ -258,4 +259,10 @@ def map_item(item):
"""
For preview frontend
"""
return {"filename": item.get("filename"), "model_type": item.get("model_type"), "text": item.get("simplified_text", {}).get("raw_text"), "post_ids": ", ".join([str(post_id) for post_id in item.get("image_metadata", {}).get("post_ids", [])]), "image_url": item.get("image_metadata", {}).get("url")}
return MappedItem({
"filename": item.get("filename"),
"model_type": item.get("model_type"),
"text": item.get("simplified_text", {}).get("raw_text"),
"post_ids": ", ".join([str(post_id) for post_id in item.get("image_metadata", {}).get("post_ids", [])]),
"image_url": item.get("image_metadata", {}).get("url")
})
5 changes: 3 additions & 2 deletions processors/machine-learning/whisper_speech_to_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from common.lib.exceptions import ProcessorException, ProcessorInterruptedException
from common.lib.user_input import UserInput
from common.config_manager import config
from common.lib.item_mapping import MappedItem

__author__ = "Dale Wahl"
__credits__ = ["Dale Wahl"]
Expand Down Expand Up @@ -251,7 +252,7 @@ def map_item(item):
"""
fourcat_metadata = item.get("4CAT_metadata")
audio_metadata = fourcat_metadata.get("audio_metadata")
return {
return MappedItem({
"id": fourcat_metadata.get("audio_id"),
"body": item.get("text", ""),
"language": item.get("language", ""),
Expand All @@ -261,4 +262,4 @@ def map_item(item):
"original_video_url": audio_metadata.get("url", ""),
"post_ids": ", ".join(audio_metadata.get("post_ids", [])),
"from_dataset": audio_metadata.get("from_dataset", "")
}
})

0 comments on commit 7119862

Please sign in to comment.