Skip to content

Commit

Permalink
clip: handle new and old format
Browse files Browse the repository at this point in the history
  • Loading branch information
dale-wahl committed Jul 17, 2024
1 parent eb76937 commit 78698f6
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions processors/machine_learning/clip_categorize_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,14 +244,23 @@ def map_item(item):
:return:
"""
image_metadata = item.get("image_metadata")
# Update to CLIP output: categories used to be a list of [category_label, percent_float] pairs, but it is now a dict of the form {"predictions": [[category_label, percent_float], ...]}
categories = item.get("categories")
if type(categories) == list:
pass
elif type(categories) == dict and "predictions" in categories:
categories = categories.get("predictions")
else:
raise KeyError("Unexpected categories format; check NDJSON")

top_cats = []
percent = 0
for cat in item.get("categories", []):
for cat in categories:
if percent > .7:
break
top_cats.append(cat)
percent += cat[1]
all_cats = {cat[0]: cat[1] for cat in item.get("categories", [])}
all_cats = {cat[0]: cat[1] for cat in categories}
return MappedItem({
"id": item.get("id"),
"top_categories": ", ".join([f"{cat[0]}: {100* cat[1]:.2f}%" for cat in top_cats]),
Expand Down

0 comments on commit 78698f6

Please sign in to comment.