Skip to content

Commit

Permalink
image_category_wall fix float categories
Browse files (browse the repository at this point in the history)
  • Loading branch information
dale-wahl committed May 30, 2024
1 parent e0c55a8 commit 0087457
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 9 deletions.
2 changes: 1 addition & 1 deletion datasources/douyin/search_douyin.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ def map_item(item):
"post_source_domain": urllib.parse.unquote(metadata.get("source_platform_url")),
# Adding this as different Douyin pages contain different data
"post_url": f"https://www.douyin.com/video/{item[aweme_id_key]}",
- "region": item.get("region"),
+ "region": item.get("region", ""),
"hashtags": ",".join(
[tag[hashtag_key] for tag in (item[text_extra_key] if item[text_extra_key] is not None else []) if
hashtag_key in tag]),
Expand Down
19 changes: 11 additions & 8 deletions processors/visualisation/image_category_wall.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def process(self):
image_data = json.load(file)
filename_map = {post_id: staging_area.joinpath(image.get("filename")) for image in image_data.values()
if image.get("success") for post_id in image.get("post_ids")}
- self.dataset.log(filename_map)
+
# Organize posts into categories
category_type = None
categories = {}
Expand All @@ -200,10 +200,7 @@ def process(self):
continue

# Identify category type and collect post_category
- if post.get(category_column) is None:
- self.dataset.finish_with_error("Unable to find category column in dataset")
- return
- elif special_case and category_column == "top_categories":
+ if special_case and category_column == "top_categories":
if category_type is None:
category_type = float
# Special case
Expand All @@ -218,8 +215,11 @@ def process(self):
else:
if category_type is None:
try:
- float(post.get(category_column))
- category_type = float
+ if post.get(category_column) is None:
+ category_type = str
+ else:
+ float(post.get(category_column))
+ category_type = float
except ValueError:
category_type = str

Expand All @@ -232,6 +232,9 @@ def process(self):
else:
categories[post_category].append({"id": post.get("id")})
elif category_type == float:
+ if post.get(category_column) is None:
+ self.dataset.log(f"Post {post.get('id')} has no data; skipping")
+ continue
try:
post_category = float(post.get(category_column))
post_values.append((post_category, post.get("id")))
Expand All @@ -240,7 +243,7 @@ def process(self):
raise ProcessorException(
f"Mixed category types detected; unable to render image wall (item {i} {post_category})")

- if len(categories) == 0:
+ if len(categories) == 0 and len(post_values) == 0:
self.dataset.finish_with_error("No categories found")
return

Expand Down

0 comments on commit 0087457

Please sign in to comment.