Skip to content

Commit

Permalink
update video hasher and create network vis and allow category wall processor to group similar vids
Browse files Browse the repository at this point in the history
  • Loading branch information
dale-wahl committed Dec 14, 2023
1 parent ad59aab commit a4e6904
Show file tree
Hide file tree
Showing 2 changed files with 286 additions and 24 deletions.
23 changes: 18 additions & 5 deletions processors/visualisation/image_category_wall.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ class ImageWallGenerator(BasicProcessor):

number_of_ranges = 10 # number of ranges to use for numeric categories

image_datasets = ["image-downloader", "video-hasher-1"]

config = {
"image-visuals.max_per_cat": {
"type": UserInput.OPTION_TEXT,
Expand All @@ -62,7 +64,10 @@ def is_compatible_with(cls, module=None, user=None):
:param module: Dataset or processor to determine compatibility with
"""
return module.type.startswith("image-to-categories") or module.type.startswith("image-downloader")
return module.type.startswith("image-to-categories") or \
module.type.startswith("image-downloader") or \
module.type.startswith("video-hasher-1") or \
module.type.startswith("video-hash-similarity-matrix")

@classmethod
def get_options(cls, parent_dataset=None, user=None):
Expand Down Expand Up @@ -123,10 +128,10 @@ def identity_dataset_types(source_dataset):
"""
Identify dataset types that are compatible with this processor
"""
if source_dataset.type.startswith("image-downloader"):
if any([source_dataset.type.startswith(dataset_prefix) for dataset_prefix in ImageWallGenerator.image_datasets]):
image_dataset = source_dataset
category_dataset = source_dataset.top_parent()
elif source_dataset.get_parent().type.startswith("image-downloader"):
elif any([source_dataset.get_parent().type.startswith(dataset_prefix) for dataset_prefix in ImageWallGenerator.image_datasets]):
image_dataset = source_dataset.get_parent()
category_dataset = source_dataset
else:
Expand All @@ -147,6 +152,7 @@ def process(self):
if image_dataset.num_rows == 0 or category_dataset == 0:
self.dataset.finish_with_error("No images/categories available to render to image wall.")
return
self.dataset.log(f"Found {image_dataset.type} w/ {image_dataset.num_rows} images and {category_dataset.type} w/ {category_dataset.num_rows} items")

category_column = self.parameters.get("category")
if category_column is None:
Expand All @@ -167,7 +173,10 @@ def process(self):
staging_area = self.unpack_archive_contents(image_dataset.get_results_path())

# Map post IDs to filenames
if special_case:
if image_dataset.type == "video-hasher-1":
# Key each unpacked file by its stem plus ".mp4"; presumably the post IDs here are the original video filenames (TODO confirm against the video-hasher output)
filename_map = {filename.stem + ".mp4": filename for filename in staging_area.iterdir()}
elif special_case:
# We know the post ID is the filename.stem as this dataset is derived from the image dataset
filename_map = {filename.stem: filename for filename in staging_area.iterdir()}
else:
Expand All @@ -176,7 +185,7 @@ def process(self):
image_data = json.load(file)
filename_map = {post_id: staging_area.joinpath(image.get("filename")) for image in image_data.values()
if image.get("success") for post_id in image.get("post_ids")}

self.dataset.log(filename_map)
# Organize posts into categories
category_type = None
categories = {}
Expand Down Expand Up @@ -231,6 +240,10 @@ def process(self):
raise ProcessorException(
f"Mixed category types detected; unable to render image wall (item {i} {post_category})")

if len(categories) == 0:
self.dataset.finish_with_error("No categories found")
return

# Sort collected category results as needed
self.dataset.update_status("Sorting categories")
if special_case and category_column == "top_categories":
Expand Down
Loading

0 comments on commit a4e6904

Please sign in to comment.