Skip to content

Commit

Permalink
Fix bug in duplicate image detection
Browse files: browse the repository at this point in the history
Let's skip the image if we're unable to identify it.
  • Loading branch information
bpepple committed Mar 21, 2024
1 parent 62ceeff commit 4ac9e3c
Showing 1 changed file with 23 additions and 12 deletions.
35 changes: 23 additions & 12 deletions metrontagger/duplicates.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -35,7 +35,7 @@ def _image_hashes(self: "Duplicates") -> list[dict[str, any]]:
"""Method to get a list of dicts containing the file path, page index, and page hashes."""
hashes_lst = []
for item in self._file_lst:
comic = Comic(str(item))
comic = Comic(item)
if not comic.is_writable():
questionary.print(f"'{comic}' is not writable. Skipping...")
continue
Expand All @@ -44,17 +44,28 @@ def _image_hashes(self: "Duplicates") -> list[dict[str, any]]:
style=Styles.WARNING,
)
for i in range(comic.get_number_of_pages()):
with Image.open(io.BytesIO(comic.get_page(i))) as img:
try:
img_hash = average_hash(img)
except OSError:
questionary.print(
f"Unable to get image hash for page {i} of '{comic}'",
style=Styles.ERROR,
)
continue
image_info = {"path": str(comic.path), "index": i, "hash": str(img_hash)}
hashes_lst.append(image_info)
try:
with Image.open(io.BytesIO(comic.get_page(i))) as img:
try:
img_hash = average_hash(img)
except OSError:
questionary.print(
f"Unable to get image hash for page {i} of '{comic}'",
style=Styles.ERROR,
)
continue
image_info = {
"path": str(comic.path),
"index": i,
"hash": str(img_hash),
}
hashes_lst.append(image_info)
except UnidentifiedImageError:
questionary.print(
f"UnidentifiedImageError: Skipping page {i} of '{comic}'",
style=Styles.ERROR,
)
continue
return hashes_lst

def _get_page_hashes(self: "Duplicates") -> pd.DataFrame:
Expand Down

0 comments on commit 4ac9e3c

Please sign in to comment.