Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Source ID from ComicInfo.xml to Metadata #133

Merged
merged 8 commits into from
Nov 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion darkseid/comicinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,15 @@
Basic,
Credit,
ImageMetadata,
InfoSources,
Links,
Metadata,
Notes,
Publisher,
Role,
Series,
)
from darkseid.utils import list_to_string, xlate
from darkseid.utils import get_issue_id_from_note, list_to_string, xlate


class ComicInfo:
Expand Down Expand Up @@ -368,6 +369,10 @@ def get_age_rating(age_text: str) -> AgeRatings | None:
md.alternate_count = xlate(get("AlternateCount"), True)
md.comments = xlate(get("Summary"))
md.notes = get_note(xlate(get("Notes")))
if md.notes is not None and md.notes.comic_rack is not None:
src = get_issue_id_from_note(md.notes.comic_rack)
if src is not None:
md.info_source = [InfoSources(src["source"], src["id"], True)]
# Cover Year
tmp_year = xlate(get("Year"), True)
tmp_month = xlate(get("Month"), True)
Expand Down
6 changes: 4 additions & 2 deletions darkseid/metroninfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,8 +284,10 @@ def _assign_series(root: ET.Element, series: Series) -> None:
create_sub_element = ET.SubElement

create_sub_element(series_node, "Name").text = series.name
create_sub_element(series_node, "SortName").text = series.sort_name
create_sub_element(series_node, "Volume").text = str(series.volume)
if series.sort_name is not None:
create_sub_element(series_node, "SortName").text = series.sort_name
if series.volume is not None:
create_sub_element(series_node, "Volume").text = str(series.volume)
create_sub_element(series_node, "Format").text = (
series.format if series.format in MetronInfo.mix_series_format else "Single Issue"
)
Expand Down
44 changes: 44 additions & 0 deletions darkseid/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,54 @@
# Copyright 2019 Brian Pepple

import itertools
import re
from collections import defaultdict
from enum import Enum
from pathlib import Path


# TODO: Change to StrEnum when Python-3.10 support dropped
class DataSources(str, Enum):
COMIC_VINE = "Comic Vine"
METRON = "Metron"
GCD = "Grand Comics Database"


def get_issue_id_from_note(note_txt: str) -> dict[str, str] | None:
"""
Extracts the issue ID from a given note text based on specific keywords and formats.
This function identifies the source of the issue ID and returns it along with the ID itself.

Args:
note_txt (str): The text from which to extract the issue ID.

Returns:
dict[str, str] | None: A dictionary containing the source and the issue ID if found,
otherwise None.
"""

if not note_txt:
return None

note_lower = note_txt.lower()
source_map = {
"comic vine": DataSources.COMIC_VINE,
"metron": DataSources.METRON,
"grand comics database": DataSources.GCD,
}

if "comictagger" in note_lower:
if match := re.search(r"(issue id (\d+))|(cvdb(\d+))", note_lower):
for website, src_enum in source_map.items():
if website in note_lower:
return {"source": src_enum, "id": match[2] or match[4]}
elif "metrontagger" in note_lower: # NOQA: SIM102
if match := re.search(r"issue_id:(\d+)", note_lower):
return {"source": DataSources.METRON, "id": match[1]}

return None


def cast_id_as_str(id_: str | int) -> str:
"""Convert an ID to a string.

Expand Down
56 changes: 56 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,62 @@
import pytest

from darkseid import utils
from darkseid.utils import DataSources, get_issue_id_from_note


# Each case carries its own id via pytest.param so that labels cannot drift out of step
# with the inputs (the previous separate ``ids`` list had "empty_note" and
# "zero_issue_id" swapped).
@pytest.mark.parametrize(
    ("note_txt", "expected"),
    [
        # Happy path tests
        pytest.param(
            "Tagged with ComicTagger 1.6.0b3.dev0 using info from Comic Vine on 2024-11-26 10:44:04. [Issue ID 806681]",
            {"source": DataSources.COMIC_VINE, "id": "806681"},
            id="comic_vine_issue_id",
        ),
        pytest.param(
            "Tagged with the ninjas.walk.alone fork of ComicTagger 1.3.5 using info from Comic Vine on "
            "2023-05-09 22:26:42. [CVDB806681]",
            {"source": DataSources.COMIC_VINE, "id": "806681"},
            id="comic_vine_cvdb",
        ),
        pytest.param(
            "Tagged with ComicTagger 1.6.0b3.dev0 using info from Grand Comics Database on "
            "2024-11-26 10:44:04. [Issue ID 806681]",
            {"source": DataSources.GCD, "id": "806681"},
            id="gcd_issue_id",
        ),
        pytest.param(
            "Tagged with MetronTagger-2.3.0 using info from Metron on 2024-06-22 20:32:47. [issue_id:48013]",
            {"source": DataSources.METRON, "id": "48013"},
            id="metrontagger_issue_id",
        ),
        # Edge cases
        pytest.param("", None, id="empty_note"),
        pytest.param(
            "ComicTagger issue id 0000 Comic Vine",
            {"source": DataSources.COMIC_VINE, "id": "0000"},
            id="zero_issue_id",
        ),
        pytest.param("ComicTagger issue id 1234", None, id="missing_source"),
        pytest.param("MetronTagger issue_id:", None, id="missing_id_after_colon"),
        # Error cases
        pytest.param("Random text with no issue id", None, id="no_issue_id"),
        pytest.param("ComicTagger issue id abc Comic Vine", None, id="non_numeric_issue_id"),
        pytest.param("MetronTagger issue_id:abc", None, id="non_numeric_metron_id"),
    ],
)
def test_get_issue_id_from_note(note_txt, expected):
    """Check that get_issue_id_from_note returns the expected source/id dict or None."""
    # Act
    result = get_issue_id_from_note(note_txt)

    # Assert
    assert result == expected


test_articles = [
pytest.param("The Champions & Inhumans", "Test string with '&'", "Champions Inhumans"),
Expand Down
Loading