diff --git a/assets/docs/sources/tutorial/0_demo.md b/assets/docs/sources/tutorial/0_demo.md index ff09e484..b59e5882 100644 --- a/assets/docs/sources/tutorial/0_demo.md +++ b/assets/docs/sources/tutorial/0_demo.md @@ -144,17 +144,62 @@ page: JmCategoryPage = cl.month_ranking(1) page: JmCategoryPage = cl.week_ranking(1) # 循环获取分页,使用 cl.categories_filter_gen -for page in cl.categories_filter_gen(1, # 起始页码 +for page in cl.categories_filter_gen(page=1, # 起始页码 # 下面是分类参数 - JmMagicConstants.TIME_WEEK, - JmMagicConstants.CATEGORY_ALL, - JmMagicConstants.ORDER_BY_VIEW, + time=JmMagicConstants.TIME_WEEK, + category=JmMagicConstants.CATEGORY_ALL, + order_by=JmMagicConstants.ORDER_BY_VIEW, ): for aid, atitle in page: print(aid, atitle) ``` +## 高级搜索(分类/副分类) + +禁漫网页端的搜索除了常规条件,还支持【分类】和【副分类】的搜索。 + +在任一搜索页面,你会看到本子图的右上方有两个标签。左边的是【分类】,右边的是【副分类】。 + +下面演示代码如何编写。 + +* **注意!!禁漫移动端没有提供如下功能,以下代码仅对网页端生效。** + +```python +# 在编写代码前,建议先熟悉禁漫网页的搜本功能,下面的代码都是对照网页编写的。 +# 网页搜索示例:https://18comic.vip/search/photos/doujin/sub/CG?main_tag=0&search_query=mana&page=1&o=mr&t=a + +from jmcomic import * + +op = create_option_by_file('op.yml') +# 创建网页端client +html_cl = op.new_jm_client(impl='html') + +# 使用站内搜索,指定【分类】和【副分类】 +# 分类 = JmMagicConstants.CATEGORY_DOUJIN = 同人本 +# 副分类 = JmMagicConstants.SUB_DOUJIN_CG = CG本 +# 实际URL:https://18comic.vip/search/photos/doujin/sub/CG?main_tag=0&search_query=mana&page=1&o=mr&t=a +page = html_cl.search_site(search_query='mana', + category=JmMagicConstants.CATEGORY_DOUJIN, + sub_category=JmMagicConstants.SUB_DOUJIN_CG, + page=1, + ) +# 打印page内容 +for aid, atitle in page.iter_id_title(): + print(aid, atitle) + +# 循环获取分页 +for page in html_cl.search_gen(search_query='mana', + category=JmMagicConstants.CATEGORY_DOUJIN, + sub_category=JmMagicConstants.SUB_DOUJIN_CG, + page=1, # 起始页码 + ): + # 打印page内容 + for aid, atitle in page.iter_id_title(): + print(aid, atitle) +``` + + ## 手动创建Client ```python @@ -184,6 +229,4 @@ cl = JmApiClient( domain_list=JmModuleConfig.DOMAIN_API_LIST, 
retry_times=1 ) - - ``` diff --git a/src/jmcomic/__init__.py b/src/jmcomic/__init__.py index 3ead2eb9..7121c1e0 100644 --- a/src/jmcomic/__init__.py +++ b/src/jmcomic/__init__.py @@ -2,7 +2,7 @@ # 被依赖方 <--- 使用方 # config <--- entity <--- toolkit <--- client <--- option <--- downloader -__version__ = '2.5.12' +__version__ = '2.5.14' from .api import * from .jm_plugin import * diff --git a/src/jmcomic/jm_client_impl.py b/src/jmcomic/jm_client_impl.py index 0eda472b..ac255df0 100644 --- a/src/jmcomic/jm_client_impl.py +++ b/src/jmcomic/jm_client_impl.py @@ -237,6 +237,9 @@ class JmHtmlClient(AbstractJmClient): func_to_cache = ['search', 'fetch_detail_entity'] + API_SEARCH = '/search/photos' + API_CATEGORY = '/albums' + def add_favorite_album(self, album_id, folder_id='0', @@ -304,7 +307,12 @@ def search(self, main_tag: int, order_by: str, time: str, + category: str, + sub_category: Optional[str], ) -> JmSearchPage: + """ + 网页搜索API + """ params = { 'main_tag': main_tag, 'search_query': search_query, @@ -313,8 +321,10 @@ def search(self, 't': time, } + url = self.build_search_url(self.API_SEARCH, category, sub_category) + resp = self.get_jm_html( - self.append_params_to_url('/search/photos', params), + self.append_params_to_url(url, params), allow_redirects=True, ) @@ -326,11 +336,31 @@ def search(self, else: return JmPageTool.parse_html_to_search_page(resp.text) + @classmethod + def build_search_url(cls, base: str, category: str, sub_category: Optional[str]): + """ + 构建网页搜索/分类的URL + + 示例: + :param base: "/search/photos" + :param category CATEGORY_DOUJIN + :param sub_category SUB_DOUJIN_CG + :return "/search/photos/doujin/sub/CG" + """ + if category == JmMagicConstants.CATEGORY_ALL: + return base + + if sub_category is None: + return f'{base}/{category}' + else: + return f'{base}/{category}/sub/{sub_category}' + def categories_filter(self, page: int, time: str, category: str, order_by: str, + sub_category: Optional[str] = None, ) -> JmCategoryPage: params = { 'page': 
page, @@ -338,7 +368,7 @@ def categories_filter(self, 't': time, } - url = f'/albums/' + (category if category != JmMagicConstants.CATEGORY_ALL else '') + url = self.build_search_url(self.API_CATEGORY, category, sub_category) resp = self.get_jm_html( self.append_params_to_url(url, params), @@ -573,7 +603,12 @@ def search(self, main_tag: int, order_by: str, time: str, + category: str, + sub_category: Optional[str], ) -> JmSearchPage: + """ + 移动端暂不支持 category和sub_category + """ params = { 'main_tag': main_tag, 'search_query': search_query, @@ -603,7 +638,11 @@ def categories_filter(self, time: str, category: str, order_by: str, + sub_category: Optional[str] = None, ): + """ + 移动端不支持 sub_category + """ # o: mv, mv_m, mv_w, mv_t o = f'{order_by}_{time}' if time != JmMagicConstants.TIME_ALL else order_by diff --git a/src/jmcomic/jm_client_interface.py b/src/jmcomic/jm_client_interface.py index c8abace3..16f86aad 100644 --- a/src/jmcomic/jm_client_interface.py +++ b/src/jmcomic/jm_client_interface.py @@ -308,9 +308,14 @@ def search(self, main_tag: int, order_by: str, time: str, + category: str, + sub_category: Optional[str], ) -> JmSearchPage: """ 搜索【成人A漫】 + 网页端与移动端的搜索有差别: + + - 移动端不支持 category, sub_category参数,网页端支持全部参数 """ raise NotImplementedError @@ -319,55 +324,65 @@ def search_site(self, page: int = 1, order_by: str = JmMagicConstants.ORDER_BY_LATEST, time: str = JmMagicConstants.TIME_ALL, + category: str = JmMagicConstants.CATEGORY_ALL, + sub_category: Optional[str] = None, ): """ 对应禁漫的站内搜索 """ - return self.search(search_query, page, 0, order_by, time) + return self.search(search_query, page, 0, order_by, time, category, sub_category) def search_work(self, search_query: str, page: int = 1, order_by: str = JmMagicConstants.ORDER_BY_LATEST, time: str = JmMagicConstants.TIME_ALL, + category: str = JmMagicConstants.CATEGORY_ALL, + sub_category: Optional[str] = None, ): """ 搜索album的作品 work """ - return self.search(search_query, page, 1, order_by, time) + return 
self.search(search_query, page, 1, order_by, time, category, sub_category) def search_author(self, search_query: str, page: int = 1, order_by: str = JmMagicConstants.ORDER_BY_LATEST, time: str = JmMagicConstants.TIME_ALL, + category: str = JmMagicConstants.CATEGORY_ALL, + sub_category: Optional[str] = None, ): """ 搜索album的作者 author """ - return self.search(search_query, page, 2, order_by, time) + return self.search(search_query, page, 2, order_by, time, category, sub_category) def search_tag(self, search_query: str, page: int = 1, order_by: str = JmMagicConstants.ORDER_BY_LATEST, time: str = JmMagicConstants.TIME_ALL, + category: str = JmMagicConstants.CATEGORY_ALL, + sub_category: Optional[str] = None, ): """ 搜索album的标签 tag """ - return self.search(search_query, page, 3, order_by, time) + return self.search(search_query, page, 3, order_by, time, category, sub_category) def search_actor(self, search_query: str, page: int = 1, order_by: str = JmMagicConstants.ORDER_BY_LATEST, time: str = JmMagicConstants.TIME_ALL, + category: str = JmMagicConstants.CATEGORY_ALL, + sub_category: Optional[str] = None, ): """ 搜索album的登场角色 actor """ - return self.search(search_query, page, 4, order_by, time) + return self.search(search_query, page, 4, order_by, time, category, sub_category) class JmCategoryClient: @@ -384,6 +399,7 @@ def categories_filter(self, time: str, category: str, order_by: str, + sub_category: Optional[str] = None, ) -> JmCategoryPage: """ 分类 @@ -391,6 +407,7 @@ def categories_filter(self, :param page: 页码 :param time: 时间范围,默认是全部时间 :param category: 类别,默认是最新,即显示最新的禁漫本子 + :param sub_category: 副分类,仅网页端有这功能 :param order_by: 排序方式,默认是观看数 """ raise NotImplementedError @@ -522,6 +539,8 @@ def search_gen(self, page: int = 1, order_by: str = JmMagicConstants.ORDER_BY_LATEST, time: str = JmMagicConstants.TIME_ALL, + category: str = JmMagicConstants.CATEGORY_ALL, + sub_category: Optional[str] = None, ) -> Generator[JmSearchPage, Dict, None]: """ 搜索结果的生成器,支持下面这种调用方式: @@ -552,6 
+571,8 @@ def search_gen(self, 'main_tag': main_tag, 'order_by': order_by, 'time': time, + 'category': category, + 'sub_category': sub_category, } yield from self.do_page_iter(params, page, self.search) @@ -561,6 +582,7 @@ def categories_filter_gen(self, time: str = JmMagicConstants.TIME_ALL, category: str = JmMagicConstants.CATEGORY_ALL, order_by: str = JmMagicConstants.ORDER_BY_LATEST, + sub_category: Optional[str] = None, ) -> Generator[JmCategoryPage, Dict, None]: """ 见 search_gen @@ -569,6 +591,7 @@ def categories_filter_gen(self, 'time': time, 'category': category, 'order_by': order_by, + 'sub_category': sub_category, } yield from self.do_page_iter(params, page, self.categories_filter) diff --git a/src/jmcomic/jm_config.py b/src/jmcomic/jm_config.py index b8f7c5c3..05deb044 100644 --- a/src/jmcomic/jm_config.py +++ b/src/jmcomic/jm_config.py @@ -35,7 +35,29 @@ class JmMagicConstants: CATEGORY_DOUJIN_COSPLAY = 'doujin_cosplay' # cosplay CATEGORY_3D = '3D' # 3D CATEGORY_ENGLISH_SITE = 'english_site' # 英文站 - CATEGORY_JM_TEAM = '禁漫漢化組' + + # 副分类 + SUB_CHINESE = 'chinese' # 汉化,通用副分类 + SUB_JAPANESE = 'japanese' # 日语,通用副分类 + + # 其他类(CATEGORY_ANOTHER)的副分类 + SUB_ANOTHER_OTHER = 'other' # 其他漫画 + SUB_ANOTHER_3D = '3d' # 3D + SUB_ANOTHER_COSPLAY = 'cosplay' # cosplay + + # 同人(CATEGORY_DOUJIN)的副分类 + SUB_DOUJIN_CG = 'CG' # CG + SUB_DOUJIN_CHINESE = SUB_CHINESE + SUB_DOUJIN_JAPANESE = SUB_JAPANESE + + # 短篇(CATEGORY_SHORT)的副分类 + SUB_SHORT_CHINESE = SUB_CHINESE + SUB_SHORT_JAPANESE = SUB_JAPANESE + + # 单本(CATEGORY_SINGLE)的副分类 + SUB_SINGLE_CHINESE = SUB_CHINESE + SUB_SINGLE_JAPANESE = SUB_JAPANESE + SUB_SINGLE_YOUTH = 'youth' # 分页大小 PAGE_SIZE_SEARCH = 80 @@ -53,7 +75,7 @@ class JmMagicConstants: APP_TOKEN_SECRET = '18comicAPP' APP_TOKEN_SECRET_2 = '18comicAPPContent' APP_DATA_SECRET = '185Hcomic3PAPP7R' - APP_VERSION = '1.6.7' + APP_VERSION = '1.7.0' APP_HEADERS_TEMPLATE = { 'Accept-Encoding': 'gzip', 'user-agent': 'Mozilla/5.0 (Linux; Android 9; V1938CT 
Build/PQ3A.190705.11211812; wv) AppleWebKit/537.36 (KHTML, ' @@ -65,14 +87,20 @@ class JmMagicConstants: 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,' 'application/signed-exchange;v=b3;q=0.7', 'accept-language': 'zh-CN,zh;q=0.9', - 'sec-ch-ua': '"Not.A/Brand";v="8", "Chromium";v="114", "Google Chrome";v="114"', + 'cache-control': 'no-cache', + 'dnt': '1', + 'pragma': 'no-cache', + 'priority': 'u=0, i', + 'referer': 'https://18comic.vip/', + 'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"', 'sec-ch-ua-mobile': '?0', 'sec-ch-ua-platform': '"Windows"', 'sec-fetch-dest': 'document', 'sec-fetch-mode': 'navigate', 'sec-fetch-site': 'none', 'sec-fetch-user': '?1', - 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 ' + 'upgrade-insecure-requests': '1', + 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 ' 'Safari/537.36', } diff --git a/src/jmcomic/jm_plugin.py b/src/jmcomic/jm_plugin.py index 68104284..844956fd 100644 --- a/src/jmcomic/jm_plugin.py +++ b/src/jmcomic/jm_plugin.py @@ -1,6 +1,7 @@ """ 该文件存放的是option插件 """ +import os.path from .jm_option import * @@ -296,91 +297,63 @@ def invoke(self, zip_dir = JmcomicText.parse_to_abspath(zip_dir) mkdir_if_not_exists(zip_dir) - # 原文件夹 -> zip文件 - dir_zip_dict: Dict[str, Optional[str]] = {} + path_to_delete = [] photo_dict = downloader.download_success_dict[album] if level == 'album': zip_path = self.get_zip_path(album, None, filename_rule, suffix, zip_dir) - self.zip_album(album, photo_dict, zip_path, dir_zip_dict) + self.zip_album(album, photo_dict, zip_path, path_to_delete) elif level == 'photo': for photo, image_list in photo_dict.items(): zip_path = self.get_zip_path(None, photo, filename_rule, suffix, zip_dir) - self.zip_photo(photo, image_list, zip_path, dir_zip_dict) + self.zip_photo(photo, 
image_list, zip_path, path_to_delete) else: ExceptionTool.raises(f'Not Implemented Zip Level: {level}') - self.after_zip(dir_zip_dict) + self.after_zip(path_to_delete) - def zip_photo(self, photo, image_list: list, zip_path: str, dir_zip_dict) -> Optional[str]: + def zip_photo(self, photo, image_list: list, zip_path: str, path_to_delete): """ 压缩photo文件夹 - :returns: photo文件夹路径 """ photo_dir = self.option.decide_image_save_dir(photo) \ if len(image_list) == 0 \ else os.path.dirname(image_list[0][0]) - all_filepath = set(map(lambda t: self.unified_path(t[0]), image_list)) - - if len(all_filepath) == 0: - self.log('无下载文件,无需压缩', 'skip') - return None - from common import backup_dir_to_zip - backup_dir_to_zip( - photo_dir, - zip_path, - acceptor=lambda f: os.path.isdir(f) or self.unified_path(f) in all_filepath - ).close() + backup_dir_to_zip(photo_dir, zip_path) self.log(f'压缩章节[{photo.photo_id}]成功 → {zip_path}', 'finish') - dir_zip_dict[self.unified_path(photo_dir)] = zip_path + path_to_delete.append(self.unified_path(photo_dir)) @staticmethod def unified_path(f): return fix_filepath(f, os.path.isdir(f)) - def zip_album(self, album, photo_dict: dict, zip_path, dir_zip_dict) -> Optional[str]: + def zip_album(self, album, photo_dict: dict, zip_path, path_to_delete): """ 压缩album文件夹 - :returns: album文件夹路径 """ - # 所有下载了的图片文件的路径 - all_filepath: Set[str] = set(path for ls in photo_dict.values() for path, _ in ls) - - if len(all_filepath) == 0: - self.log('无下载文件,无需压缩', 'skip') - return - - # 该本子的所有章节的图片所在文件夹 - photo_dir_list = [self.option.decide_image_save_dir(photo) for photo in photo_dict.keys()] - - # 压缩文件对象 - from common import backup_dir_to_zip + album_dir = self.option.dir_rule.decide_album_root_dir(album) import zipfile - zfile = zipfile.ZipFile(zip_path, 'w') - - for photo_dir in photo_dir_list: - photo_dir = self.unified_path(photo_dir) - backup_dir_to_zip( - photo_dir, - zip_path, - zfile=zfile, - prefix=os.path.basename(photo_dir.rstrip('/')), - acceptor=lambda f: 
os.path.isdir(f) or self.unified_path(f) in all_filepath - ) - dir_zip_dict[photo_dir] = zip_path - - zfile.close() + with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as f: + for photo in photo_dict.keys(): + # 定位到章节所在文件夹 + photo_dir = self.unified_path(self.option.decide_image_save_dir(photo)) + # 章节文件夹标记为删除 + path_to_delete.append(photo_dir) + for file in files_of_dir(photo_dir): + abspath = os.path.join(photo_dir, file) + relpath = os.path.relpath(abspath, album_dir) + f.write(abspath, relpath) self.log(f'压缩本子[{album.album_id}]成功 → {zip_path}', 'finish') - def after_zip(self, dir_zip_dict: Dict[str, Optional[str]]): + def after_zip(self, path_to_delete: List[str]): # 删除所有原文件 - dirs = sorted(dir_zip_dict.keys(), reverse=True) + dirs = sorted(path_to_delete, reverse=True) image_paths = [ path for photo_dict in self.downloader.download_success_dict.values() @@ -650,7 +623,7 @@ def zip_folder_without_password(self, files, zip_path): """ import zipfile - with zipfile.ZipFile(zip_path, 'w') as zipf: + with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf: # 获取文件夹中的文件列表并将其添加到 ZIP 文件中 for file in files: zipf.write(file, arcname=of_file_name(file)) diff --git a/src/jmcomic/jm_toolkit.py b/src/jmcomic/jm_toolkit.py index fca01f2e..d897778b 100644 --- a/src/jmcomic/jm_toolkit.py +++ b/src/jmcomic/jm_toolkit.py @@ -135,7 +135,7 @@ def match_field(field_name: str, pattern: Union[Pattern, List[Pattern]], text): last_pattern = pattern[len(pattern) - 1] # 缩小文本 for i in range(0, len(pattern) - 1): - match = pattern[i].search(text) + match: Match = pattern[i].search(text) if match is None: return None text = match[0] diff --git a/tests/test_jmcomic/test_jm_client.py b/tests/test_jmcomic/test_jm_client.py index 95e328d1..424b016f 100644 --- a/tests/test_jmcomic/test_jm_client.py +++ b/tests/test_jmcomic/test_jm_client.py @@ -155,7 +155,7 @@ def test_getitem_and_slice(self): list2=ans, ) - def test_search_advanced(self): + def test_search_params(self): elist = 
[] def search_and_test(expected_result, params): @@ -310,3 +310,35 @@ def run(arg1, arg2, arg3, arg4): ) return [f.result() for f in future_ls] # 等待执行完毕 + + def test_search_advanced(self): + if not self.client.is_given_type(JmHtmlClient): + return + + # noinspection PyTypeChecker + html_cl: JmHtmlClient = self.client + # 循环获取分页 + for page in html_cl.search_gen( + search_query='mana', + page=1, # 起始页码 + category=JmMagicConstants.CATEGORY_DOUJIN, + sub_category=JmMagicConstants.SUB_DOUJIN_CG, + time=JmMagicConstants.TIME_ALL, + ): + self.print_page(page) + + print_sep() + for page in html_cl.categories_filter_gen( + page=1, # 起始页码 + category=JmMagicConstants.CATEGORY_DOUJIN, + sub_category=JmMagicConstants.SUB_DOUJIN_CG, + time=JmMagicConstants.TIME_ALL, + ): + self.print_page(page) + break + + @staticmethod + def print_page(page): + # 打印page内容 + for aid, atitle in page: + print(aid, atitle)