|
6 | 6 | import os
|
7 | 7 | import re
|
8 | 8 | import sys
|
9 |
| -from dataclasses import dataclass |
| 9 | +from dataclasses import dataclass, field |
10 | 10 | from pathlib import PurePath # nueva forma de trabajar con rutas
|
11 | 11 | from typing import NoReturn, Optional, List, Text, Dict
|
12 | 12 |
|
|
20 | 20 | if new_path not in sys.path:
|
21 | 21 | sys.path.append(new_path)
|
22 | 22 |
|
23 |
| -from app.models.model_t_torrent import Torrent |
24 | 23 | from app.models.model_t_feedparser import FeedParser
|
| 24 | +from app.models.model_t_torrent import Torrent |
25 | 25 | from app.models.model_t_feed import Feed
|
26 | 26 | from app.utils.settings import REQ_HEADERS
|
27 | 27 | from app import logger
|
28 | 28 |
|
29 | 29 | urllib3.disable_warnings()
|
30 | 30 |
|
# Site name; URL below is derived from it.
DOMAIN: Text = "pctmix"
# Bare host name without a scheme; users of URL add "https://" themselves.
URL: Text = f'{DOMAIN}.com'
33 | 33 |
|
34 | 34 |
|
@dataclass
class FeedparserPctmix(FeedParser):
    """Feed parser for the pctmix "ultimas-descargas" page.

    ``parse`` downloads the page, keeps the entries tagged 720p whose name
    carries a chapter and a season marker, and registers each one with
    ``add``.
    """
    # Entries collected while parsing.
    entries: List[Feed] = field(default_factory=list)
    # FIXME: not used right now because no series is being published as a
    # complete season; when that happens, detect it and adjust the number.
    NUMBER: int = 99  # marks a complete season

    @staticmethod
    def _parse_name(original_name: Text) -> Optional[tuple]:
        """Split an entry name into ``(title, season, chapter)``.

        Returns ``None`` when the name has no usable chapter or season
        marker, so the caller can skip the entry instead of crashing on a
        failed regex match.
        """
        if re.search(r'Capitulos ', original_name, re.IGNORECASE):
            # Several chapters in one entry: the last number in the name is
            # taken as the chapter.
            numbers = re.findall(r'\d+', original_name)
            if not numbers:
                return None
            chapter = int(numbers[-1])
            title = re.sub(r'- Temp.*', '', original_name, flags=re.IGNORECASE).strip()
        else:
            single = re.search(r'Capitulo (\d+)', original_name, re.IGNORECASE)
            if single is None:
                return None
            chapter = int(single.group(1))
            title = re.sub(r'Temp\. \d+ Capitulo \d+', '', original_name,
                           flags=re.IGNORECASE).strip()

        # The dot is escaped so only a literal "Temp." matches, consistent
        # with the title pattern above.
        season = re.search(r'Temp\. (\d+)', original_name, re.IGNORECASE)
        if season is None:
            return None
        return title, int(season.group(1)), chapter

    @staticmethod
    def parse(url: Text = f'https://{URL}/ultimas-descargas/') -> 'FeedparserPctmix':
        """Download ``url`` and build a feed with its 720p chapter entries.

        :param url: page to scrape; defaults to the site's latest-downloads page.
        :return: a new ``FeedparserPctmix`` with one ``add`` call per accepted entry.

        Errors raised by ``requests`` while fetching the page are not caught here.
        """
        # Local import: keeps the block self-contained without touching the
        # file's top-level imports.
        from urllib.parse import urljoin

        # Using the session as a context manager releases its connections
        # even when the request fails. TLS verification stays disabled.
        with requests.session() as session:
            all_html = session.get(url, headers=REQ_HEADERS, verify=False)

        sopa = BeautifulSoup(all_html.text, 'html.parser')
        feed = FeedparserPctmix()

        for ul in sopa.find_all('ul'):
            if not ul.find_all('img'):
                continue  # lists without images are other page elements
            # Every item of the list (movies, tv shows, etc).
            for li in ul.find_all('li'):
                div = li.div
                anchor = li.a
                if div is None or div.span is None or anchor is None:
                    continue  # item does not have the expected markup
                # Keep only 720p quality.
                if not re.search(r'720p', div.span.text, re.IGNORECASE):
                    continue
                name_tag = div.find('a')
                href = anchor.get('href')
                if name_tag is None or not href:
                    continue

                original_name = name_tag.text.strip()
                parsed = FeedparserPctmix._parse_name(original_name)
                if parsed is None:
                    logger.debug(f'skipping entry without chapter/season: {original_name}')
                    continue
                title, season, chapter = parsed

                # urljoin yields an absolute https URL whether href is
                # relative ("/descargar/...") or already absolute.
                link = urljoin(f'https://{URL}/', href)
                feed.add(title, season, chapter, link, original_name)

        return feed

    def __str__(self) -> Text:
        """Return one ``title -> epi`` line per stored entry."""
        return ''.join(f'{entry.title} -> {entry.epi}\n' for entry in self.entries)
74 | 82 |
|
75 | 83 |
|
76 | 84 | @dataclass
|
77 |
| -class Descargas2020(Torrent): |
| 85 | +class Pctmix(Torrent): |
78 | 86 | """
|
79 | 87 | """
|
80 | 88 |
|
81 |
| - def download_file_torrent(self: Descargas2020) -> NoReturn: |
| 89 | + def download_file_torrent(self: Pctmix) -> NoReturn: |
82 | 90 | self.url_torrent = self.get_url_torrent()
|
83 | 91 | self._download_file()
|
84 | 92 |
|
85 |
| - def get_url_torrent(self: Descargas2020, bot=None, message: Text = None) -> Optional[Text]: |
| 93 | + def get_url_torrent(self: Pctmix, bot=None, message: Text = None) -> Optional[Text]: |
86 | 94 | """
|
87 | 95 | Funcion que obtiene la url torrent del la dirreccion que recibe,hay que tener en cuenta que la url que recibe es
|
88 | 96 | la del feed y que no es la pagina que contiene el torrent, pero como todas tienen la misma forma se modifica la
|
@@ -114,6 +122,8 @@ def get_url_torrent(self: Descargas2020, bot=None, message: Text = None) -> Opti
|
114 | 122 |
|
115 | 123 | elif re.search(regex_recursion, self.url_web):
|
116 | 124 | return self.get_url_torrent(re.sub(regex_recursion, DOMAIN, self.url_web), message)
|
| 125 | + else: |
| 126 | + print("ups") |
117 | 127 |
|
118 | 128 | @staticmethod
|
119 | 129 | def descarga_url_torrent_aux(html_page: Text) -> Optional[Text]:
|
@@ -147,11 +157,12 @@ def descarga_url_torrent_aux(html_page: Text) -> Optional[Text]:
|
147 | 157 |
|
148 | 158 |
|
if __name__ == '__main__':
    # Manual check 1: resolve the torrent URL for a sample download page.
    sample_url = 'https://pctmix.com/descargar/cine-alta-definicion-hd/the-boy-la-maldicion-de-brahms/bluray-microhd/'
    download_dir = PurePath('/home/procamora/Documents/Gestor-Series/')
    torrent = Pctmix(title='test1', url_web=sample_url, path_download=download_dir)
    resolved = torrent.get_url_torrent()
    print(resolved)

    # Manual check 2: scrape the feed page and print the parsed entries.
    parser = FeedparserPctmix()
    parsed_feed = parser.parse()
    print(parsed_feed)
0 commit comments