Skip to content

Commit 76f94b5

Browse files
committed
new: add torrent download website and remove old websites
1 parent 948be2f commit 76f94b5

10 files changed

+82
-358
lines changed

app/models/model_t_dontorrent.py

-162
This file was deleted.

app/models/model_t_grantorrent.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -8,24 +8,24 @@
88
import sys
99
from dataclasses import dataclass
1010
from pathlib import PurePath, Path # nueva forma de trabajar con rutas
11-
from typing import Optional, List
11+
from typing import Optional, List, Text
1212

1313
import requests
14+
import urllib3
1415
from bs4 import BeautifulSoup
15-
from requests.packages.urllib3.exceptions import InsecureRequestWarning
1616

1717
# Confirmamos que tenemos en el path la ruta de la aplicacion, para poder lanzarlo desde cualquier ruta
1818
absolut_path: PurePath = PurePath(os.path.realpath(__file__)) # Ruta absoluta del fichero
19-
new_path: str = f'{absolut_path.parent}/../../'
19+
new_path: Text = f'{absolut_path.parent}/../../'
2020
if new_path not in sys.path:
2121
sys.path.append(new_path)
2222

23-
from app.utils.settings import REQ_HEADERS
24-
from app.models.model_t_torrent import Torrent
2523
from app.models.model_t_feedparser import FeedParser
24+
from app.models.model_t_torrent import Torrent
25+
from app.utils.settings import REQ_HEADERS
2626
from app import logger
2727

28-
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
28+
urllib3.disable_warnings()
2929

3030

3131
@dataclass

app/models/model_t_notification.py

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
4+
from __future__ import annotations
5+
6+
from dataclasses import dataclass
7+
8+
9+
@dataclass
10+
class Notification:
11+
pass

app/models/model_t_descargas2020.py app/models/model_t_pctmix.py

+53-42
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import os
77
import re
88
import sys
9-
from dataclasses import dataclass
9+
from dataclasses import dataclass, field
1010
from pathlib import PurePath # nueva forma de trabajar con rutas
1111
from typing import NoReturn, Optional, List, Text, Dict
1212

@@ -20,69 +20,77 @@
2020
if new_path not in sys.path:
2121
sys.path.append(new_path)
2222

23-
from app.models.model_t_torrent import Torrent
2423
from app.models.model_t_feedparser import FeedParser
24+
from app.models.model_t_torrent import Torrent
2525
from app.models.model_t_feed import Feed
2626
from app.utils.settings import REQ_HEADERS
2727
from app import logger
2828

2929
urllib3.disable_warnings()
3030

31-
DOMAIN: Text = "descargas2020"
32-
URL: Text = f'{DOMAIN}.org'
31+
DOMAIN: Text = "pctmix"
32+
URL: Text = f'{DOMAIN}.com'
3333

3434

35-
class FeedparserDescargas2020(FeedParser):
36-
def __init__(self: FeedparserDescargas2020) -> NoReturn:
37-
self.entries: List[Feed] = list()
35+
@dataclass
36+
class FeedparserPctmix(FeedParser):
37+
entries: List[Feed] = field(default_factory=list)
38+
# FIXME ahora mismo no lo uso porque no veo series que salgan con la temporada completa
39+
# cuando salgan, detectarlo y modificar el number
40+
NUMBER: int = 99 # INDICA QUE ES UNA TEMPORADA COMPLETA
3841

3942
@staticmethod
40-
def parse(url: Text = f'https://{URL}/ultimas-descargas/', category: Text = '1469',
41-
dat: Text = 'Hoy') -> FeedparserDescargas2020:
42-
"""
43-
category='1469' series en hd
44-
"""
45-
formdata: Dict[Text] = {'categoryIDR': category, 'date': dat}
43+
def parse(url: Text = f'https://{URL}/ultimas-descargas/') -> FeedparserPctmix:
4644
session: requests.session = requests.session()
47-
login: session.post = session.post(url, data=formdata, headers=REQ_HEADERS, verify=False)
45+
all_html: session.post = session.get(url, headers=REQ_HEADERS, verify=False)
4846

49-
# logger.critical(login)
50-
sopa: BeautifulSoup = BeautifulSoup(login.text, 'html.parser')
51-
# sopa = BeautifulSoup(fichero, 'html.parser')
52-
result: BeautifulSoup.element.ResultSet = sopa.findAll('ul', {"class": "noticias-series"})
47+
sopa: BeautifulSoup = BeautifulSoup(all_html.text, 'html.parser')
48+
result: BeautifulSoup.element.ResultSet = sopa.findAll('ul')
5349

54-
f: FeedparserDescargas2020 = FeedparserDescargas2020()
50+
feed: FeedparserPctmix = FeedparserPctmix()
5551
ul: BeautifulSoup.element.Tag
56-
li: BeautifulSoup.element.Tag
5752
for ul in result:
53+
if not ul.findAll('img'):
54+
continue # skip ul other actions
55+
# foreach all (movies, tv shows, etc)
56+
li: BeautifulSoup.element.Tag
5857
for li in ul.findAll('li'):
59-
# logger.critical(li.div.find('h2').text)
60-
# logger.critical(li.a['href'])
61-
# FIXME CAMBIAR 44 Y 33 POR season Y chapter
62-
f.add(li.div.find('h2').text, 44, 33, li.a['href'], li.div.find('h2').text)
63-
# f.add(li.a['title'], li.a['href'])
64-
# f.add(serie.text, int(chapters[-1]), url)
65-
66-
logger.debug(f)
67-
68-
# for i in f.entries:
69-
# print(i.title)
70-
# print(i.cap)
71-
# print()
72-
73-
return f
58+
# filter quality 720p
59+
if re.search(r'720p', li.div.span.text, re.IGNORECASE):
60+
link: Text = f"{URL}{li.a['href']}"
61+
original_name: Text = li.div.find('a').text.strip()
62+
# check if 1 chapter or more
63+
if re.search(r'Capitulos ', original_name, re.IGNORECASE):
64+
chapter: int = int(re.findall(r'\d+', original_name)[-1])
65+
title: Text = re.sub(r'- Temp.*', '', original_name).strip()
66+
elif re.search(r'Capitulo ', original_name, re.IGNORECASE):
67+
chapter: int = int(re.search(r'Capitulo (\d+)', original_name, re.IGNORECASE).group(1))
68+
title: Text = re.sub(r'Temp\. \d+ Capitulo \d+', '', original_name).strip()
69+
else:
70+
continue # skip if not detect capitulo
71+
72+
season: int = int(re.search(r'Temp. (\d+)', original_name, re.IGNORECASE).group(1))
73+
feed.add(title, season, chapter, link, original_name)
74+
75+
return feed
76+
77+
def __str__(self) -> Text:
78+
response: Text = str()
79+
for i in self.entries:
80+
response += f'{i.title} -> {i.epi}\n'
81+
return response
7482

7583

7684
@dataclass
77-
class Descargas2020(Torrent):
85+
class Pctmix(Torrent):
7886
"""
7987
"""
8088

81-
def download_file_torrent(self: Descargas2020) -> NoReturn:
89+
def download_file_torrent(self: Pctmix) -> NoReturn:
8290
self.url_torrent = self.get_url_torrent()
8391
self._download_file()
8492

85-
def get_url_torrent(self: Descargas2020, bot=None, message: Text = None) -> Optional[Text]:
93+
def get_url_torrent(self: Pctmix, bot=None, message: Text = None) -> Optional[Text]:
8694
"""
8795
Funcion que obtiene la url torrent de la direccion que recibe, hay que tener en cuenta que la url que recibe es
8896
la del feed y que no es la pagina que contiene el torrent, pero como todas tienen la misma forma se modifica la
@@ -114,6 +122,8 @@ def get_url_torrent(self: Descargas2020, bot=None, message: Text = None) -> Opti
114122

115123
elif re.search(regex_recursion, self.url_web):
116124
return self.get_url_torrent(re.sub(regex_recursion, DOMAIN, self.url_web), message)
125+
else:
126+
print("ups")
117127

118128
@staticmethod
119129
def descarga_url_torrent_aux(html_page: Text) -> Optional[Text]:
@@ -147,11 +157,12 @@ def descarga_url_torrent_aux(html_page: Text) -> Optional[Text]:
147157

148158

149159
if __name__ == '__main__':
150-
url1 = 'https://descargas2020.org/descargar/serie-en-hd/the-wall/temporada-1/capitulo-07/descargas2020-org'
151-
t = Descargas2020(title='test1', url_web=url1, path_download=PurePath('/home/procamora/Documents/Gestor-Series/'))
160+
url1 = 'https://pctmix.com/descargar/cine-alta-definicion-hd/the-boy-la-maldicion-de-brahms/bluray-microhd/'
161+
t = Pctmix(title='test1', url_web=url1, path_download=PurePath('/home/procamora/Documents/Gestor-Series/'))
152162
print(t.get_url_torrent())
153163
# t.download_file_torrent()
154164
# print(t)
155165

156-
f = FeedparserDescargas2020()
157-
# print(f.parse())
166+
f1 = FeedparserPctmix()
167+
# f1.parse()
168+
print(f1.parse())

app/models/model_t_torrent.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
#!/usr/bin/env python3
22
# -*- coding: utf-8 -*-
3+
34
from __future__ import annotations
45

56
import re
67
from abc import ABC, abstractmethod
78
from dataclasses import dataclass
89
from pathlib import PurePath, Path # nueva forma de trabajar con rutas
910
from typing import NoReturn, Text
11+
from datetime import datetime
1012

1113
import requests
1214

@@ -39,7 +41,10 @@ def _download_file(self: Torrent) -> NoReturn:
3941
return
4042
r: requests.Response = requests.get(self.url_torrent, headers=REQ_HEADERS, verify=False)
4143
logger.info(f'download file: {self.path_file_torrent}')
42-
with open(str(self.path_file_torrent), "wb") as code:
44+
now = datetime.now() # current date and time
45+
uniq: Text = now.strftime("%Y%d%m_%H%M%S_%f")
46+
uniq_path = Path(self.path_file_torrent.parent, f'{uniq}_{str(self.path_file_torrent.name).replace(" ", "_")}')
47+
with open(str(uniq_path), "wb") as code:
4348
code.write(r.content)
4449

4550
@abstractmethod

0 commit comments

Comments
 (0)