# manga_chooser.py

import time
import urllib.parse
import urllib.request

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

from config_parser import change_to_main_manga_dir, change_to_manga_dir
from manga_crawler import Crawler


def print_formatted_urls(list_urls):
    # Print a numbered menu of the search results and return how many
    # options were shown.
    index = 0
    print("\nChoose the manga you want to check: ")
    for url in list_urls:
        index = index + 1
        name = url.split('/')[4]  # the manga slug in the URL
        print("\n %d.- %s" % (index, name))
    return index


# Alternative way to fetch the search results, using Selenium (unused).
# Note: find_element_by_id/find_element_by_class_name belong to the old
# Selenium 3 API; Selenium 4 replaced them with find_element(By.ID, ...).
def get_html_manga_source_selenium(manga_name):
    driver = webdriver.Firefox()
    driver.set_page_load_timeout(3)
    driver.get("http://mangafox.me/directory/")
    elem = driver.find_element_by_id("searchform_name")
    elem.send_keys(manga_name)
    elem.send_keys(Keys.RETURN)
    time.sleep(6)
    driver.find_element_by_class_name("search_button").click()
    time.sleep(1)
    source = driver.page_source
    driver.close()
    return source


def get_html_manga_source(manga_name):
    # Fetch the advanced-search results page. The manga name is URL-encoded
    # so spaces don't break the query string; every genre filter is left at
    # 0, meaning "no preference".
    link = "http://mangafox.me/search.php?name_method=bw&name=" + urllib.parse.quote_plus(manga_name) + "&type=&author_method=cw&author=&artist_method=cw&artist=&genres[Action]=0&genres[Adult]=0&genres[Adventure]=0&genres[Comedy]=0&genres[Doujinshi]=0&genres[Drama]=0&genres[Ecchi]=0&genres[Fantasy]=0&genres[Gender+Bender]=0&genres[Harem]=0&genres[Historical]=0&genres[Horror]=0&genres[Josei]=0&genres[Martial+Arts]=0&genres[Mature]=0&genres[Mecha]=0&genres[Mystery]=0&genres[One+Shot]=0&genres[Psychological]=0&genres[Romance]=0&genres[School+Life]=0&genres[Sci-fi]=0&genres[Seinen]=0&genres[Shoujo]=0&genres[Shoujo+Ai]=0&genres[Shounen]=0&genres[Shounen+Ai]=0&genres[Slice+of+Life]=0&genres[Smut]=0&genres[Sports]=0&genres[Supernatural]=0&genres[Tragedy]=0&genres[Webtoons]=0&genres[Yaoi]=0&genres[Yuri]=0&released_method=eq&released=&rating_method=eq&rating=&is_completed=&advopts=1"
    response = urllib.request.urlopen(link)
    return response.read().decode(response.headers.get_content_charset())


def get_list_of_mangas(manga_name):
    # Scrape the search results and collect each manga's main-page URL
    # (those hrefs contain exactly five '/' characters).
    list_urls = []
    source = get_html_manga_source(manga_name)
    soup = BeautifulSoup(source, 'html.parser')
    div = soup.find(id="mangalist")
    mangalist = div.find("ul", "list")
    manga_anchors = mangalist.find_all("a")
    for anchor in manga_anchors:
        href = anchor.get('href')
        if href.count('/') == 5:
            if 'http' not in href:
                list_urls.append('http:' + href)  # protocol-relative link
            else:
                list_urls.append(href)
    # Deduplicate while preserving the order the results appeared in.
    return list(dict.fromkeys(list_urls))
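
# A collected URL typically looks like 'http://mangafox.me/manga/one_piece/'
# (hypothetical title): split('/') puts the slug at index 4, which is what
# print_formatted_urls() and main_choose_manga() rely on.
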
def get_user_choice(list_urls):
    # Keep prompting until the user picks a valid menu number.
    while True:
        num_options = print_formatted_urls(list_urls)
        try:
            choice = int(input("\nOption: "))
            if choice in range(1, num_options + 1):
                return list_urls[choice - 1]
            else:
                print("Choose a number in the range...")
        except ValueError:  # int() raises ValueError on non-numeric input
            print("Try again...")


def get_mangas_in_range(crawler, manga_chosen, chapters, path=None):
    # chapters is a string such as "5-10"; split it into [5, 10].
    range_mangas = list(map(int, chapters.split('-')))
    crawler.crawl_image_from_chapters(manga_chosen, range_mangas, path=path)


def get_number_of_chapters(manga_chosen):
    # Collect every chapter number listed on the manga's main page.
    chapters = []
    main_manga_doc = requests.get(manga_chosen)
    soup = BeautifulSoup(main_manga_doc.content, 'html.parser')
    anchors = soup.find_all('a')
    for a in anchors:
        try:
            if 'tips' in a['class']:
                # Chapter hrefs have 8 path parts when a volume segment is
                # present and 7 otherwise; strip the leading 'c' from the
                # chapter segment to get the bare number.
                if len(a['href'].split('/')) == 8:
                    chapt_number = a['href'].split('/')[6][1:]
                else:
                    chapt_number = a['href'].split('/')[5][1:]
                if chapt_number in chapters:
                    print("error.... repeated chapter")
                else:
                    chapters.append(chapt_number)
        except KeyError:
            pass  # anchor without a class attribute
    # The page lists chapters newest-first; return them in ascending order.
    return list(reversed(chapters))
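
# Worked example of the href parsing above (URL shape inferred from the
# split indices, not verified against the live site):
#   'http://mangafox.me/manga/some_title/v01/c005/1.html'.split('/')
#   -> ['http:', '', 'mangafox.me', 'manga', 'some_title', 'v01', 'c005', '1.html']
#   8 parts, so chapt_number = 'c005'[1:] -> '005'
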
def get_all_mangas(crawler, manga_chosen, path=None):
    range_chapters = get_number_of_chapters(manga_chosen)
    crawler.crawl_image_from_chapters(manga_chosen,
                                      range_chapters,
                                      path=path)


# manga_chosen: URL of the manga's main page.
def get_single_manga(crawler, manga_chosen, chapters, path):
    crawler.crawl_image_from_chapter(manga_chosen, chapters, path=path)


# manga_name: string; chapters: string like "5" or "5-10";
# volumen: string (volume number; downloading by volume is unimplemented).
def main_choose_manga(manga_name,
                      chapters=None,
                      volumen=None,
                      path=None,
                      all_manga=False):
    root_dir = change_to_main_manga_dir(path)  # root manga directory
    crawler = Crawler()
    list_urls = list(get_list_of_mangas(manga_name))
    manga_chosen = get_user_choice(list_urls)
    if volumen is not None:
        # Downloading a whole volume is not supported yet.
        return -1
    if chapters is None and all_manga is True:
        # Download every chapter available.
        get_all_mangas(crawler, manga_chosen, path=root_dir)
        return -1
    if chapters is None:
        # Neither a chapter spec nor all_manga was given; nothing to do
        # (testing '-' in None below would raise TypeError).
        print("Nothing to download: pass chapters or all_manga=True.")
        return -1
    if '-' in chapters:
        # Download a range of chapters.
        get_mangas_in_range(crawler, manga_chosen, chapters, path=root_dir)
        return -1
    else:
        # Download a single chapter.
        name = manga_chosen.split('/')[4]
        change_to_manga_dir(path, name, chapters)
        get_single_manga(crawler, manga_chosen, chapters, path=root_dir)
        return -1
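

# Minimal usage sketch (hypothetical title and range; assumes config_parser
# and manga_crawler are importable and that mangafox.me still resolves,
# which it may no longer do):
if __name__ == "__main__":
    # Pick one of the search results interactively, then fetch chapters 1-3.
    main_choose_manga("berserk", chapters="1-3")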