-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathmovieBot.py
427 lines (342 loc) · 13.7 KB
/
movieBot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
#encoding:utf-8
'''
TODO: smart search (智能搜索)
THE WECHAT FUNCTION IS UNDER CONSTRUCTION, USE IT CAREFULLY!
Powered by Wyatt
2019/3/15
The WeChat Movie Bot automatically sends Baidu Cloud resource links for movies.
The search engine is based on fqsousou.com, and the WeChat engine is itchat.
Enjoy it!
'''
######### Initialization start #########
mode_init = 1 # Initial state of the WeChat bot: 1 means enabled, 0 means disabled
bot_name = 'Wyatt电影机器人beta'
adv = 'Power By Wyatt\nAccuracy search based on Baidu Validate' # To send no advertisement, set adv=''
get_movie_number = 5 # Number of resources to fetch
validate_resource_max = 10 # Maximum number of resource links to validate; set to 0 to disable this feature
get_hot_number = 5 # Number of trending movies to fetch; 0 means do not fetch any
use_secrete_ip = 1 # Whether to use anonymous proxy IPs
error_dic = ['百度网盘-链接不存在','关注公众号获取资源','获取资源加'] # Blacklist of Baidu Netdisk keywords
send_online_watch_address = 5 # Number of online-watch links to send; 0 means send none
baidu_short_link_token = '' # Apply for a Baidu short-URL token at https://dwz.cn/console/userinfo ; test token: 9860706e562a94413cc57f7076da665f
######### Initialization end #########
import requests as rq
import random
from lxml import etree
import itchat
import os
from lxml.html import fromstring
import json
# Constants
# Browser-like User-Agent header passed to the scraping requests below.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36'}
# Build the proxy pool (ipLib) at import time when use_secrete_ip is set.
# Each entry is a list [ip, type, port] taken from one table row.
if use_secrete_ip:
    try:
        ipLib = []
        url = 'https://www.xicidaili.com/nn/'
        r = etree.HTML(rq.get(url, headers=header).content)
        # Sample row XPaths (rows 2..101 of the table with id "ip_list"):
        # //*[@id="ip_list"]/tr[2]/td[2]
        # //*[@id="ip_list"]/tr[3]/td[2]
        # //*[@id="ip_list"]/tr[101]/td[2]
        # range(2, 101) reads rows 2..100; a missing row raises IndexError
        # on the [0] lookup and lands in the except branch below.
        for i in range(2, 101):
            ip = r.xpath('//*[@id="ip_list"]/tr[' + str(i) + ']/td[2]/text()')[0]
            # NOTE(review): this module-level name shadows the builtin `type`.
            type = r.xpath('//*[@id="ip_list"]/tr[' + str(i) + ']/td[6]/text()')[0]
            port = r.xpath('//*[@id="ip_list"]/tr[' + str(i) + ']/td[3]/text()')[0]
            ipLib.append([ip,type,port])
    except Exception as e:
        print('匿名ip获取失败!:' + str(e))
        # NOTE(review): terminates the process immediately with exit status 0
        # even though this is the failure path -- confirm this is intended.
        os._exit(0)
else:
    # Proxies disabled: an empty pool makes get_an_ip() return {}.
    ipLib = []
def get_an_ip():
    """Pick a random proxy from the module-level ``ipLib`` pool.

    Returns:
        dict: A one-entry mapping ``{scheme: 'ip:port'}`` meant for the
        ``proxies`` argument of ``requests``, or an empty dict when the
        pool is empty.
    """
    if not ipLib:
        return {}
    ip, scheme, port = random.choice(ipLib)
    # requests selects a proxy by the lower-case URL scheme ('http' /
    # 'https'); a key stored in any other case would never match and the
    # proxy would be silently ignored.
    return {scheme.strip().lower(): ip + ':' + port}
def short(original_link):
    """Shorten a URL through the dwz.cn short-link API (best effort).

    Any failure -- no token configured, network error, a response that is
    not the expected JSON, or a non-zero ``Code`` -- results in the input
    being returned unchanged, so callers can always use the return value
    as a link.

    Args:
        original_link: The URL to shorten.

    Returns:
        The ``ShortUrl`` reported by the API, or ``original_link``.
    """
    token = baidu_short_link_token
    if not token:
        # No token configured: skip the round trip.
        # NOTE(review): assumes the API rejects token-less requests -- confirm.
        return original_link

    api_url = 'https://dwz.cn' + '/admin/v2/create'
    headers = {'Content-Type': 'application/json', 'Token': token}
    payload = json.dumps({'url': original_link})
    try:
        response = rq.post(url=api_url, data=payload, headers=headers, proxies=get_an_ip())
    except Exception:
        return original_link

    try:
        # Parse once; an error page or an unexpected schema must not escape
        # from a helper whose contract is "fall back to the original link".
        result = json.loads(response.text)
        if result['Code'] == 0:
            return result['ShortUrl']
    except (ValueError, KeyError, TypeError):
        pass
    return original_link
def validate_resource(test_url):
    """Check whether a Baidu Netdisk share page looks usable.

    The page at ``test_url`` is downloaded and its ``<title>`` is compared
    against the module-level ``error_dic`` keyword blacklist.

    Args:
        test_url: URL of the share page to check.

    Returns:
        int: 0 if the title contains a blacklisted keyword or the page has
        no title at all, otherwise 1.
    """
    page = rq.get(test_url, headers=header, proxies=get_an_ip()).content
    title = fromstring(page).findtext('.//title')
    # findtext() yields None when there is no <title>; such a page cannot be
    # confirmed as valid, and `in None` would raise TypeError.
    if title is None:
        return 0
    if any(keyword in title for keyword in error_dic):
        return 0
    return 1
def get_online_resource(movie_name):
    """Collect online-watch links for a movie from ifkdy.com.

    Args:
        movie_name: The search term.

    Returns:
        list: Up to ``send_online_watch_address`` links, each passed
        through ``short()``. Fewer are returned when the result page lists
        fewer entries.
    """
    # Let requests build the query string so the search term is URL-encoded.
    page = rq.get('http://ifkdy.com/', params={'q': movie_name},
                  headers=header, proxies=get_an_ip()).content.decode()
    tree = etree.HTML(page)
    links = []
    # XPath positions are 1-based; the upper bound is inclusive so that
    # exactly `send_online_watch_address` links can be returned.
    for position in range(1, send_online_watch_address + 1):
        hrefs = tree.xpath('/html/body/div[2]/div[1]/ul/li[' + str(position) + ']/a/@href')
        if not hrefs:
            break
        links.append(short(hrefs[0]))
    return links
def gain_link(movie_name):
    """Search fqsousou.com and resolve the hits into Baidu Cloud links.

    The result list of the search page is read first; each hit's detail
    page is then fetched to obtain the share link (shortened via
    ``short()``), the resource type and the size. When
    ``validate_resource_max`` is non-zero every share link is additionally
    checked with ``validate_resource()``.

    Args:
        movie_name: The search term.

    Returns:
        list[dict]: At most ``get_movie_number`` entries with the keys
        'name', 'naive_link', 'link', 'type' and 'size'. Empty when nothing
        usable was found.
    """
    from urllib.parse import quote, urljoin

    base_url = 'https://www.fqsousou.com/'

    def ana_naive_link(naive_link):
        """Read [share link, type, size] from one detail page."""
        xpath_link = '/html/body/div[3]/div/div/div/div[1]/div[1]/div[3]/p/a[2]'
        xpath_type = '/html/body/div[3]/div/div/div/div[1]/div[1]/div[2]/dl/dt[2]/label/text()'
        xpath_size = '/html/body/div[3]/div/div/div/div[1]/div[1]/div[2]/dl/dt[3]/label/text()'
        page = etree.HTML(rq.get(naive_link, headers=header, proxies=get_an_ip()).content)
        movie_link = short(page.xpath(xpath_link)[0].attrib.get('href'))
        movie_type = page.xpath(xpath_type)[0]
        # The size is not read for entries whose type is a folder ('文件夹').
        movie_size = '' if movie_type == '文件夹' else page.xpath(xpath_size)[0]
        return [movie_link, movie_type, movie_size]

    # The term becomes a path segment, so escape everything (including '/',
    # '?' and '#') that would otherwise change the URL's structure.
    search_url = base_url + 's/' + quote(movie_name, safe='') + '.html'
    listing = etree.HTML(rq.get(search_url, headers=header, proxies=get_an_ip()).content.decode())
    if listing is None:
        return []

    # Walk the result list item by item until the first position that has
    # no link.
    r = []
    position = 1
    while True:
        anchors = listing.xpath(
            '/html/body/div[3]/div/div/div[2]/div[1]/div[2]/ul/li[' + str(position) + ']/a')
        if not anchors:
            break
        r.append({'name': anchors[0].attrib.get('title'),
                  'naive_link': anchors[0].attrib.get('href')})
        position += 1

    gain_num_limit = min(len(r), get_movie_number)
    if gain_num_limit <= 0:
        return []

    count = 0
    incorrect = set()
    for i, entry in enumerate(r):
        try:
            # urljoin copes with hrefs both with and without a leading '/'.
            resource = ana_naive_link(urljoin(base_url, entry['naive_link']))
            if validate_resource_max and not validate_resource(resource[0]):
                print('ok -- but validate error')
                incorrect.add(i)
                # Give up once a rejected entry sits at or beyond index
                # validate_resource_max.
                if i >= validate_resource_max:
                    break
                continue
            entry['link'] = resource[0]
            entry['type'] = resource[1] or '未知'
            entry['size'] = resource[2] or '未知'
            print('ok')
            count += 1
            if count >= gain_num_limit:
                break
        except Exception as e:
            print('fail: ' + str(e))
            incorrect.add(i)

    # Entries never reached by the loop come after all processed ones, so
    # after dropping the rejected entries the first `count` are exactly the
    # successful ones (this also covers the all-failed case).
    usable = [entry for i, entry in enumerate(r) if i not in incorrect]
    return usable[:count]
# WeChat bot
def start_wechat_bot():
    """Log in to WeChat through itchat and answer movie-search messages.

    After login a greeting is sent to the 'filehelper' chat, which also
    serves as the admin console: the exact messages '开启', '关闭', '状态'
    and '测试' sent there switch the bot on or off, report its state, or
    run a self-test of the search and trending modules.

    While the bot is on, a text message starting with '搜索' is treated as
    a search for the text after that prefix. The sender receives, in order
    and each subject to its module-level setting: the Baidu Cloud links,
    the online-watch links, the trending list and the advertisement.
    Errors are reported to the admin chat.

    The function ends by calling ``itchat.run()``.
    """
    # When running on a server, add enableCmdQR=2 to auto_login.
    itchat.auto_login(hotReload=True)
    rcv = 'filehelper'
    itchat.send('成功接入'+ bot_name +'服务端!\n发送开启以开启服务',rcv)
    friend = itchat.get_friends()
    myName = friend[0]['UserName']

    def send_error_report(desc, error):
        """Send an error description to the admin chat."""
        itchat.send(desc + '\n错误类型:' + str(error), rcv)

    @itchat.msg_register(itchat.content.TEXT)
    def main(msg):
        """Handle one incoming text message."""
        global mode_init
        content = msg['Content']

        # Admin commands (messages addressed to the file helper chat).
        if msg['ToUserName'] == rcv:
            if content == '开启':
                mode_init = 1
                itchat.send('已开启机器人',rcv)
            elif content == '状态':
                if mode_init:
                    itchat.send('已开启机器人\n发送关闭以关闭机器人',rcv)
                else:
                    itchat.send('未开启机器人',rcv)
            elif content == '关闭':
                mode_init = 0
                itchat.send('已关闭机器人\n发送开启以启动机器人', rcv)
            elif content == '测试':
                try:
                    beautiful_input(gain_link('我'))
                    itchat.send('搜索模块正常!',rcv)
                except Exception as e:
                    send_error_report('搜索模块错误!',e)
                try:
                    beautiful_input_for_hot_movie(get_hot())
                    itchat.send('热门模块正常!',rcv)
                except Exception as e:
                    send_error_report('热门模块错误!',e)

        # Public features: only while enabled, only for search requests.
        if not (mode_init and content[:2] == '搜索'):
            return

        # Searches sent from the logged-in account itself are answered in
        # the admin chat, so the owner can use the search too.
        reply_to = rcv if msg['FromUserName'] == myName else msg['FromUserName']
        keyword = content[2:]
        itchat.send(bot_name + '正在搜索,请稍等。。。', reply_to)

        # Baidu Cloud links.
        try:
            results = gain_link(keyword)
            if results:
                itchat.send(beautiful_input(results), reply_to)
            else:
                # Pass the recipient explicitly so the requester is the one
                # who receives the "nothing usable" notice.
                itchat.send('已检索到10个相关资源,但是 Baidu Validate 系统排除有 10 个垃圾资源', reply_to)
        except Exception as e:
            itchat.send('对不起,不能找到您想搜索的资源', reply_to)
            send_error_report('搜索模块错误,未能成功完成检索',e)

        # Online-watch links.
        try:
            if send_online_watch_address:
                links = get_online_resource(keyword)
                if links:
                    # get_online_resource() already returns shortened
                    # links, so they are used as they are.
                    reply = '在线看地址:\n' + ''.join(
                        link + '\n=====================\n' for link in links)
                    itchat.send(reply, reply_to)
        except Exception as e:
            send_error_report('在线看模块错误,未能成功完成检索', e)

        # Trending movies.
        try:
            if get_hot_number:
                itchat.send(beautiful_input_for_hot_movie(r=get_hot()), reply_to)
        except Exception as e:
            send_error_report('热门模块错误,未能成功完成检索',e)

        # Advertisement, unless it is empty.
        if not adv == '':
            try:
                itchat.send(str(adv), reply_to)
            except Exception as e:
                send_error_report('广告模块错误',e)

    # Start processing messages.
    itchat.run()
# Trending movies
def get_hot():
    """Scrape the top entries of the live box-office table on 58921.com.

    Returns:
        list: Up to ``get_hot_number`` movie titles in table order. Rows
        that cannot be read are skipped (the error is printed), so the list
        may be shorter or empty.
    """
    # Fetched without a proxy on purpose: per the author's note, going
    # through proxies makes this request very slow.
    page = rq.get('http://58921.com/boxoffice/live', headers=header).content.decode()
    tree = etree.HTML(page)
    hot_list = []
    for rank in range(1, get_hot_number + 1):
        # Titles are read from table row rank + 1, i.e. the first row is
        # skipped -- presumably the header row.
        xpath = '//*[@id="content"]/div/table/tbody/tr[' + str(rank + 1) + ']/td[1]/a/text()'
        try:
            hot_list.append(tree.xpath(xpath)[0])
        except Exception as e:
            print(e)
            continue
    return hot_list
def beautiful_input(r):
    """Format search results as the chat message listing Baidu Cloud links.

    Args:
        r: Iterable of dicts with the string values 'name', 'type', 'size'
            and 'link'.

    Returns:
        str: A header line followed by one divider-terminated section per
        entry.
    """
    divider = '\n=====================\n'
    sections = ['百度云链接:' + divider]
    for entry in r:
        sections.append(
            '资源名:' + entry['name'] + '\n'
            + '资源类型:' + entry['type'] + '\n'
            + '资源大小:' + entry['size']
            + '\n云盘地址:' + entry['link'] + divider
        )
    return ''.join(sections)
def beautiful_input_for_hot_movie(r):
    """Format trending movie titles as a numbered chat message.

    Args:
        r: Iterable of title strings.

    Returns:
        str: A heading line followed by one '<n>. <title>' line per title,
        numbered from 1.
    """
    numbered = [str(rank) + '. ' + title + '\n' for rank, title in enumerate(r, 1)]
    return '为您推荐目前最热的电影:\n' + ''.join(numbered)
def state_config():
    """Print the current module-level settings in readable form.

    Reports, one line each: the bot's initial state (mode_init), the number
    of resources to fetch (get_movie_number), the link-validation limit
    (validate_resource_max), the trending-list size (get_hot_number),
    whether proxy IPs are used (use_secrete_ip) and the advertisement (adv).
    """
    print('微信机器人初始化状态为:关闭' if mode_init == 0 else '微信机器人初始化状态为:开启')
    print('获取电影资源的数目为:' + str(get_movie_number))

    if validate_resource_max:
        print('最大百度云资源验证数目为:'+ str(validate_resource_max))
    elif validate_resource_max == 0:
        print('未启用资源验证系统')

    if get_hot_number:
        print('获取最热电影数目:' + str(get_hot_number))
    else:
        print('未启用最热电影系统')

    print('使用ip为:隐秘ip' if use_secrete_ip else '未开启隐秘IP')

    if adv == '':
        print('未开启广告投放功能')
    else:
        print('广告: ' + adv)
def help():
    """Print an overview of the functions this module offers."""
    overview = (
        '欢迎使用 MovieBot 姬,以下是你可以调用的函数:\n',
        'get_an_ip() -------------------- 从IP库里面随机获取一个高匿IP\n',
        'short(url) -------------------- 缩短网址\n',
        'validate_resource(url) --------- 检测百度云资源可不可用\n',
        'get_online_resource(movie) ------- 获取在线看资源\n',
        'gain_link(movie) --------------- 获取资源\n',
        'start_wechat_bot() ------------- 开启微信服务\n',
        'get_hot() ---------------------- 获取热门电影\n',
        'beautiful_input(get_link) ------ 美化 get_link 的输出\n',
        'beautiful_input_for_hot_movie()- 美化 get_hot 输出\n',
        'state_config() ----------------- 打印易懂的 config\n',
        'help() ------------------------- 打印帮助\n',
        '\nWARNING: 在使用上述函数前,请先对初始化块进行赋值',
    )
    # Emitted with a single print call, as one piece of text.
    print(''.join(overview))