Skip to content

Commit 6ec2afe

Browse files
committed
19100102 d12作业
1 parent 0aa0b0f commit 6ec2afe

File tree

1 file changed

+91
-0
lines changed

1 file changed

+91
-0
lines changed

19100102/jynbest6066/d12_training2.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
from wxpy import *
2+
import d11_training1
3+
import requests
4+
import collections
5+
import pyquery
6+
from pyquery import PyQuery
7+
import collections
8+
from os import path
9+
import json
10+
import re
11+
'''导入jieba'''
12+
import jieba
13+
14+
# Create the wxpy bot instance that the handler registered below attaches to.
bot = Bot()

# Look up the friend who receives the greeting and, later, the word statistics.
_matches = bot.friends().search('嘻嘻')
if not _matches:
    # Same exception type as the bare `[0]` lookup would raise, but with a
    # message that says what is actually missing.
    raise IndexError("no WeChat friend matches the search term '嘻嘻'")
my_friend = _matches[0]
my_friend.send('Hello WeChat!')
18+
19+
20+
def stats_text_en(text):
    """Extract the English words contained in *text*.

    A word is a run of ASCII letters, optionally continued by further
    letter runs that are each joined with a single apostrophe or hyphen,
    so ``don't`` and ``mother-in-law`` each count as one word.
    Single-letter words such as ``a`` and ``I`` are kept.

    Args:
        text: The string to scan.

    Returns:
        A list of the matched words in order of appearance, duplicates
        included; an empty list when nothing matches.
    """
    # The joiner group is optional and repeatable: a lone letter matches on
    # its own, and compounds with several hyphens are not cut in the middle.
    return re.findall(r"[a-zA-Z]+(?:['\-][a-zA-Z]+)*", text)
28+
29+
def stats_text_cn(text):
    """Return only the Chinese characters of *text* as one string.

    Every character in the range U+4E00..U+9FA5 is kept, in its original
    order; everything else (letters, digits, punctuation, whitespace) is
    discarded.
    """
    hanzi_matcher = re.compile(r'[\u4e00-\u9fa5]')
    kept_chars = hanzi_matcher.findall(text)
    return "".join(kept_chars)
34+
35+
'''调用collections的Counter函数'''
36+
#cnt = collections.Counter()
37+
#for word in list1:
38+
#cnt[word] += 1
39+
40+
def cut_cnwords(text):
    """Segment *text* with jieba and keep the multi-character pieces.

    The text is cut with ``jieba.cut(text, cut_all=True)``; segments
    shorter than two characters are dropped.

    Returns:
        A list of the remaining segments, in the order jieba yields them.
    """
    segments = jieba.cut(text, cut_all=True)
    return [segment for segment in segments if len(segment) >= 2]
52+
53+
def stats_text(text):
    """Split *text* into its English words and its Chinese characters.

    Args:
        text: The string to analyse.

    Returns:
        The 2-tuple ``(stats_text_en(text), stats_text_cn(text))``.

    Raises:
        ValueError: If *text* is not a ``str``.
    """
    if not isinstance(text, str):
        # ValueError is kept (rather than TypeError) so callers that already
        # catch it keep working; the message names the offending type.
        raise ValueError(
            "text must be a str, got {}".format(type(text).__name__)
        )
    return stats_text_en(text), stats_text_cn(text)
60+
61+
def main(text):
    """Build the combined word list for *text*.

    English words are extracted with ``stats_text_en``; the Chinese
    characters are isolated with ``stats_text_cn`` and then segmented with
    ``cut_cnwords``.

    Returns:
        The English words followed by the Chinese segments, as one list.
    """
    english_tokens = stats_text_en(text)
    chinese_only = stats_text_cn(text)
    return english_tokens + cut_cnwords(chinese_only)
69+
70+
@bot.register(msg_types=SHARING)  # registered for SHARING-type messages
def auto_reply(msg):
    """Send word-frequency statistics for a shared link to ``my_friend``.

    The message is printed, the page behind ``msg.url`` is downloaded, the
    text of its ``#js_content`` element is tokenised with ``main``, and the
    100 most common tokens are counted. The ``(word, count)`` pairs are
    joined with commas, printed, and sent to ``my_friend``.

    Args:
        msg: The incoming message object; ``msg.type`` and ``msg.url``
            are read.
    """
    print(msg)
    if msg.type != "Sharing":
        return

    # A bounded timeout keeps a stalled server from blocking this handler
    # forever; without it requests waits indefinitely.
    response = requests.get(msg.url, timeout=10)

    # NOTE(review): '#js_content' is presumably the article-body container
    # of the shared page -- confirm against the pages actually shared.
    document = pyquery.PyQuery(response.text)
    content = document('#js_content').text()

    words = main(content)
    top100 = collections.Counter(words).most_common(100)
    str100 = ','.join(str(pair) for pair in top100)

    print(str100)
    my_friend.send(str100)
90+
91+
# Keep the script alive after setup. NOTE(review): wxpy's embed() presumably
# opens an interactive console and blocks here so the registered handler
# keeps receiving messages -- confirm against the wxpy documentation.
embed()

0 commit comments

Comments
 (0)