Skip to content

Commit 8129ca2

Browse files
committed
day13
1 parent 2fdc8d3 commit 8129ca2

File tree

4 files changed

+98
-2235
lines changed

4 files changed

+98
-2235
lines changed

19100401/congboqiu/d13/d13/main.py

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import re
2+
import jieba
3+
import getpass
4+
import requests
5+
import wxpy
6+
from pyquery import PyQuery
7+
from mymodule import stats_word
8+
9+
#导入模块wxpy,建微信机器人
10+
from wxpy import *
11+
# Create the WeChat bot (cache_path=True is passed to Bot), look up the
# target friend by name and ask them to share an article.
bot = Bot(cache_path=True)
matching_friends = bot.friends().search('翕羊羊')
my_friend = matching_friends[0]  # first search hit
my_friend.send('你好,请分享篇文章给我')
14+
15+
16+
@bot.register(my_friend, msg_types='Sharing')
def auto_reply(msg):
    """Reply to a shared article with a bar chart of its 10 most common words.

    The article behind ``msg.url`` is downloaded, the text of the element
    with id ``js_content`` is extracted, the Chinese word frequencies are
    computed with ``stats_word.stats_text_cn`` and the result is rendered to
    ``result.png``, which is sent back as an image reply.

    Args:
        msg: The incoming sharing message; ``msg.url`` and
            ``msg.reply_image`` are used.
    """
    print(msg)
    print(msg.url)

    # A timeout keeps the handler from hanging forever on a dead link.
    response = requests.get(msg.url, timeout=10)
    document = PyQuery(response.text)
    text = document('#js_content').text()

    # Split the (word, count) pairs into the two sequences the chart needs.
    top_words = stats_word.stats_text_cn(text, 10)
    key_words = [word for word, _ in top_words]
    count_values = [count for _, count in top_words]

    # Imported lazily so matplotlib is only loaded when a chart is needed.
    import matplotlib.pyplot as plt

    plt.rcdefaults()
    # Use SimHei for sans-serif text so the Chinese labels can be rendered
    # (the font has to be installed on the machine running the bot).
    plt.rcParams['font.sans-serif'] = ['SimHei']

    fig, ax = plt.subplots()
    try:
        # Plot against numeric positions and label them afterwards.
        positions = list(range(len(key_words)))
        ax.barh(positions, count_values, align="center", color="blue")
        ax.set_yticks(positions)
        ax.set_yticklabels(key_words)
        ax.invert_yaxis()  # most frequent word at the top
        # The x axis shows occurrence counts; the label must be a string,
        # not the list of counts itself.
        ax.set_xlabel('出现次数')
        ax.set_title('你刚才所发文章的词频前10统计')
        fig.savefig('result.png')
    finally:
        # Release the figure; otherwise one figure leaks per message.
        plt.close(fig)

    msg.reply_image('result.png')
45+
46+
# embed() is not defined in this file; it presumably comes from the wxpy
# star-import. NOTE(review): it looks like this call blocks here so the
# registered handler keeps receiving messages — confirm against wxpy docs.
embed()
47+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
2+
import re
3+
import jieba
4+
from collections import Counter
5+
# Module-level value sharing its name with the `counter` parameter of the
# functions in this module. Those functions receive their own `counter`
# argument, so no code visible here reads this global.
counter=20
6+
def stats_text_en(text, counter):
    """Count the most frequent English words in ``text``.

    Hyphens are treated as word separators, letters are lower-cased, and
    every character that is neither ``a``-``z`` nor whitespace is discarded
    before the text is split into words.

    Args:
        text: The text to analyse.
        counter: Maximum number of ``(word, count)`` pairs to return.

    Returns:
        A list of ``(word, count)`` tuples ordered from most to least
        frequent, as produced by ``Counter.most_common``.

    Raises:
        ValueError: If ``text`` is not a ``str``.
    """
    if not isinstance(text, str):
        raise ValueError("I can only handle a string type text")

    # Replace hyphens with spaces so "well-known" counts as two words. The
    # former pattern "\-" only matched a backslash followed by a hyphen, so
    # hyphenated words were silently glued together.
    normalized = text.replace("-", " ").lower()
    # Keep every kind of whitespace (not only " " and "\n"); otherwise words
    # separated by tabs or "\r\n" would be merged into one.
    letters_only = re.sub(r"[^a-z\s]", "", normalized)

    return Counter(letters_only.split()).most_common(counter)
20+
21+
22+
def stats_text_cn(text, counter):
    """Count the most frequent Chinese words in ``text``.

    Characters with a code point of 256 or below (ASCII / Latin-1 letters,
    digits and punctuation) are replaced by commas. The remaining text is
    segmented with ``jieba.cut`` and only words of at least two characters
    are counted.

    Args:
        text: The text to analyse.
        counter: Maximum number of ``(word, count)`` pairs to return.

    Returns:
        A list of ``(word, count)`` tuples ordered from most to least
        frequent, as produced by ``Counter.most_common``.

    Raises:
        ValueError: If ``text`` is not a ``str``.
    """
    if not isinstance(text, str):
        raise ValueError("I can only handle a string type text")

    # Build the filtered text in one pass; the comma stands in for every
    # dropped character so neighbouring Chinese runs are not joined.
    text_cn = "".join(ch if ord(ch) > 256 else "," for ch in text)
    words = [w for w in jieba.cut(text_cn, cut_all=False) if len(w) >= 2]

    return Counter(words).most_common(counter)
40+
41+
def stats_text(text, counter):
    """Count the most frequent English and Chinese words in ``text``.

    Args:
        text: The text to analyse.
        counter: Maximum number of ``(word, count)`` pairs taken from each
            language.

    Returns:
        The list returned by ``stats_text_en`` followed by the list returned
        by ``stats_text_cn``. Previously the results were computed but
        discarded, so the function always returned ``None``.

    Raises:
        ValueError: If ``text`` is not a ``str``.
    """
    if not isinstance(text, str):
        raise ValueError("I can only handle a string type text")

    sort_en = stats_text_en(text, counter)
    sort_cn = stats_text_cn(text, counter)
    return sort_en + sort_cn
47+
48+
49+
50+
51+

0 commit comments

Comments
 (0)