Skip to content

Commit 0e20648

Browse files
committed
Day 12
2 days left
1 parent 38de41d commit 0e20648

File tree

5 files changed

+86
-0
lines changed

5 files changed

+86
-0
lines changed
Binary file not shown.
Binary file not shown.

19100202/Gdong24/d12/d11_training1.py

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#import package
2+
import requests
3+
import yagmail
4+
import getpass
5+
import stats_word
6+
from pyquery import PyQuery
7+
8+
9+
# Collect the sender address, the sender's login password and the recipient.
sender = input('请输入发件人邮箱地址:')
# getpass reads the password without echoing it to the terminal.
psw = getpass.getpass('请输入发件人邮箱登录密码:')
recipient = input('请输入收件人邮箱地址:')
smtp = 'smtp.qq.com'

# Fetch the WeChat official-account article. The timeout keeps the script
# from hanging on a stalled connection, and an HTTP error aborts the run
# instead of producing statistics for an error page.
response = requests.get(
    'https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA',
    timeout=10,
)
response.raise_for_status()

# Extract the article body from the page.
document = PyQuery(response.text)
content = document('#js_content').text()

# Day 8 material: compute the top-100 Chinese word frequencies once and reuse
# the result for both printouts (the article has no English text, so only the
# Chinese statistics are needed).
try:
    statList = stats_word.stats_text_cn(content)
except TypeError:
    # Report the non-string input, then re-raise: nothing below can run
    # without the statistics.
    print("对象不是字符串类型!")
    raise
print('前100的中文词频统计结果: ', statList)

# Flatten the (word, frequency) pairs into one string for the mail body.
statString = ''.join(str(i) for i in statList)
print(statString)

# Send the statistics by e-mail (currently disabled).
#yagmail.SMTP(sender,psw,smtp).send(recipient,'19100202 gdong',statString)
38+

19100202/Gdong24/d12/d12_training2.py

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import stats_word
2+
import requests
3+
from pyquery import PyQuery
4+
from wxpy import *
5+
6+
def main():
    """Log in to WeChat and answer shared articles with word statistics.

    A handler is registered for SHARING messages from the account's friends.
    For each one, the shared page is downloaded, the text of the
    ``#js_content`` element is extracted, and the result of
    ``stats_word.stats_text_cn`` is returned from the handler as the reply.
    """
    bot = Bot()  # Scan the QR code to log in to WeChat.
    my_friend = bot.friends()  # Replies are limited to the account's friends.

    # Listen for link-sharing messages sent by friends.
    @bot.register(chats=my_friend, msg_types=SHARING)
    def auto_reply(msg):
        """Return the word-frequency statistics for the shared page."""
        # msg.url is the address of the shared web page; the timeout keeps a
        # stalled download from blocking the handler indefinitely.
        response = requests.get(msg.url, timeout=10)
        document = PyQuery(response.text)
        content = document('#js_content').text()
        # stats_text_cn already limits its result to the 100 most common words.
        return stats_word.stats_text_cn(content)

    # Hand control to wxpy's embed() after registering the handler.
    # NOTE(review): presumably this blocks so the listener stays alive; confirm.
    embed()


if __name__ == '__main__':
    main()

19100202/Gdong24/d12/stats_word.py

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#引用package
2+
import jieba
3+
import collections
4+
from collections import Counter
5+
import re
6+
7+
8+
#中文词频排序
9+
def stats_text_cn(text2, count=100):
    """Return the most frequent Chinese words of two or more characters.

    Only characters in the range U+4E00-U+9FA5 are kept; everything else
    (Latin letters, digits, punctuation, whitespace) is discarded before the
    text is segmented with jieba's precise mode.

    Args:
        text2: The text to analyse. It must be a ``str``; ``re.findall``
            raises ``TypeError`` for other types.
        count: Maximum number of entries to return. Defaults to 100, the
            limit that was previously hard-coded.

    Returns:
        A list of ``(word, frequency)`` tuples ordered from most to least
        frequent, as produced by ``Counter.most_common``.
    """
    cn_chars = re.findall(r'[\u4e00-\u9fa5]', text2)
    if not cn_chars:
        # No Chinese characters: there is nothing to segment or count.
        return []

    # Join the matched characters back into one string for segmentation.
    cn_text = ''.join(cn_chars)

    # Precise-mode segmentation; keep only words of at least two characters.
    words = (
        word
        for word in jieba.cut(cn_text, cut_all=False)
        if len(word) >= 2
    )
    return Counter(words).most_common(count)

0 commit comments

Comments
 (0)