|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | + |
# Sample English text (the Zen of Python, PEP 20). Its only visible use in
# this file is the commented-out stats_text_en(text) experiment.
text = '''
The Zen of Python, by Tim Peters
Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do
it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!
'''
# Sample Chinese text (a news paragraph). Its only visible use in this file
# is the commented-out stats_text_cn(text1) experiment.
# NOTE(review): the leading whitespace of the continuation lines is part of
# the string value - confirm it against the original file before relying on it.
text1='''澎湃新闻9月26日从公安部交管局获悉,
 国庆假期全国将再次迎来自驾出行高峰,高速公路、景区周边道路交通压
 力和安全风险将显著增大。近日,公安部对近年来国庆假期道路交通事故
 情况进行分析,对今年国庆假期交通安全形势进行研判,并向社会公众发
 出交通安全预警。
 近三年国庆假期道路交通事故情况显示,小客车及高速公路是事故预防的
 重点车辆和重点道路。从导致死亡事故的原因看,未按规定让行、无证驾
 驶、超速行驶肇事最多,酒驾醉驾、超速行驶、未按规定让行肇事导致死
 亡人数占比逐年增加。'''
| 35 | +# def stats_text_en(text): |
| 36 | +# # text=input('请输入英文:\n') |
| 37 | +# count=10 |
| 38 | +# import collections |
| 39 | +# if type(text) is str: |
| 40 | +# x='*-,.!' #列出要替换的标点符号 |
| 41 | +# # words_dict={} #空字典 |
| 42 | +# for c in x: #按每个标点在X里面循环 |
| 43 | +# text=(text.replace(c,'')) #重置的text的值 |
| 44 | +# words=text.lower().split() #将text小写字母化后切片给words赋值 |
| 45 | +# r=collections.Counter(words).most_common(count) |
| 46 | +# # z=b.most_common(5) |
| 47 | +# # b=set(words) #将words去掉重复项赋值给B |
| 48 | +# # for word in b: #在B里的每个单词循环 |
| 49 | +# # d=words.count(word) #计算每个单词在words里出现的次数,赋值给D |
| 50 | +# # words_dict[word]=d #每个单词及出现的次数以字典的方式存储在words_dict |
| 51 | +# # e=sorted(words_dict.items(),key=lambda t:t[1],reverse=True) #将字典里的项目按值的大小降序排列 |
| 52 | +# print(r) #打印E |
| 53 | +# else: |
| 54 | +# raise ValueError('文本类型不是英文') |
| 55 | +# print(ValueError.__context__) |
| 56 | +# return |
| 57 | +# stats_text_en(text) |
| 58 | +# def stats_text_cn (text1): #定义函数stats_text_cn |
| 59 | +# # text1=input('请输入文字:\n') |
| 60 | +# count=10 |
| 61 | +# import collections |
| 62 | +# if type(text1) is str: |
| 63 | +# import re #导入模块 |
| 64 | +# i=r"[\u4e00-\u9fa5]" #unicode代码库 |
| 65 | +# j= re.compile(i) #将创建的模式对像赋值给J |
| 66 | +# k= re.findall(j,text1) #将与模式与文本匹配得到的unicode代码赋值给K |
| 67 | +# q=collections.Counter(k).most_common(count) |
| 68 | +# # dict={} #建空字典 |
| 69 | +# # for l in k: #在K内每个字循环 |
| 70 | +# # m=k.count(l) #计算每个字K里出现的次数赋值给M |
| 71 | +# # dict[l]=m #将每一项存入字典 |
| 72 | +# # q=sorted(dict.items(),key=lambda y:y[1],reverse=True) #字典项按出现次数降序 |
| 73 | +# print(q) #打印排序后的结果 |
| 74 | +# else: |
| 75 | +# raise ValueError('文本类型不是字符串') |
| 76 | +# print(ValueError.__context__) |
| 77 | +# return #返回 |
| 78 | +# stats_text_cn(text1) |
| 79 | + |
| 80 | +#def stats_text_en1 (text): |
| 81 | +# a = counter() |
| 82 | +# if type(text) is str: |
| 83 | +# x='*-,.!' #列出要替换的标点符号 |
| 84 | +# for c in x: #按每个标点在X里面循环 |
| 85 | +# text=text.replace(c,'')#重置的text的值 |
| 86 | +# words=text.lower().split() #将text小写字母化后切片给words赋值 |
| 87 | +# a=collections.Counter(words).most_common(10) |
| 88 | +# print(a) |
| 89 | +# return |
| 90 | +#stats_text_en1(text) |
| 91 | +# def tang_count (): |
| 92 | +# count=100 |
| 93 | +# import collections |
| 94 | +# import json |
| 95 | +# import re |
| 96 | +# with open('/Users/mayjiao/Documents/GitHub/selfteaching-python-camp/exercises/1901100169/D9/mymodule/tang300.json','r',encoding='utf-8') as f: |
| 97 | +# k=f.read() |
| 98 | +# j= re.compile(r"[\u4e00-\u9fa5]") |
| 99 | +# m= re.findall(j,k) |
| 100 | +# q=collections.Counter(m).most_common(count) |
| 101 | +# print(q) |
| 102 | +# return |
| 103 | +# tang_count() |
| 104 | + |
| 105 | + |
| 106 | + |
def stats_text_cn(url, count, timeout=10):
    """Return the most frequent words of a web page's article body as text.

    The page at ``url`` is downloaded, the text of the element selected by
    ``#js_content`` is extracted, segmented with ``jieba.cut`` (default
    mode), and every token shorter than two characters is discarded before
    counting.

    Args:
        url: Address of the page to analyse.
        count: How many of the most common words to keep.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the caller forever.

    Returns:
        ``str()`` of the list of ``(word, occurrences)`` tuples produced by
        ``collections.Counter.most_common(count)``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    import collections

    import jieba
    import requests
    from pyquery import PyQuery

    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of counting words of an error page.
    response.raise_for_status()

    document = PyQuery(response.text)
    content = document('#js_content').text()

    # Keep only tokens of two or more characters (drops single characters
    # and most punctuation produced by the segmenter).
    words = [token for token in jieba.cut(content) if len(token) >= 2]
    most_common = collections.Counter(words).most_common(count)
    return str(most_common)
| 127 | +#stats_text_cn() |
def send_mail(url='https://mp.weixin.qq.com/s/pLmuGoc4bZrMNl7MSoWgiA', count=100):
    """Interactively e-mail the word statistics of ``url``.

    Prompts on the console for the sender address, the sender password
    (read without echo via ``getpass``) and the recipient address, computes
    the statistics with ``stats_text_cn(url, count)`` and sends them through
    the ``smtp.163.com`` server using yagmail.

    Args:
        url: Page whose word statistics are mailed. Defaults to the article
            this script was written for.
        count: Number of most common words to include in the mail body.

    Returns:
        None.
    """
    import getpass

    import yagmail

    sender = input('输入发件人邮箱:')
    password = getpass.getpass('输入发件人邮箱密码:')
    recipients = input('输入收件人邮箱:')
    content = stats_text_cn(url, count)

    # The context manager closes the SMTP connection even if sending fails.
    with yagmail.SMTP(user=sender, password=password, host='smtp.163.com') as yag:
        yag.send(
            to=recipients,
            subject="1901100169 自学训练营17群 DAY11 MayJiao",
            contents=content,
        )
# Module-level entry point: this call runs whenever the file is executed or
# imported, and it prompts on the console for mail credentials.
# NOTE(review): consider an `if __name__ == "__main__":` guard if this file
# is ever imported as a module - confirm how it is used first.
send_mail()
0 commit comments