Skip to content

Commit 916e3a5

Browse files
committed
Create 1001S01E06_stats_word.py
1 parent a6d67a2 commit 916e3a5

File tree

1 file changed

+84
-0
lines changed

1 file changed

+84
-0
lines changed
+84
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
text='''
2+
The Zen of Python, by Tim Peters
3+
Beautiful is better than ugly.
4+
Explicit is better than implicit.
5+
Simple is better than complex.
6+
Complex is better than complicated.
7+
Flat is better than nested.
8+
Sparse is better than dense.
9+
Readability counts.
10+
Special cases aren't special enough to break the rules.
11+
Although practicality beats purity.
12+
Errors should never pass silently.
13+
Unless explicitly silenced.
14+
In the face of ambxiguity, refuse the temptation to guess.
15+
There should be one-- and preferably only one --obvious way to do it.
16+
Although that way may not be obvious at first unless you're Dutch.
17+
Now is better than never.
18+
Although never is often better than *right* now.
19+
If the implementation is hard to explain, it's a bad idea.
20+
If the implementation is easy to explain, it may be a good idea.
21+
Namespaces are one honking great idea -- let's do more of those!
22+
23+
乡愁 余光中
24+
小时候,
25+
乡愁是一枚小小的邮票,
26+
我在这头,
27+
母亲在那头。
28+
长大后,
29+
乡愁是一张窄窄的船票,
30+
我在这头,
31+
新娘在那头。
32+
后来啊,
33+
乡愁是一方矮矮的坟墓,
34+
我在外头,
35+
母亲在里头。
36+
而现在,
37+
乡愁是一湾浅浅的海峡,
38+
我在这头,
39+
大陆在那头。
40+
'''
41+
42+
import re
43+
# Punctuation to strip from the sample so that only words and Chinese
# characters remain.
fuhao=",.!-*&,。"
# Delete every listed character in a single translate() pass. The previous
# loop used ``str`` as its loop variable, which left the builtin ``str``
# shadowed at module level after the loop finished.
text = text.translate(str.maketrans('', '', fuhao))
print(text)
47+
48+
# stats_text_en: use a dict to count how many times each English word
# occurs in the sample string ``text``.
def stats_text_en(text):
    '''Count English word occurrences.

    Return a dict mapping each English word found in ``text`` to the number
    of times it occurs. Matching is case-sensitive and keys appear in order
    of first occurrence. A word is a run of ASCII letters; apostrophes that
    sit between letters are kept, so a contraction such as "aren't" is
    counted as one word instead of being split into "aren" and "t".
    Digits, punctuation and non-ASCII characters act as separators.
    '''
    dic = {}
    # findall yields the words directly, so no intermediate space-padded
    # string has to be built and split.
    for x in re.findall(r"[A-Za-z]+(?:'[A-Za-z]+)*", text):
        dic[x] = dic.get(x, 0) + 1
    return dic
62+
#print(stats_text_en(text))
63+
# Tally the English words, then list every (word, count) pair from the most
# frequent to the least frequent.
frequency = stats_text_en(text)
print(
    '**********************************************',
    "按照出现次数从大到小输出所有的单词及出现的次数",
    '**********************************************',
    sep='\n',
)
print(sorted(frequency.items(), key=lambda entry: entry[1], reverse=True))
68+
69+
# stats_text_cn: count how many times each Chinese character occurs.
def stats_text_cn(text):
    '''Count Chinese character occurrences.

    Return a dict mapping each Chinese character found in ``text`` to the
    number of times it occurs. Keys appear in order of first occurrence.
    Characters outside the range U+4E00..U+9FA5 are ignored.
    '''
    dictionary = {}
    for i in text:
        # Inclusive code-point range used here to recognise Chinese
        # characters.
        if '\u4e00' <= i <= '\u9fa5':
            # Count in a single pass; calling text.count(i) for every
            # matching character rescanned the whole string each time.
            dictionary[i] = dictionary.get(i, 0) + 1
    return dictionary
79+
#print(stats_text_cn(text))
80+
# Tally the Chinese characters, then list every (character, count) pair from
# the most frequent to the least frequent.
frequency = stats_text_cn(text)
print(
    '**********************************************',
    "按照出现次数从大到小输出所有的汉字及出现的次数",
    '**********************************************',
    sep='\n',
)
print(sorted(frequency.items(), key=lambda entry: entry[1], reverse=True))

0 commit comments

Comments
 (0)