1
+ text = '''
2
+ The Zen of Python, by Tim Peters
3
+ Beautiful is better than ugly.
4
+ Explicit is better than implicit.
5
+ Simple is better than complex.
6
+ Complex is better than complicated.
7
+ Flat is better than nested.
8
+ Sparse is better than dense.
9
+ Readability counts.
10
+ Special cases aren't special enough to break the rules.
11
+ Although practicality beats purity.
12
+ Errors should never pass silently.
13
+ Unless explicitly silenced.
14
+ In the face of ambxiguity, refuse the temptation to guess.
15
+ There should be one-- and preferably only one --obvious way to do it.
16
+ Although that way may not be obvious at first unless you're Dutch.
17
+ Now is better than never.
18
+ Although never is often better than *right* now.
19
+ If the implementation is hard to explain, it's a bad idea.
20
+ If the implementation is easy to explain, it may be a good idea.
21
+ Namespaces are one honking great idea -- let's do more of those!
22
+
23
+ 乡愁 余光中
24
+ 小时候,
25
+ 乡愁是一枚小小的邮票,
26
+ 我在这头,
27
+ 母亲在那头。
28
+ 长大后,
29
+ 乡愁是一张窄窄的船票,
30
+ 我在这头,
31
+ 新娘在那头。
32
+ 后来啊,
33
+ 乡愁是一方矮矮的坟墓,
34
+ 我在外头,
35
+ 母亲在里头。
36
+ 而现在,
37
+ 乡愁是一湾浅浅的海峡,
38
+ 我在这头,
39
+ 大陆在那头。
40
+ '''
41
+
42
+ import re
43
# Remove every punctuation mark listed in `fuhao` from `text`, so that only
# words and Chinese characters (plus whitespace) remain, then echo the result.
# A single translate() pass replaces the former per-character replace() loop,
# whose loop variable was named `str` and therefore left the builtin `str`
# rebound to a punctuation character for the rest of the module.
fuhao = ",.!-*&,。"
text = text.translate(str.maketrans("", "", fuhao))
print(text)
47
+
48
# Define a function named stats_text_en.
# It uses a dictionary (dict) to count how many times each English word occurs in the sample string `text`.
50
+
51
def stats_text_en(text):
    """Count how often each English word occurs in ``text``.

    A word is a maximal run of ASCII letters. An apostrophe that sits
    between letters is kept as part of the word, so ``aren't`` is counted
    once instead of being split into the fragments ``aren`` and ``t``.
    Every other character (digits, punctuation, CJK characters, whitespace)
    acts as a separator. Matching is case-sensitive, so ``Although`` and
    ``although`` are separate keys.

    Args:
        text: The string to analyse; may be empty.

    Returns:
        A plain ``dict`` mapping each word to its number of occurrences,
        with keys in order of first appearance. Empty when ``text``
        contains no ASCII letters.
    """
    # Imported here so the function does not rely on a new file-level import.
    from collections import Counter

    # Letters, optionally followed by one or more 'letters groups (contractions).
    words = re.findall(r"[A-Za-z]+(?:'[A-Za-z]+)*", text)
    # Convert back to a plain dict so callers see the same type as before.
    return dict(Counter(words))
62
# Count the English words of the sample text and print every (word, count)
# pair, ordered from the most frequent word to the least frequent one.
frequency = stats_text_en(text)
print(
    '**********************************************',
    "按照出现次数从大到小输出所有的单词及出现的次数",
    '**********************************************',
    sep='\n',
)
word_counts = list(frequency.items())
# list.sort is stable, so ties keep their first-seen order just like sorted().
word_counts.sort(key=lambda pair: pair[1], reverse=True)
print(word_counts)
68
+
69
+ #创建一个名为stats_text_cn的函数,功能:统计每个中文汉字出现的次数
70
def stats_text_cn(text):
    """Count how often each Chinese character occurs in ``text``.

    A character is counted when its code point lies in the inclusive range
    U+4E00..U+9FA5. Both bounds are single characters, so the boundary
    characters themselves (for example U+4E00, "一") are counted; the
    previous two-character bounds silently excluded U+4E00.

    Args:
        text: The string to analyse; may be empty.

    Returns:
        A plain ``dict`` mapping each matching character to its number of
        occurrences, with keys in order of first appearance. Empty when
        ``text`` contains no character in the range.
    """
    # Imported here so the function does not rely on a new file-level import.
    from collections import Counter

    # One pass over the text instead of calling text.count() per character.
    hanzi = (ch for ch in text if "\u4e00" <= ch <= "\u9fa5")
    # Convert back to a plain dict so callers see the same type as before.
    return dict(Counter(hanzi))
79
# Count the Chinese characters of the sample text and print every
# (character, count) pair, ordered from the most frequent to the least.
frequency = stats_text_cn(text)
print(
    '**********************************************',
    "按照出现次数从大到小输出所有的汉字及出现的次数",
    '**********************************************',
    sep='\n',
)
hanzi_counts = list(frequency.items())
# list.sort is stable, so ties keep their first-seen order just like sorted().
hanzi_counts.sort(key=lambda pair: pair[1], reverse=True)
print(hanzi_counts)
0 commit comments