File tree 2 files changed +47
-0
lines changed
2 files changed +47
-0
lines changed Original file line number Diff line number Diff line change
1
"""Demonstrate that stats_text_cn rejects non-string input with ValueError."""
from mymodule.stats_word import stats_text_cn as cn

# Deliberately not a string, so the call below exercises the error path.
text = [2, 3, 4, 5]

try:
    cn(text)
except ValueError as err:
    # Report the validation message instead of crashing with a traceback.
    print(err)
9
+
10
+
11
+
12
+
13
+
14
+
15
+
16
+
17
+
18
+
19
+
20
+
Original file line number Diff line number Diff line change
1
+ # Sample input passed to stats_text at the bottom of this file; it is a list, not a str.
+ text = [2 ,3 ,4 ,5 ]
2
+ import re
3
+ import collections
4
def stats_text_cn(text):
    """Count how often each Chinese character occurs in ``text``.

    Only characters in the range U+4E00..U+9FA5 are counted; every other
    character is ignored. The resulting counter is printed and returned.

    Args:
        text: The string to analyse.

    Returns:
        A ``collections.Counter`` mapping each Chinese character to the
        number of times it appears.

    Raises:
        ValueError: If ``text`` is not a string.
    """
    if not isinstance(text, str):
        raise ValueError("文本必须是字符串")
    # The character class matches one Chinese character at a time, so
    # findall yields a list of single characters ready for counting.
    chinese_chars = re.findall('[\u4e00-\u9fa5]', text)
    # No local may be named ``str``: that would make the builtin unusable
    # anywhere in this function, including the type check above.
    counts = collections.Counter(chinese_chars)
    print(counts)
    return counts
12
+
13
def stats_text_en(text):
    """Count how often each English word occurs in ``text``.

    Every character that is not an ASCII letter is treated as a separator,
    and the remaining runs of letters are counted case-sensitively. The
    resulting counter is printed and returned.

    Args:
        text: The string to analyse.

    Returns:
        A ``collections.Counter`` mapping each word to the number of times
        it appears.

    Raises:
        ValueError: If ``text`` is not a string.
    """
    if not isinstance(text, str):
        raise ValueError("文本必须是字符串")
    # Replace everything except A-Z / a-z with a space so that split()
    # yields only runs of letters.
    letters_only = re.sub(r'[^A-Za-z]', ' ', text)
    words = letters_only.split()
    counts = collections.Counter(words)
    print(counts)
    return counts
19
+
20
def stats_text(text):
    """Print Chinese character and English word frequencies for ``text``.

    Delegates to ``stats_text_cn`` and then ``stats_text_en``, in that order.

    Args:
        text: The string to analyse.

    Raises:
        ValueError: If ``text`` is not a string.
    """
    # Validate up front so that bad input fails before anything is printed.
    if not isinstance(text, str):
        raise ValueError("文本必须是字符串")
    stats_text_cn(text)
    stats_text_en(text)
25
+
26
+
27
if __name__ == "__main__":
    # Run the demo only when executed as a script, so that importing this
    # module never triggers the call (and its exception) as a side effect.
    try:
        stats_text(text)
    except ValueError as err:
        # The sample ``text`` is not a string, so this path is expected.
        print(err)
You can’t perform that action at this time.
0 commit comments