@@ -27,10 +27,10 @@ def list_dict(l):
27
27
b = True # 是否是英文
28
28
for word in l :
29
29
for char in word :
30
- if (char >= '\u0041 ' and char <= '\u005a ' ) or (char >= '\u0061 ' and char <= '\u007a ' ): # 字符是英文
30
+ if ('\u0041 ' <= char <= '\u005a ' ) or ('\u0061 ' <= char <= '\u007a ' ): # 字符是英文
31
31
b = True
32
32
break
33
- else : # 存在一个字符非英文 所以整个词非英文单词
33
+ else : # 存在一个字符非英文 所以整个词非英文单词
34
34
b = False
35
35
break
36
36
if (b ):
@@ -42,11 +42,14 @@ def list_dict(l):
42
42
43
43
44
44
def stats_text_en(s):
    """Rank the English words of a string by how often they occur.

    The text is split and stripped of punctuation by ``cut_clean``, tallied
    into a word -> count mapping by ``list_dict``, and then ordered by
    descending count. The ranked (word, count) pairs are printed and
    returned.

    Raises:
        ValueError: if ``s`` is not a ``str``.
    """
    # Guard clause: reject non-string input before doing any work.
    if not isinstance(s, str):
        raise ValueError("is not str")

    tokens = cut_clean(s)  # split the string and clean out punctuation
    frequencies = list_dict(tokens)  # convert to a dict and count word frequency
    # Order the dictionary entries by their count, highest first.
    ranked = sorted(frequencies.items(), key=lambda pair: pair[1], reverse=True)
    print(ranked)
    return ranked
51
54
52
55
@@ -65,10 +68,13 @@ def cut_count_cn(c, regex): # 取出所有中文 是一个列表
65
68
66
69
67
70
def stats_text_cn(s):  # Chinese-text counterpart of stats_text_en
    """Rank the tokens extracted from a string by how often they occur.

    A verbose-mode regular expression is compiled and handed, together with
    the input, to ``cut_count_cn``; the mapping it returns is ordered by
    descending count. The ranked (token, count) pairs are printed and
    returned.

    NOTE(review): ``cut_count_cn`` is defined elsewhere in this file; it is
    presumed to return a token -> count mapping because ``.items()`` is
    called on its result -- confirm against its definition.

    Raises:
        ValueError: if ``s`` is not a ``str``.
    """
    # Guard clause: reject non-string input before doing any work.
    if not isinstance(s, str):
        raise ValueError("is not str")

    # Raw string so that ``\w`` reaches the regex engine untouched instead of
    # being an invalid *string* escape (a warning on modern Python). The
    # ``\x80``/``\xff`` escapes are resolved by ``re`` itself, so the compiled
    # pattern is the same as before.
    regex = re.compile(r"(?x)(?: [\w-]+ | [\x80-\xff]{3} )")
    words = cut_count_cn(s, regex)
    ranked = sorted(words.items(), key=lambda pair: pair[1], reverse=True)
    print(ranked)
    return ranked
73
79
74
80
0 commit comments