Skip to content

Commit 04186a4

Browse files
committed
no message
1 parent bd0ae29 commit 04186a4

File tree

3 files changed: 21 additions (+21) and 12 deletions (-12)

3 files changed: 21 additions (+21) and 12 deletions (-12)
Binary file not shown.

19100102/daweijian/mymodule/main.py

File mode changed: 100755 → 100644
Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,7 @@
3333
'''
3434

3535
# 运行导入的函数
36-
stats_text(text)
36+
try:
37+
stats_text(text)
38+
except ValueError as ve:
39+
print(ve)

19100102/daweijian/mymodule/stats_word.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@ def list_dict(l):
2727
b = True # 是否是英文
2828
for word in l:
2929
for char in word:
30-
if (char >= '\u0041' and char <= '\u005a') or (char >= '\u0061' and char <= '\u007a'): # 字符是英文
30+
if ('\u0041' <= char <= '\u005a') or ('\u0061' <= char <= '\u007a'): # 字符是英文
3131
b = True
3232
break
33-
else: #存在一个字符非英文 所以整个词非英文单词
33+
else: # 存在一个字符非英文 所以整个词非英文单词
3434
b = False
3535
break
3636
if (b):
@@ -42,11 +42,14 @@ def list_dict(l):
4242

4343

4444
def stats_text_en(s):
45-
s = cut_clean(s) # 切分字符串并清洗标点符号
46-
s_dict = list_dict(s) # 将tempiate转化为字典并统计词频
47-
# 对字典按照value值排序
48-
s_s_dict = sorted(s_dict.items(), key=lambda item: item[1], reverse=True)
49-
print(s_s_dict)
45+
if isinstance(s, str):
46+
s = cut_clean(s) # 切分字符串并清洗标点符号
47+
s_dict = list_dict(s) # 将tempiate转化为字典并统计词频
48+
# 对字典按照value值排序
49+
s_s_dict = sorted(s_dict.items(), key=lambda item: item[1], reverse=True)
50+
print(s_s_dict)
51+
else:
52+
raise ValueError("is not str")
5053
return s_s_dict
5154

5255

@@ -65,10 +68,13 @@ def cut_count_cn(c, regex): # 取出所有中文 是一个列表
6568

6669

6770
def stats_text_cn(s): # 定义检索中文函数
68-
regex = re.compile("(?x)(?: [\w -]+ | [\x80 -\xff]{3} )")
69-
words = cut_count_cn(s, regex)
70-
s_s_dict = sorted(words.items(), key=lambda item: item[1], reverse=True)
71-
print(s_s_dict)
71+
if isinstance(s, str):
72+
regex = re.compile("(?x)(?: [\w -]+ | [\x80 -\xff]{3} )")
73+
words = cut_count_cn(s, regex)
74+
s_s_dict = sorted(words.items(), key=lambda item: item[1], reverse=True)
75+
print(s_s_dict)
76+
else:
77+
raise ValueError("is not str")
7278
return s_s_dict
7379

7480

0 commit comments

Comments
 (0)