Skip to content

Commit d223e3a

Browse files
committed
no message
1 parent 04186a4 commit d223e3a

File tree

4 files changed

+2260
-13
lines changed

4 files changed

+2260
-13
lines changed
Binary file not shown.

19100102/daweijian/mymodule/main.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
# 从stats_word模块导入stats_text函数
2+
import json
3+
24
from stats_word import stats_text
35

46
text = '''
@@ -34,6 +36,10 @@
3436

3537
# 运行导入的函数
3638
try:
37-
stats_text(text)
39+
with open('tang300.json', 'r', encoding='utf-8') as f:
40+
d = json.load(f)
41+
s = json.dumps(d, indent=2, ensure_ascii=False)
42+
count = 10
43+
stats_text(s, count)
3844
except ValueError as ve:
3945
print(ve)

19100102/daweijian/mymodule/stats_word.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# 封装d5的代码
22
import re
3+
from collections import Counter
34

45

56
def clean_ip_list(words): # 清理掉空格 标点符号
@@ -41,16 +42,18 @@ def list_dict(l):
4142
return cadiz
4243

4344

44-
def stats_text_en(s):
45+
def stats_text_en(s, count):
4546
if isinstance(s, str):
4647
s = cut_clean(s) # 切分字符串并清洗标点符号
4748
s_dict = list_dict(s) # 将单词列表转化为字典并统计词频
48-
# 对字典按照value值排序
49-
s_s_dict = sorted(s_dict.items(), key=lambda item: item[1], reverse=True)
50-
print(s_s_dict)
49+
# 用Counter包装词频字典，以便按出现次数排序
50+
c_dict = Counter(s_dict)
51+
# 找出频率最多的前count名
52+
c_dict = c_dict.most_common(count)
53+
print(c_dict)
5154
else:
5255
raise ValueError("is not str")
53-
return s_s_dict
56+
return c_dict
5457

5558

5659
def cut_count_cn(c, regex): # 取出所有中文 是一个列表
@@ -67,18 +70,21 @@ def cut_count_cn(c, regex): # 取出所有中文 是一个列表
6770
return cadiz
6871

6972

70-
def stats_text_cn(s): # 定义检索中文函数
73+
def stats_text_cn(s, count): # 定义检索中文函数
7174
if isinstance(s, str):
7275
regex = re.compile("(?x)(?: [\w -]+ | [\x80 -\xff]{3} )")
7376
words = cut_count_cn(s, regex)
74-
s_s_dict = sorted(words.items(), key=lambda item: item[1], reverse=True)
75-
print(s_s_dict)
77+
# 用Counter包装词频字典，以便按出现次数排序
78+
c_dict = Counter(words)
79+
# 找出频率最多的前count名
80+
c_dict = c_dict.most_common(count)
81+
print(c_dict)
7682
else:
7783
raise ValueError("is not str")
78-
return s_s_dict
84+
return c_dict
7985

8086

8187
# 定义stats_text函数
82-
def stats_text(s):
83-
stats_text_cn(s) # 导入stats_text_cn函数
84-
stats_text_en(s) # 导入stats_text_en函数
88+
def stats_text(s, count):
89+
stats_text_cn(s, count) # 调用stats_text_cn函数，统计中文词频
90+
stats_text_en(s, count) # 调用stats_text_en函数，统计英文词频

0 commit comments

Comments
 (0)