Skip to content

Commit 5fa0eb3

Browse files
Arya21121srvz
authored and committed
[1901100083]自学训练营15群 DAY7 work (#6204)
* Create 1001S02E05_string.py * Create 1001S02E05_stats_text.py * Create 1001S02E05_array.py * Create 1001S02E06_stats_word.py * Create stats_word.py * Create main.py * Create stats_word.py * Create main.py
1 parent 7b36308 commit 5fa0eb3

File tree

2 files changed

+123
-0
lines changed

2 files changed

+123
-0
lines changed

exercises/1901100083/d08 work/main.py

+24
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,24 @@
1+
from mymodule import stats_word
2+
import traceback
3+
import logging
4+
5+
# Module-level logger named after this module; test_logger() reports through it.
logger = logging.getLogger(__name__)
6+
7+
def test_traceback():
    """Show how to report a failure with the ``traceback`` module.

    Calls ``stats_word.stats_text`` with an ``int`` argument.  If that call
    raises, the exception message and then the formatted traceback are
    printed to standard output; nothing is re-raised.
    """
    try:
        stats_word.stats_text(1)
    except Exception as error:
        # Capture the traceback while the exception is still being handled.
        formatted = traceback.format_exc()
        print('test_traceback =>', error)
        print(formatted)
13+
14+
def test_logger():
    """Show how to report a failure through the module logger.

    Calls ``stats_word.stats_text`` with an ``int`` argument.  If that call
    raises, the exception is handed to ``logger.exception``, which logs it
    together with the active traceback; nothing is re-raised.
    """
    try:
        stats_word.stats_text(1)
    except Exception as error:
        logger.exception(error)
20+
21+
if __name__ == "__main__":
    # Run both demonstrations in order: printed traceback first, then the
    # logger-based report.
    for demo in (test_traceback, test_logger):
        demo()
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,99 @@
1+
def stats_text_en(text):
    '''
    Count how often each English word occurs in ``text``.

    The text is split on whitespace and the punctuation characters
    ``,.*-!`` are removed from every token.  Tokens that end up empty or
    that contain non-ASCII characters are ignored.  Matching is
    case-sensitive.

    Args:
        text: The string to analyse.

    Returns:
        A list of ``(word, count)`` tuples ordered by descending count.
        Words with equal counts keep their order of first appearance, so
        the result is deterministic from run to run.

    Raises:
        ValueError: If ``text`` is not a ``str``.
    '''
    # Imported here so the function brings its own dependency into scope.
    from collections import Counter

    if not isinstance(text, str):
        raise ValueError('参数必须是 str 类型，输入类型 %s' % type(text))

    # One translate() pass removes all unwanted symbols from a token.
    strip_symbols = str.maketrans('', '', ',.*-!')
    cleaned = (token.translate(strip_symbols) for token in text.split())
    words = [word for word in cleaned if word and word.isascii()]

    # Counter tallies in a single pass; most_common() sorts by count with a
    # stable sort, so ties stay in insertion (first-seen) order.
    return Counter(words).most_common()
19+
20+
21+
def stats_text_cn(text):
    '''
    Count how often each Chinese character occurs in ``text``.

    Only characters in the range U+4E00 to U+9FFF (inclusive) are counted;
    everything else (ASCII, punctuation, whitespace) is ignored.

    Args:
        text: The string to analyse.

    Returns:
        A list of ``(character, count)`` tuples ordered by descending
        count.  Characters with equal counts keep their order of first
        appearance, so the result is deterministic from run to run.

    Raises:
        ValueError: If ``text`` is not a ``str``.
    '''
    # Imported here so the function brings its own dependency into scope.
    from collections import Counter

    if not isinstance(text, str):
        raise ValueError('参数必须是 str 类型，输入类型 %s' % type(text))

    cn_characters = [ch for ch in text if '\u4e00' <= ch <= '\u9fff']

    # Counter tallies in a single pass; most_common() sorts by count with a
    # stable sort, so ties stay in insertion (first-seen) order.
    return Counter(cn_characters).most_common()
34+
35+
36+
37+
def stats_text(text):
    '''
    Combine the English word counts and the Chinese character counts.

    Returns the list produced by ``stats_text_en(text)`` followed by the
    list produced by ``stats_text_cn(text)``.

    Raises:
        ValueError: If ``text`` is not a ``str``.
    '''
    if isinstance(text, str):
        english_counts = stats_text_en(text)
        chinese_counts = stats_text_cn(text)
        return english_counts + chinese_counts
    raise ValueError('参数必须是 str 类型，输入类型 %s' % type(text))
44+
45+
46+
47+
48+
en_text = '''
49+
The Zen of Python, by Tim Peters
50+
51+
Beautiful is better than ugly.
52+
Explicit is better than implicit.
53+
Simple is better than complex.
54+
Complex is better than complicated.
55+
Flat is better than nested.
56+
Sparse is better than dense.
57+
Readability counts.
58+
Special cases aren't special enough to break the rules.
59+
Although practicality beats purity.
60+
Errors should never pass silently.
61+
Unless explicitly silenced.
62+
In the face of ambxiguity, refuse the temptation to guess.
63+
There should be one-- and preferably only one --obvious way to do it.
64+
Although that way may not be obvious at first unless you're Dutch.
65+
Now is better than never.
66+
Although never is often better than *right* now.
67+
If the implementation is hard to explain, it's a bad idea.
68+
If the implementation is easy to explain, it may be a good idea.
69+
Namespaces are one honking great idea -- let's do more of those!
70+
'''
71+
72+
cn_text = '''
73+
Python 之禅 by Tim Peters
74+
75+
优美胜于丑陋
76+
明了胜于晦涩
77+
简洁胜于复杂
78+
复杂胜于凌乱
79+
扁平胜于嵌套
80+
间隔胜于紧凑
81+
可读性很重要
82+
即便假借特里的实用性之名,也不可违背这些规则
83+
不要包容所有错误,除非你确定需要这样做
84+
当存在多种可能,不要尝试去猜测
85+
而是尽量找一种,最好是唯一一种明显的解决方案
86+
虽然这并不容易,因为你不是 Python 之父
87+
做也许好过不做,但不假思索就动手还不如不做
88+
。。。
89+
'''
90+
91+
# Runs only when this file is executed directly as a script, i.e. when
# __name__ == '__main__'; prints the counts for both sample texts.
if __name__ == '__main__':
    print('统计参数中每个英文单词出现的次数 ==>\n', stats_text_en(en_text))
    print('统计参数中每个中文汉字出现的次数 ==>\n', stats_text_cn(cn_text))
97+
98+
99+

0 commit comments

Comments
 (0)