Merge pull request selfteaching#1508 from Luchen1471/master

oneMoreTime1357 · web-flow · commit da6ce6dadfc7 · 2019-03-31T23:08:53.000+08:00
1900303 day8
diff --git a/19100303/Luchen1471/README.md b/19100303/Luchen1471/README.md
@@ -80,4 +80,10 @@ https://docs.python.org/zh-cn/3/tutorial/modules.html
 3.dir(xxx)列出xxx模块的定义。
 4.模块们打包，文件夹内容以__init__.py开头，各种方式载入。
 5.今天尝试中英文对照阅读，效率似乎比只读英文略高一点，但是感觉不习惯。
-6.今天内容感觉比较简单，但是还是遇到一个没解决的问题。计划交作业之后去巩固一下前两天没有好好研究的内容。
+6.今天内容感觉比较简单，但是还是遇到一个没解决的问题。计划交作业之后去巩固一下前两天没有好好研究的内容。
+
+20190331
+1.作业依旧用了同学的代码。（我挑选半天，选了一个最简洁明了的）
+2.很多作业里加载了jieba，我在git上搜索到了这个第三方模块，应该试试把它加载上，这也是对昨天内容的考查。
+3.进度有点卡在统计字频那一天。
+4.周末时间并没有安排得那么理想。
diff --git a/19100303/Luchen1471/main.py b/19100303/Luchen1471/main.py
@@ -1,4 +1,10 @@
-text =  ''' 
+# this is d6 exercise for modules
+# date : 2019.3.24
+# author by : qiming
+# modified by : Luchen
+# 确实还是别人的代码，上上次课的一个内容感觉不太熟悉，我的进度有点卡在统计字频这件事上，但是这两次的内容似乎不太复杂。
+
+text1 =  ''' 
 愚公移山
 太行，王屋二山的北面，住了一個九十歲的老翁，名叫愚公。二山佔地廣闊，檔住去路，使他 和家人往來極為不便。
 一天，愚公召集家人說：「讓我們各盡其力，剷平二山，開條道路，直通豫州，你們認為怎 樣？」
@@ -29,6 +35,10 @@
 Filled with admiration for Yugong, the Emperor of Heavens ordered two mighty gods to carry the mountains away.
 '''
 
-
+text = 0
+#text = text1
 from mymodule import stats_word
-print('合并词频统计结果： ', stats_word.stats_text(text))
+try : 
+    print('合并词频统计结果： ', stats_word.stats_text(text))
+except ValueError as ve :
+    print(ve)
diff --git a/19100303/Luchen1471/mymodule/stats_word.py b/19100303/Luchen1471/mymodule/stats_word.py
@@ -1,105 +1,39 @@
-# 示例字符串
-'''
-text =  
-The Zen of Python, by Tim Peters
+# this is d8 exercise for errors and exceptions
+# date : 2019.3.25
+# author by : qiming
+# Luchen：确实还是别人的代码，上上次课的一个内容感觉不太熟悉，我的进度有点卡在统计字频这件事上。
 
 
-Beautiful is better than ugly.
-Explicit is better than implicit.
-Simple is better than complex.
-Complex is better than complicated.
-Flat is better than nested.
-Sparse is better than dense.
-Readability counts.
-Special cases aren't special enough to break the rules.
-Although practicality beats purity.
-Errors should never pass silently.
-Unless explicitly silenced.
-In the face of ambxiguity, refuse the temptation to guess.
-There should be one-- and preferably only one --obvious way to do it.
-Although that way may not be obvious at first unless you're Dutch.
-Now is better than never.
-Although never is often better than *right* now.
-If the implementation is hard to explain, it's a bad idea.
-If the implementation is easy to explain, it may be a good idea.
-Namespaces are one honking great idea -- let's do more of those!
-
-Python是一种计算机程序设计语言。是一种动态的、面向对象的脚本语言，最初被设计用于编写自动化脚本(shell)，随着版本的不断更新和语言新功能的添加，越来越多被用于独立的、大型项目的开发。
-
-'''
 import collections
 import re
 
-def stats_text_en(string_en):
-    ''' 统计英文词频
-    
-    第一步：过滤英文字符，并将string拆分为list。
-    第二步：清理*-等标点符号。
-    第三步：使用collections库中的Counter函数进行词频统计并输出统计结果。
-    '''
-    #print("处理前的原始字符串\n\n",string_en)
-    result = re.sub("[^A-Za-z]", " ", string_en.strip())#把非A-Z和a-z的字符串全部去除掉
-    #print("处理后的结果\n\n",result)
-    newList = result.split( )
-    i=0
-    for i in range(0,len(newList)):
-        newList[i]=newList[i].strip('*-,.?!')
-        if newList[i]==' ': 
-            newList[i].remove(' ')
-        else:
-            i=i+1
-    print('英文单词词频统计结果： ',collections.Counter(newList),'\n')
-
-
-def stats_text_cn(string_cn):
-    ''' 统计中文汉字字频
-    
-    第一步：过滤汉字字符，并定义频率统计函数 stats()。
-    第二步：清除文本中的标点字符,将非标点字符组成新列表 new_list。
-    第三步：遍历列表，将字符同上一次循环中频率统计结果作为形参传给统计函数stats()。
-    第四步：统计函数在上一次统计结果基础上得出本次统计结果，赋值给newDict。
-    第五步：new_list遍历结束，输出倒序排列的统计结果。
-    '''
-    result1 = re.findall(u'[\u4e00-\u9fff]+', string_cn)
-    newString = ''.join(result1)
-
-    def stats(orgString, newDict) :
-        d = newDict
-        for m in orgString :
-            d[m] = d.get(m, 0) + 1
-        return d
-    
-    new_list = []
-    for char in newString :
-        cn = char.strip('-*、。，：？！……')
-        new_list.append(cn)
-    
-    words = dict()
-    for n in range(0,len(new_list)) :
-        words = stats(new_list[n],words)
-    newWords = sorted(words.items(), key=lambda item: item[1], reverse=True) 
-    print('中文汉字字频统计结果： ',dict(newWords))
-
-# 调用函数
-#stats_text_en(text)
-#stats_text_cn(text)
-
 def stats_text_en(en) :
-    ''' 英文词频统计'''
-    text_en = re.sub("[^A-Za-z]", " ", en.strip())
-    enList = text_en.split( )
-    return collections.Counter(enList)
+    ''' 1. 英文词频统计。
+        2. 参数类型检查，不为字符串抛出异常。
+    '''
+    if type(en) == str : 
+            text_en = re.sub("[^A-Za-z]", " ", en.strip())
+            enList = text_en.split( )
+            return collections.Counter(enList)
+    else : 
+            raise ValueError ('type of argumengt is not str')
 
-    
 def stats_text_cn(cn) :
-    ''' 汉字字频统计 '''
-    cnList = re.findall(u'[\u4e00-\u9fff]+', cn.strip())
-    cnString = ''.join(cnList)
-    return collections.Counter(cnString)
+    ''' 1. 汉字字频统计 
+        2. 参数类型检查，不为字符串抛出异常。
+    '''
+    if type(cn) == str : 
+            cnList = re.findall(u'[\u4e00-\u9fff]+', cn.strip())
+            cnString = ''.join(cnList)
+            return collections.Counter(cnString)
+    else :
+            raise ValueError ('type of argumengt is not str')
 
 def stats_text(text_en_cn) :
-    ''' 合并英汉词频统计 '''
-    return (stats_text_en(text_en_cn)+stats_text_cn(text_en_cn))
-
-#感觉有一个问题没太明白，我交完作业再看看……
-#还有，文件虽然有报错，但是好像不耽误调用
+    ''' 1. 合并英汉词频统计 
+        2. 参数类型检查，不为字符串抛出异常。
+    '''
+    if type(text_en_cn) == str : 
+            return (stats_text_en(text_en_cn)+stats_text_cn(text_en_cn))
+    else :
+            raise ValueError ('type of argumengt is not str')