|
| 1 | +# 今天學到不同統計量之間特性, |
| 2 | +# 試著分析男生女生身高資料, |
| 3 | +# 試著回答下面的問題: |
| 4 | +# Q1:試著用今天所教的內容,如何描述這兩組資料的樣態? |
| 5 | +# Q2: 請問男生和女生在平均身高上誰比較高? |
| 6 | +# Q3:請問第二題的答案和日常生活中觀察的一致嗎? 如果不一致,你覺得原因可能為何? |
| 7 | +# 上述問題透過 python 語法進行運算, 並將上述答案填寫在 [google 表單](https://docs.google.com/forms/d/e/1FAIpQLSdDzwpeJl8YLPwZaW8pBZvtuXY9kIbbZLqxcXyzFaoraV5JEA/viewform) |
| 8 | + |
| 9 | +# library |
| 10 | +import matplotlib.pyplot as plt |
| 11 | +import numpy as np |
| 12 | +import pandas as pd |
| 13 | +from scipy import stats |
| 14 | +import math |
| 15 | +import statistics |
| 16 | +import seaborn as sns |
| 17 | + |
# Input data: the boys' heights analysed by the rest of this script.
boys = [164, 176, 169, 169, 165, 175, 159, 151, 144, 160,
        183, 165, 156, 170, 164, 173, 165, 163, 177, 171]
print(sorted(boys))

# Mean, computed with NumPy and with the standard library for comparison.
mean_boy = np.mean(boys)
print("男孩身高平均=", mean_boy)

statistics_mean_boy = statistics.mean(boys)
print("statistics_mean_boy=", statistics_mean_boy)


# Median, computed with NumPy and with the standard library for comparison.
np_median_boy = np.median(boys, axis=None)
print("np_median_boy=", np_median_boy)

statistics_median_boy = statistics.median(boys)
print("statistics_median_boy=", statistics_median_boy)

# Mode via SciPy.
# Original author's note: when several values tie for the mode, the smallest
# one is reported.
mode_boy = stats.mode(boys, axis=None)
print("男孩身高眾數=", mode_boy)
# Depending on the SciPy version, `mode_boy.mode` is either a 1-element array
# or a plain scalar when axis=None. `np.atleast_1d` normalises both shapes so
# the indexing below never fails with "invalid index to scalar variable".
mode_boy_value = np.atleast_1d(mode_boy.mode)[0]
print("男孩身高眾數=", mode_boy_value)

# Mode via the standard library.
statistics_mode_boy = statistics.mode(boys)
print("statistics_mode_boy=", statistics_mode_boy)
| 46 | + |
def rangeV(x):
    """Return the range (largest value minus smallest value) of *x*.

    Args:
        x: Any iterable of mutually comparable values that support
            subtraction (e.g. a list of numbers).

    Returns:
        ``max(x) - min(x)``.

    Raises:
        ValueError: If *x* is empty.
    """
    # Materialise once: a generator/iterator would otherwise be exhausted by
    # max() and leave nothing for min() to look at.
    values = list(x)
    return max(values) - min(values)
| 51 | + |
# Range of the boys' heights.
print(rangeV(boys))

# Variance: both calls use the sample (n - 1) denominator, so they should
# agree up to floating-point rounding.
print("男孩身高變異數=", statistics.variance(boys))
print("男孩身高變異數=", np.var(boys, ddof=1))

# Standard deviation.
# ddof=1 makes NumPy return the sample standard deviation (n - 1 denominator).
std_boy = np.std(boys, ddof=1)
print("男孩身高標準差=", std_boy)

statistics_stdev_boy = statistics.stdev(boys)
# The label previously read "statistics_mean_boy=", a copy-paste slip that
# mislabelled the standard deviation as a mean.
print("statistics_stdev_boy=", statistics_stdev_boy)

# Percentiles with NumPy.
print("90百分位數=", np.percentile(boys, 90))
print("50百分位數=", np.percentile(boys, 50))
print("20百分位數=", np.percentile(boys, 20))
# The same 20th percentile with SciPy, for comparison.
print("20百分位數=", stats.scoreatpercentile(boys, 20))

# Skewness and kurtosis.
print(stats.skew(boys))
print(stats.kurtosis(boys))

# Original author's notes (not verified by this script): pandas results are
# said to be close to scipy.stats; the second note appears to refer to
# Python's kurtosis -- TODO confirm the intended meaning.
| 81 | + |
# Finally, draw a histogram to look at the distribution; the title carries
# the rounded skewness and kurtosis, and a vertical line marks the mean.
skewness = round(stats.skew(boys), 2)
kurtosis_value = round(stats.kurtosis(boys), 2)
title_template = 'boy,skewness={0},kurtosis={1}'

plt.hist(boys, bins=40, alpha=.4)
plt.title(title_template.format(skewness, kurtosis_value))
plt.axvline(x=mean_boy)
plt.show()
0 commit comments