|
15 | 15 | import statistics
|
16 | 16 | import seaborn as sns
|
17 | 17 |
|
18 |
| -#輸入資料 |
19 |
| -boys=[164, 176, 169, 169, 165, 175, 159, 151, 144, 160, 183, 165, 156, 170, 164, 173, 165, 163, 177, 171] |
20 |
| -print(sorted(boys)) |
| 18 | +## 輸入資料 |
| 19 | +boys=[164, 176, 169, 169, 165, 175, 159, 151, 144, 160, 183, 165, 156, 170, |
| 20 | + 164, 173, 165, 163, 177, 171] |
| 21 | + |
| 22 | +girls=[169, 183, 170, 168, 182, 170, 173, 185, 151, 156, 162, 169, 162, 181, |
| 23 | + 159, 154, 167, 175, 170, 160] |
| 24 | + #Q1:試著用今天所教的內容,如何描述這兩組資料的樣態? |
| 25 | +#暗示: 分別計算男生和女生的統計量 |
21 | 26 | # 計算統計量_平均數的方法
|
22 | 27 | mean_boy=np.mean(boys)
|
23 | 28 | print("男孩身高平均=",mean_boy)
|
24 |
| - |
25 |
| -statistics_mean_boy=statistics.mean(boys) |
26 |
| -print("statistics_mean_boy=",statistics_mean_boy) |
27 |
| - |
| 29 | +mean_girl=np.mean(girls) |
| 30 | +print("女孩身高平均=",mean_girl) |
28 | 31 |
|
29 | 32 | # 計算統計量_中位數的方法
|
30 | 33 | np_median_boy=np.median(boys,axis=None)
|
31 |
| -print("np_median_boy=",np_median_boy) |
32 |
| - |
33 |
| -statistics_median_boy=statistics.median(boys) |
34 |
| -print("statistics_median_boy=",statistics_median_boy) |
35 |
| - |
36 |
| -# 統計量_眾數 |
37 |
| -# 統計量的眾數,如果有多個眾數,取最小的值當眾數。 |
| 34 | +print("男孩身高中位數=",np_median_boy) |
| 35 | +np_median_girl=np.median(girls,axis=None) |
| 36 | +print("女孩身高中位數=",np_median_girl) |
38 | 37 |
|
| 38 | +#計算統計量_眾數 |
39 | 39 | mode_boy=stats.mode(boys,axis=None)
|
40 |
| -print("男孩身高眾數=",mode_boy) |
41 | 40 | print("男孩身高眾數=",mode_boy[0][0])
|
42 | 41 |
|
43 |
| -# 統計量_眾數 |
44 |
| -statistics_mode_boy=statistics.mode(boys) |
45 |
| -print("statistics_mode_boy=",statistics_mode_boy) |
| 42 | +mode_girl=stats.mode(girls,axis=None) |
| 43 | +print("女孩身高眾數=",mode_girl[0][0]) |
46 | 44 |
|
47 |
| -#全距 |
48 |
| -#rangeV=max(boys)-min(boys) |
| 45 | +#計算全距: |
49 | 46 | def rangeV(x):
|
50 | 47 | return(max(x)-min(x))
|
51 | 48 |
|
52 |
| -print(rangeV(boys)) |
| 49 | +print("男孩身高全距=",rangeV(boys)) |
| 50 | +print("女孩身高全距=",rangeV(girls)) |
53 | 51 |
|
54 |
| -# 計算變異數的方法 |
55 |
| -print("男孩身高變異數=",statistics.variance(boys)) |
| 52 | +#計算變異數 |
56 | 53 | print("男孩身高變異數=",np.var(boys,ddof=1))
|
| 54 | +print("女孩身高變異數=",np.var(girls,ddof=1)) |
57 | 55 |
|
58 |
| -# 統計量_標準差的方法 |
59 |
| -#樣本標準差 |
60 |
| -#ddof=1, 回傳 sample standard deviation 樣本標準差,分母(n-1),無偏估計 |
| 56 | + |
| 57 | +#計算標準差 |
61 | 58 | std_boy=np.std(boys,ddof=1)
|
62 | 59 | print("男孩身高標準差=",std_boy)
|
| 60 | +std_girl=np.std(girls,ddof=1) |
| 61 | +print("女孩身高標準差=",std_girl) |
63 | 62 |
|
64 |
| -statistics_stdev_boy=statistics.stdev(boys) |
65 |
| -print("statistics_mean_boy=",statistics_stdev_boy) |
66 |
| - |
67 |
| -# python 百分位數 |
| 63 | +## python 百分位數 |
68 | 64 | #np
|
69 |
| -print("90百分位數=",np.percentile(boys, 90)) |
70 |
| -print("50百分位數=",np.percentile(boys, 50)) |
71 |
| -print("20百分位數=",np.percentile(boys, 20)) |
72 |
| -#stat |
73 |
| -print("20百分位數=",stats.scoreatpercentile(boys, 20)) |
| 65 | +print("男孩身高90百分位數=",np.percentile(boys, 90)) |
| 66 | +print("男孩身高50百分位數=",np.percentile(boys, 50)) |
| 67 | +print("男孩身高20百分位數=",np.percentile(boys, 20)) |
| 68 | + |
| 69 | +print("女孩身高90百分位數=",np.percentile(girls, 90)) |
| 70 | +print("女孩身高50百分位數=",np.percentile(girls, 50)) |
| 71 | +print("女孩身高20百分位數=",np.percentile(girls, 20)) |
| 72 | + |
74 | 73 |
|
75 | 74 | #計算峰度和偏度
|
76 |
| -print(stats.skew(boys)) |
77 |
| -print(stats.kurtosis(boys)) |
| 75 | +print("男孩身高偏度=",stats.skew(boys)) |
| 76 | +print("男孩身高峰度=",stats.kurtosis(boys)) |
78 | 77 |
|
79 |
| -# pandas和 stat 接近 |
80 |
| -# python的峰帶 |
| 78 | +print("女孩身高偏度=",stats.skew(girls)) |
| 79 | +print("女孩身高峰度=",stats.kurtosis(girls)) |
81 | 80 |
|
82 |
| -#最後,畫圖看分布 |
| 81 | +# https://kknews.cc/zh-tw/code/3ngaz5a.html |
| 82 | +#最後,畫圖看分布 | boy's skew and kurtosis |
83 | 83 | plt.hist(boys,alpha=.4,bins=40)
|
84 | 84 | plt.title('boy,skewness={0},kurtosis={1}'.format(round(stats.skew(boys),2),round(stats.kurtosis(boys),2)))
|
85 | 85 | plt.axvline(x=mean_boy)
|
86 | 86 | plt.show()
|
| 87 | + |
| 88 | +#最後,畫圖看分布 | girl's skew and kurtosis |
| 89 | +plt.hist(girls,alpha=.4,bins=40,color=sns.desaturate("indianred", .8)) |
| 90 | +plt.title('girl,skewness={0},kurtosis={1}'.format(round(stats.skew(girls),2),round(stats.kurtosis(girls),2))) |
| 91 | +plt.axvline(x=mean_girl,color=sns.desaturate("indianred", .8)) |
| 92 | +plt.show() |
| 93 | + |
| 94 | +plt.hist(boys,alpha=.4) |
| 95 | +plt.hist(girls,color=sns.desaturate("indianred", .8),alpha=.4) |
| 96 | +plt.title("all samples") |
| 97 | +plt.axvline(x=mean_girl,color=sns.desaturate("indianred", .8)) |
| 98 | +plt.axvline(x=mean_boy) |
| 99 | +plt.show() |
| 100 | + |
| 101 | +#Q1答案: 從統計量與分布來看,女生的平均身高較高(168.3 對 165.95);男生的全距較大(39 對 34),但女生的樣本標準差略大(約 9.77 對 9.21),兩組資料的離散程度相近。 |
| 102 | + |
| 103 | +#Q2: 請問男生和女生在平均身高上誰比較高? |
| 104 | +# 計算統計量_平均數的方法 |
| 105 | +mean_boy=np.mean(boys) |
| 106 | +print("男孩身高平均=",mean_boy) |
| 107 | +mean_girl=np.mean(girls) |
| 108 | +print("女孩身高平均=",mean_girl) |
| 109 | +#女生平均而言比較高 |
| 110 | + |
0 commit comments