1
def pareto(data_list):
    """
    Estimate the median of grouped (binned) data, like we get from
    census age and income distributions.

    The median is located by linear interpolation inside the group that
    contains the halfway point of the total count. This is the same
    calculation as the spreadsheet formula (B = count, C = base, D = width):

        =IF (SUM (B$2:B8) > SUM(B$2:B$14)/2,
             (ABS( SUM(B$2:B$14)/2 - SUM(B$2:B7) ) / B8 ) * D8+C8,"")

    Takes a list of data, where each item contains a count of
    the number of items that fall in that range, plus the
    bottom end of the range and its width.

    Each item should look like this:

        [COUNT, BOTTOM, WIDTH]

    Items must be ordered from the lowest range to the highest, and
    every value must be convertible with float().

    Here's a full example using age:

        [   # count   # base  # width
            [216350,  0,      5],   # Under 5 years
            [201692,  5,      5],   # 5 to 9 years
            [211151,  10,     5],   # 10 to 14 years
            [204986,  15,     5],   # 15 to 19 years
            [200257,  20,     5],   # 20 to 24 years
            [439047,  25,     10],  # 25 to 34 years
            [459664,  35,     10],  # 35 to 44 years
            [424775,  45,     10],  # 45 to 54 years
            [163492,  55,     5],   # 55 to 59 years
            [127511,  60,     5],   # 60 to 64 years
            [169552,  65,     10],  # 65 to 74 years
            [113693,  75,     10],  # 75 to 84 years
            [44661,   85,     10],  # 85 years and over
        ]

    Returns the estimated median as a float, or 0 when there is no data
    (an empty list, or counts that do not add up to a positive total).
    """
    # Convert once at the edge so the arithmetic below is all float.
    groups = [
        (float(item[0]), float(item[1]), float(item[2]))
        for item in data_list
    ]

    # Break early if we don't have data. A non-positive total cannot
    # contain a median group, so it is treated the same as "no data".
    total = sum(count for count, _base, _width in groups)
    if total <= 0:
        return 0

    # The median sits in the first group whose cumulative count is
    # strictly greater than half of the total count.
    target = total / 2
    cumulative_count = 0.0
    for count, base, width in groups:
        previous_groups_sum = cumulative_count
        cumulative_count += count
        if cumulative_count > target:
            # Interpolate: how far into this group the halfway point
            # falls, scaled by the group's width and offset by its base.
            return base + ((target - previous_groups_sum) / count) * width

    # Only reachable if the counts are inconsistent (e.g. negative values
    # that keep the running total from ever passing the halfway point).
    return 0
0 commit comments