R/Wednesday2.R at master · panmari/R · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# Date: Wednesday, August 8, 2012

##############################
### Binomial Distributions ###
##############################

# Examples for binomially distributed random
# variables:
# - Election polls: A random sample of n persons is
#   interviewed,
#      X = nr. of people supporting party ABC
#        ~ binom(n,p)
#   with p in [0,1]: unknown percentage of supporters
#   of ABC in the whole population.
# - cross-fertilization experiment: Two plants of
#   genotype Aa produce n children.
#      X = nr. of children of genotype aa
#        ~ binom(n,p),
#   presumably, p = 1/4 (Mendel's law), but maybe
#   some selection is going on.

help.search("binomial distribution")
? Binomial

### Binominal Distributions


# Illustrate binomial distributions
# with a multiple plot:

n <- 30
x <- 0:n

# Visualize binom(n,p) for 9 different values opf p:
par(mfcol=c(3,3))
pv <- seq(from=0.1,to=0.9,by=0.1)
for (j in 1:9)
{
	barplot(dbinom(x,n,pv[j]),names.arg=x,
		main=paste("p = ", as.character(pv[j])))
}

# There are many ways to modify plots.
? par
# One interesting parameter is "mai"
# ("ma(rgin sizes in) i(nches)").
# An example:
# same as css margins, less space between plots
par(mfcol=c(3,3),mai=c(0.3,0.3,0.5,0))
pv <- seq(from=0.1,to=0.9,by=0.1)
for (j in 1:9)
{
	barplot(dbinom(x,n,pv[j]),names.arg=x,
		main=paste("p = ", as.character(pv[j])))
}
# Another group of useful parameters:
# All parameters cex, cex.... which control
# the font size of axis labels, numbers etc.

# Computes confidence intervalls for p!
? binom.test

# Short explanation of the parameters:
# x : observation X
# n : sample size or number of trials
# p : a hypothetical value of the unknown p
#     to be tested (default: 0.5).
# alternative : optional parameter with
#     possible values
#     "two.sided" (default), "greater", "less".
# conf.level : confidence level for the
#     confidence interval or confidence bound
#     (default: 0.95).
#
# * alternative == "two.sided": The
#   null hypothesis that the unknown p equals
#   the specified value is tested, and a
#   confidence interval is computed.
# * alternative == "greater": The
#   null hypothesis that the unknown p is
#   less than or equal to the specified value
#   is tested, and a lower confidence bound
#   (confidence interval with upper limit 1)
#   is computed.
# * alternative == "less": The
#   null hypothesis that the unknown p is
#   greater than or equal to the specified value
#   is tested, and an upper confidence bound
#   (confidence interval with lower limit 0)
#   is computed.


# First examples for the use of binom.test:
# Interview n = 400 potential voters
# and determine number X of supporters of
# party ABC...

n <- 400
x <- 122

binom.test(x,n)
binom.test(x,n,conf.level=0.99)

# p-value = plausibility of null-hypothesis

# By default, binom.test() computes a
# confidence interval for the unknown p.
# If one is interested only in an upper bound
# or only in a lower bound, one has to use
# the optional input argument "alternative"

binom.test(x,n,alternative="less")
binom.test(x,n,alternative="greater")
binom.test(x,n,alternative="two.sided")

# Of the 60 attendees of this course,
# 8 people smoke regularly.
# We pretend that the 60 attendees are a
# random subsample of the population of
# all students in Bern, and p is the unknown
# proportion regular smokers within this
# population.
binom.test(8,60)
binom.test(8,60,conf.level=0.99)


# Second illustration of binom.test():
# Use "scan()" to generate and store a "random vector"
# containing about 70 entries in {1,2}:
data <- scan("")
# Question: Are you a reliable random generator?

# First test:
# We determine n = length(data) and
# X = number of entries equal to "1":
n <- length(data)
x <- sum(data == 1)
binom.test(x,n)

# Second test:
# We determine n2 = n-1 and
# x2 = number of indices i in {1,2,...,n-1}
#      such that data[i] != data[i+1]:
data[1:(n-1)] != data[2:n] # if two following numbers are not the same => true
n2 <- n-1
x2 <- sum(data[1:(n-1)] != data[2:n]) #how many times did the coin change immediately?
binom.test(x2,n2)


# Exercise for binom.test:

# Help analyzing the following (fictitious) data:
# * Party ABC is hoping for > 10% voters in the
#   upcoming election.
#   In a poll with n = 1000 people, X = 127 claimed to
#   support ABC. Does this confirm the party's hope?
# * In a cross-fertilization trial (see above),
#   n = 52 plants with parents of genotype Aa have been
#   grown, and X = 9 plants turned out to be of
#   genotype aa. Is
#      p = Prob(child = aa | parents are both Aa)
#        = 1/4?


binom.test(127, 1000, p=0.1, alternative="greater", conf.level=0.99)

binom.test(9,52, p=1/4, alternative="two.sided")

? binom.test


# Try to do this yourself, before reading on!

# For first example: working hypothesis (alternative)
# is that p > 0.1:

binom.test(127, 1000, p=0.1, alternative="greater")

# For second example: working hypothesis (alternative)
# is that p != 0.25, any deviation is possible.

binom.test(9, 52, p=0.25)
binom.test(9, 52, p=0.25, alternative="two.sided")

# Disappointed? Well, the variability of
# binom(52,0.25) is quite large:
barplot(dbinom(0:52,52,0.25),names.arg=0:52)

# If we would have had 5 times as many observations:

binom.test(45, 260, p=0.25, alternative="two.sided")
barplot(dbinom(0:260,260,0.25),names.arg=0:260)


# Final comment: Always try to work with
# confidence intervals or confidence bounds
# rather than tests!