-
Notifications
You must be signed in to change notification settings - Fork 0
/
BondadAjuste.R
222 lines (171 loc) · 6.57 KB
/
BondadAjuste.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
####===========================================================A.L.R.R.2021
# SCRIPT Probabilidad e Intervalos de confianza
####===========================================================####
# Set working directory
setwd(paste('/Users/adriana/Documents/default/',
'/deafult', sep = ""))
####===========================================================####
# 1. We are interested in the heights of the players of the two...
# ... basketball teams Brose Baskets Bamberg and Bayer Giants...
# ... Leverkusen as well as the football team SV Werder Bremen.
# Calculate a 95% confidence interval for all three teams and..
# ...interpret the results
# Size of the 2 basketball teams and the football team
bamberg_n <- 16
leverkusen_n <- 14
bremen_n <- 23
all_n <- bamberg_n + leverkusen_n + bremen_n # n > 30
# Mean height: mean of the mean of all teams
bamberg_mean <- 199.06
leverkusen_mean <- 196.00
bremen_mean <- 187.52
all_mean <- mean(c(bamberg_mean, leverkusen_mean, bremen_mean))
# Variance in height: mean of the variance of all teams
bamberg_sd <- 7.047
leverkusen_sd <- 9.782
bremen_sd <- 5.239
all_sd <- mean(c(bamberg_sd, leverkusen_sd, bremen_sd))
# Confidence quotient: Z alpha/2
alpha2 <- (1 - 0.95) / 2
z025 <- qnorm(alpha2, lower.tail = F)
# Standard Error
errorest <- all_sd / sqrt(all_n)
# Margin of error
margerr <- z025 * errorest
# Lower limit
liminf <- all_mean - margerr
# Upper limit
limsup <- all_mean + margerr
# Output
cbind("Lim_inf(cm)" = round(liminf, 2),
"Lim_sup(cm)" = round(limsup, 2))
####===========================================================####
# 2. A married couple tosses a coin after each dinner to...
# ...determine who has to wash the dishes. If the coin shows...
# ...head, then the husband has to wash the dishes, and if the...
# ...coin shows tails, then the wife has to wash the dishes...
# ...After 98 dinners, the wife notes that the coin has shown...
# ...heads 59 times.
# Estimate the probability that the wife has to wash the dishes.
print(paste("Dado que la decisión está basada en el",
"lanzamiento de una moneda, y los intentos de cada",
"lanzamiento son independientes el uno del otro,",
"la probabilidad de que la esposa tenga que lavar",
"los platos esta noche es de 0.5 o del 50%",
sep = " "))
# Calculate and interpret the 95% confidence interval for p.
# Binomial experiment: distribución muestral es aprox. normal
# con media p y error estándar SE = sqrt(pq/n)
n <- 98 # Total number of trials
q <- 59 # Total number of failures (when the husband gets it)
x <- n - q # Total number of successes (the wife gets it)
# Proporción muestral: mejor estimador puntual para proporción...
# ... * poblacional *
pgorro <- x / n # Mean probability of success
qgorro <- 1 - pgorro
# Confidence quotient
z025 <- qnorm(0.05 / 2, lower.tail = F)
# Lower Limit
liminf <- pgorro - (z025 * sqrt((pgorro * qgorro) / n))
# Upper Limit
limsup <- pgorro + (z025 * sqrt((pgorro * qgorro) / n))
# Output
cbind("Lim_inf(p)" = round(liminf, 2),
"Lim_sup(p)" = round(limsup, 2))
####===========================================================####
# 3. A random sample of 100 deaths recorded in the United States...
# ...last year revealed an average lifespan of 71.8 years....
# ...Assuming a population standard deviation of 8.9 years, does...
# ...this seem to indicate that the current half-life is greater...
# ...than 70 years? Use a significance level of 0.05.
# Datos
n <- 100
xbarra <- 71.8
sigma <- 8.9
signif <- 0.05
mu <- 70
H0 <- "mu = 70 years"
HA <- "mu > 70 years"
# Estadístico de prueba
Zest <- (xbarra - mu) / (sigma / sqrt(n))
# Valor crítico
Zcrit <- qnorm(signif, lower.tail = F)
# Prueba
if (Zest >= Zcrit) {
print(paste("Se rechaza la H0 de que",
H0),
sep = " ")
} else {
print(paste("No se rechaza la H0 de que",
H0),
sep = " ")
}
# Valor p específico
p <- pnorm(Zest, lower.tail = F)
# Plot
library(ggplot2)
randNorm <- rnorm(100)
randDensity <- dnorm(randNorm)
ggplot(data.frame(x = randNorm, y = randDensity)) +
aes(x = x, y = y) +
geom_point(
) +
labs(x = "Variable Normal Aleatoria", y = "Densidad"
) + geom_segment(aes(x = Zest, xend = Zest,
y = 0, yend = 0.06),
lty = 3, color = "blue") + geom_segment(
aes(x = Zcrit, xend = Zcrit,
y = 0, yend = 0.1),
lty = 3, color = "red")
ggsave("normal.jpeg")
####===========================================================####
# 4. What is Wilcoxon-Mann-Whitney (WMW) U-Test?
# ...Give an example using R
# Select an R data set
data(iris)
# Count number of data
nrow(iris)
# Check that the structure of the data
str(iris)
# Subselect less than 30 rows
iris_np <- iris[sample(nrow(iris), 27) ,]
row.names(iris_np) <- NULL
# Check number of data points in each factor...
# ...each factor will be assumed to be an independent
# ...sample
nrow(iris_np[which(iris_np$Species=="virginica"),])
nrow(iris_np[which(iris_np$Species=="setosa"),])
nrow(iris_np[which(iris_np$Species=="versicolor"),])
# Create two data frames to compare
virginica <- iris_np[which(iris_np$Species=="virginica"), ]
versicolor <- iris_np[which(iris_np$Species=="versicolor"),]
# Check normality in Petal Length
shapiro.test(virginica$Sepal.Length)
shapiro.test(versicolor$Sepal.Length)
# U-Mann-Whitney
wilcox.test(virginica$Sepal.Length, versicolor$Sepal.Length)
####===========================================================####
# 5. What is the goodness-of-fit test 𝟀2?
# ...Give an example using R
# Following the previous example, we can test whether the sub-
# ...sample we followed
virginica <- iris_np[which(iris_np$Species=="virginica"), ]
versicolor <- iris_np[which(iris_np$Species=="versicolor"),]
setosa <- iris_np[which(iris_np$Species == "setosa"),]
iris_expected <- c(50/150, 50/150, 50/150)
iris_observed <- c(nrow(virginica), nrow(versicolor), nrow(setosa))
resultado <- chisq.test(iris_observed, p = iris_expected)
resultado
# Con un ∝=0.05 y dos grados de libertad, se tiene:
chiqalpha <- qchisq(0.05, 2, lower.tail = F)
# Prueba
H0 <- "hay un buen ajuste entre las frecuencias esperadas y las observadas."
HA <- "hay un ajuste deficiente entre las frecuencias esperadas y las observadas."
if (resultado$statistic > chiqalpha) {
print(paste("Se rechaza la H0 de que",
H0),
sep = "")
} else {
print(paste("No se rechaza H0. Por lo tanto, se asume que ",
H0, sep = ""))
}