Skip to content

Commit

Permalink
Adds more images/content to stats lecture.
Browse files Browse the repository at this point in the history
  • Loading branch information
arokem committed Apr 21, 2024
1 parent acabf1b commit d3307c8
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 60 deletions.
41 changes: 29 additions & 12 deletions slides/04-stats-with-big-data.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,38 @@ Data can be "big" in two different ways:

# Plotting your data

![](./images/loftus_plots.png)

# Using confidence intervals to assess results

![](./images/loftus_plot_with_error.png)

# Using confidence intervals to assess results

![](./images/loftus_comparison.png)

# Using confidence intervals to assess results

![](./images/loftus_lowry.png)

# Explicit models

- For example, the model that Dr. Loeb used.
- Analogies from physics are useful, because physical law provides mathematical formulations of relationships.
- Or "planned comparisons":
- Come up with a quantitative hypothesis
- Assign weights for each condition (to sum to 0)
- Compute the correlation between weights and conditional means.

# Computing to the rescue

::: {.fragment}

![](./images/computers_in_1983.png)

:::


# Resampling methods

- Jackknife
Expand Down Expand Up @@ -136,15 +164,6 @@ Invented by Bradley Efron
- And other complex procedures.
- Efron argues that this is the natural procedure Fisher et al. would have preferred in the 20's if they had computers.

# When Efron talks about computers

::: {.fragment}

He's talking about this:

![](./images/computers_in_1983.png)

:::

# Demo

Expand Down Expand Up @@ -173,14 +192,12 @@ by Tim Hesterberg.

# The curse of dimensionality

What about large $p$
What about large $p$?

- There be dragons


# Data is sparser in higher dimensions


# The distance between points increases rapidly

# When $p$ > $n$ multicollinearity exists
Expand Down
94 changes: 46 additions & 48 deletions slides/code/stats_demo.r
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# and helped correct string formatting issues and debug broadcasting errors.

library(boot)
library(ggplot2)

# Function to calculate the mean:
calc_mean <- function(data, indices) {
Expand Down Expand Up @@ -93,79 +94,76 @@ cat(paste(
"Jackknife bias: ", jackknife_bias[ii], "\n", sep = ""))


# Next, bootstrapping
######################################################################
# Bootstrapping

# Use the boot function to perform bootstrapping
boot_results <- boot(data, calc_mean, R = 10000)

# Calculate the standard error
std_error <- sd(boot_results$t)
print(paste("Standard Error of the Mean: ", std_error))
sorted_boot_results <- sort(boot_results$t)

# Plot sorted results with ggplot
ggplot(data.frame(sorted_boot_results),
aes(x=seq_along(sorted_boot_results),
y = sorted_boot_results)) +
geom_line()

# Two bootstrap estimates of the standard error of the mean, both computed
# from the sorted bootstrap replicates in `sorted_boot_results`.

# (1) Half the width of the central 68% of the bootstrap distribution
# (16th to 84th percentile); for a roughly normal distribution this spans
# about one standard deviation on either side of the center:
ci <- quantile(sorted_boot_results, c(0.16, 0.84))
std_err <- 0.5 * (ci[2] - ci[1])
cat(paste(
  "Standard Error of the Mean: ", std_err, "\n"))

# (2) Standard deviation of the bootstrap distribution:
std_err <- sd(sorted_boot_results)
cat(paste(
  "Standard Error of the Mean: ", std_err, "\n"))

# Calculate 95% CI:
ci <- quantile(sorted_boot_results, c(0.025, 0.975))

# Define the non-linear model function
nonlinear_model <- function(x, a, tau) {
1 - a * exp(x/tau)
cat(paste(
"95% Confidence Interval: ", ci[1], ci[2], "\n"))

# Exponential-approach model: evaluates to `beta` at x = 0 and tends to
# `alpha` as x grows (for positive `kappa`), with `kappa` setting the scale
# of the transition along x.
nonlinear_model <- function(x, alpha, beta, kappa) {
  decay <- exp(-x / kappa)
  alpha - (alpha - beta) * decay
}


# Generate some sample data
set.seed(123)
x <- seq(-10, 10, length.out = 100)
y <- nonlinear_model(x, 2, 3) + rnorm(length(x), mean = 0, sd = 2)
x <- seq(1, 100, length.out = 100)
y <- nonlinear_model(x, 100, 10, 20) + rnorm(length(x), mean = 0, sd = 2)

ggplot(data.frame(x = x, y = y), aes(x = x, y = y)) +
geom_line()

# Define the function to estimate the parameters using the non-linear model
estimate_parameters <- function(data, indices) {
x <- data$X[indices]
y <- data$Y[indices]
fit <- nls(y ~ nonlinear_model(x, a, tau), start = list(a = 1, tau = 1))
fit <- nls(
y ~ nonlinear_model(x, alpha, beta, kappa),
start = list(
alpha = 100, beta = 10, kappa = 20))
coef(fit)
}

# Perform bootstrap estimation
boot_results <- boot(data = data.frame(X = x, Y = y), statistic = estimate_parameters, R = 1000)
boot_results <- boot(
data = data.frame(X = x, Y = y),
statistic = estimate_parameters, R = 10000)

# Get the bootstrap estimates of the parameters
bootstrap_estimates <- boot_results$t

# Calculate the variance of the bootstrap estimates
variance_estimates <- apply(bootstrap_estimates, 2, var)

# Print the variance estimates
print(variance_estimates)

# Plot the bootstrap distribution of the parameters with ggplot:
library(ggplot2)
bootstrap_df <- data.frame(a = bootstrap_estimates[,1], tau = bootstrap_estimates[,2])
ggplot(bootstrap_df, aes(x = a)) +
geom_histogram(binwidth = 0.1, fill = "lightblue", color = "black") +
labs(title = "Bootstrap Distribution of Parameter a") +
theme_minimal()


ggplot(bootstrap_df, aes(x = tau)) +
geom_histogram(binwidth = 0.1, fill = "lightblue", color = "black") +
labs(title = "Bootstrap Distribution of Parameter tau") +
theme_minimal()

# Calculate the confidence intervals for the parameters
conf_intervals <- boot.ci(boot_results[, 1], type = "bca", index = 1:2)
# 95% percentile bootstrap confidence interval from the bootstrap results.
# boot.ci() recognizes the interval types "norm", "basic", "stud", "perc",
# "bca" and "all"; "perc" requests the percentile interval.
conf_intervals <- boot.ci(boot_results,
                          conf = 0.95,
                          type = "perc")
print(conf_intervals)

# Jackknife example
# Generate a non-Gaussian distribution
set.seed(123)
data <- rexp(1000, rate = 0.5)


# Initialize a vector to store the jackknife estimates
jackknife_estimates <- numeric(length(data))


# Calculate with bootstrap:
# Statistic in the (data, indices) form that boot() calls: the mean of the
# elements of `data` selected by `indices`.
calc_mean <- function(data, indices) {
  resampled <- data[indices]
  mean(resampled)
}

# Use the boot function to perform bootstrapping
boot_results <- boot(data, calc_mean, R = 1000)
Binary file added slides/images/loftus_comparison.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added slides/images/loftus_lowry.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added slides/images/loftus_plot_with_error.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added slides/images/loftus_plots.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit d3307c8

Please sign in to comment.