From Wikipedia, the free encyclopedia
rm(list=ls())
# Boostrap Algorithm
bootstrap <- function(data, B, theta, ...) {
n <- NROW(data) # number of observed samples
cols <- NCOL(data) # number of columns in data frame
boot <- list() # data structure to be returned
boot$B <- B
boot$data <- data
boot$theta.hat <- theta(data, ...)
# generate and store random sample of indices
ind <- sample(1:n, n*B, replace=T)
boot$indices <- matrix(ind, ncol=n, byrow=T)
# make 3-D array of bootstrap sample data
boot$samples <- aperm(array( t(data[ind,]), c(cols,n,B) ), c(2,1,3))
# compute theta.star of each bootstrap data sample
boot$replicates <- apply(boot$samples, 3, theta, ...)
# compute bootstrap estimate of standard error
if(!is.vector(boot$replicates)) {
boot$sderror <- sqrt(apply(boot$replicates,1,var))
} else {
boot$sderror <- sqrt(var(boot$replicates))
}
# compute bootstrap estimate of bias
p.stars <- aperm(apply(boot$indices, 1, tabulate, n)/n, c(2,1))
p.bar <- apply(p.stars, 2, mean)
theta.bar <- theta(data, wt=p.bar)
boot$bias <- mean(boot$replicates) - theta.bar
class(boot) <- "bootstrap" # object-orientation
return(boot)
}
# theta-hat is prediction of number of cars to be burned overnight
theta.hat <- function(boot.data, wt=NULL) {
data = data.frame(day=1:6,cars=c(40,315,596,897,1295,1408))
data$cars <- data$cars + boot.data[,2]
model <- lm(cars ~ day, data, weights=wt)
prediction <- predict(model,newdata=data.frame(day=7))
return(prediction)
}
# rioting data from French Ministry of Interior (number of vehicles burned)
riot.data <- data.frame(day=1:6,cars=c(40,315,596,897,1295,1408))
# fit linear model
model <- lm(cars ~ day, riot.data)
B <- 250 # number of bootstrap replications to do
boot.data <- data.frame(index=1:6, residuals=model$residuals) # boot residuals
boot <- bootstrap(boot.data, B, theta=theta.hat) # replicate predictions
# plot histogram of replicates
library(MASS)
truehist(boot$replicates,
main="Possible number of vehicles that will be burned on Nov 7",
xlab="bootstrap replicates of linear extrapolation",
ylab="observed frequency",
sub=c(paste("standard error =", round(boot$sderror, digits=2),
", bias = ", round(boot$bias,digits=2))), col="light grey")