Talk:2005 French urban violence/stats

From Wikipedia, the free encyclopedia

rm(list=ls())

# Boostrap Algorithm 
bootstrap <- function(data, B, theta, ...) {
       n <- NROW(data)    # number of observed samples
        cols <- NCOL(data) # number of columns in data frame
        boot <- list()     # data structure to be returned

        boot$B <- B 
        boot$data <- data 
        boot$theta.hat <- theta(data, ...)
         
        # generate and store random sample of indices
        ind <- sample(1:n, n*B, replace=T)
        boot$indices <-  matrix(ind, ncol=n, byrow=T)

        # make 3-D array of bootstrap sample data
        boot$samples <- aperm(array( t(data[ind,]), c(cols,n,B) ), c(2,1,3))
        # compute theta.star of each bootstrap data sample
        boot$replicates <- apply(boot$samples, 3, theta, ...)

        # compute bootstrap estimate of standard error
        if(!is.vector(boot$replicates)) { 
                boot$sderror <- sqrt(apply(boot$replicates,1,var))
        } else {
                boot$sderror <- sqrt(var(boot$replicates))
        }
        
        # compute bootstrap estimate of bias
        p.stars <- aperm(apply(boot$indices, 1, tabulate, n)/n, c(2,1))
        p.bar <-  apply(p.stars, 2, mean) 
        theta.bar <- theta(data, wt=p.bar)
        boot$bias <- mean(boot$replicates) - theta.bar               

        class(boot) <- "bootstrap" # object-orientation      
        return(boot)    
}


# theta-hat is prediction of number of cars to be burned overnight
theta.hat <- function(boot.data, wt=NULL) {
        data = data.frame(day=1:6,cars=c(40,315,596,897,1295,1408))
        data$cars <- data$cars + boot.data[,2]
        model <- lm(cars ~ day, data, weights=wt)
        prediction <- predict(model,newdata=data.frame(day=7))
        return(prediction)
}

# rioting data from French Ministry of Interior (number of vehicles burned)
riot.data <- data.frame(day=1:6,cars=c(40,315,596,897,1295,1408))

# fit linear model
model <- lm(cars ~ day, riot.data) 

B <- 250 # number of bootstrap replications to do
boot.data <- data.frame(index=1:6, residuals=model$residuals) # boot residuals
boot <- bootstrap(boot.data, B, theta=theta.hat) # replicate predictions 

# plot histogram of replicates
library(MASS)
truehist(boot$replicates, 
         main="Possible number of vehicles that will be burned on Nov 7",
          xlab="bootstrap replicates of linear extrapolation",
          ylab="observed frequency",
          sub=c(paste("standard error =", round(boot$sderror, digits=2),
               ", bias = ", round(boot$bias,digits=2))), col="light grey")