# Kevin Dunn, May 2015, Creative Commons Zero

# Global analysis options; current values are as used in the CEEA June 2015 paper
set.seed(2)
trim.tails <- FALSE
robust.reg <- FALSE  # If using robust regression then don't trim the tails.
bootstrap  <- TRUE   # Use a bootstrap method to determine the regression slope

# Run the following code once:
#install.packages(c('gdata', 'data.table', 'car', 'boot'))

# Please note, results might differ slightly on your computer, however, they will
# not be much different to those presented in the paper.

for (i in 1:8) {
  require(gdata)
  library(gdata)
  # Download the data from http://yint.org/unlimited-time-tests
  # and save as an Excel file. Use an R library to read in this data.
  data <- read.xls('Unlimited-time--Raw-data.xlsx', sheet=i)
  colnames(data) <- c("Time", "Grade", "Details")
  
  # Re-order the rows by "Time" order
  library(data.table)
  data <- data.table(data)
  data[order(data$Time)]
  
  plot(data$Time, data$Grade, 
       xlab="Time duration [minutes]", 
       ylab="Student's score [percentage]",
       main=paste("Student's grades as a function of time written, for test", i)
  )
  
  # Now trim the first 2.5% and the last 2.5% of the data
  # Not used in the analysis for the CEEA paper, May/June 2015
  if(trim.tails){
    N <- dim(data)[1]
    trim.N <- ceiling(N*0.025)  
    data <- data[-seq(1, trim.N), ] 
    N <- dim(data)[1]
    data <- data[-seq(N, N-trim.N+1, by=-1), ] 
  }
  
  if(robust.reg){
    library(MASS)
    model <- rlm(data$Grade ~ data$Time)
  }else{
    model <- lm(data$Grade ~ data$Time)  
  }
  print("---------------------")
  print(i)
  print(summary(model))
  library(car)
  qqPlot(model)  # Ensure the residuals are in fact all normally distributed
  
  # Code based on that from: http://www.statmethods.net/advstats/bootstrapping.html
  library(boot)
  coefficient <- function(formula, data, indices) {
    d <- data[indices,]
    fit <- lm(formula, data=d)
    return(as.numeric(fit$coefficients[2]))
  } 
  
  SE <- function(formula, data, indices) {
    d <- data[indices,]
    fit <- lm(formula, data=d)
    return(summary(fit)$sigma)
  } 
  
  # Bootstrap with 10000 replications.
  # Change the "statistic" from "coefficient" if you'd like to generate 
  # different bootstrap statistics (e.g. for standard error)
  results <- boot(data=data, statistic=coefficient, 
                  R=10000, formula=Grade~Time)
  hist(results$t, xlab=expression('Slope coefficient, b'[T]), 
       main=paste("Bootstrap slope coefficient for test", i)
       )
  
  print(boot.ci(results, type="bca"))   # get 95% confidence interval 
  abline(v=boot.ci(results, type="bca")$bca[4], lwd=3, col="blue", lty=3)
  abline(v=boot.ci(results, type="bca")$bca[5], lwd=3, col="blue", lty=3)
  #print(confint(model)) 
  #plot(data$Grade ~ data$Time)
  #lines(lowess(data$Time,data$Grade))
  #abline(model)
}

# Consider case #4 (worst case)
# ------
data <- read.xls('Unlimited-time--Raw-data.xlsx', sheet=4)
colnames(data) <- c("Time", "Grade", "Details")

# Re-order the rows by "Time" order
library(data.table)
data <- data.table(data)
data[order(data$Time)]

# Boxplot of the earliest 10 next to the last 10
early <- data[1:10,]$Grade
late <- data[-seq(1,(dim(data)[1]-10)),]$Grade
middle <- data[seq(11,(dim(data)[1]-10)),]$Grade
boxplot(early, middle, late, ylim=c(40,100), 
        names=c("121 to 169 mins", "169 to 240 min", "240 to 259 mins"),
        main="Boxplot for test 04: first 10, the middle, last 10")


# Consider case #2 (neutral case)
# ------
data <- read.xls('Unlimited-time--Raw-data.xlsx', sheet=2)
colnames(data) <- c("Time", "Grade", "Details")

# Re-order the rows by "Time" order
library(data.table)
data <- data.table(data)
data[order(data$Time)]

# Boxplot of the earliest 10 next to the last 10
early <- data[1:10,]$Grade
middle <- data[seq(11,(dim(data)[1]-10)),]$Grade
late <- data[-seq(1,(dim(data)[1]-10)),]$Grade
boxplot(early, middle, late, ylim=c(40,100), 
        names=c("104 to 135 mins", "135 to 247 min", "247 to 308 mins"),
        main="Boxplot for test 02: first 10, the middle, last 10")