# Kevin Dunn, May 2015, Creative Commons Zero # Global analysis options; current values are as used in the CEEA June 2015 paper set.seed(2) trim.tails <- FALSE robust.reg <- FALSE # If using robust regression then don't trim the tails. bootstrap <- TRUE # Use a bootstrap method to determine the regression slope # Run the following code once: #install.packages(c('gdata', 'data.table', 'car', 'boot')) # Please note, results might differ slightly on your computer, however, they will # not be much different to those presented in the paper. for (i in 1:8) { require(gdata) library(gdata) # Download the data from http://yint.org/unlimited-time-tests # and save as an Excel file. Use an R library to read in this data. data <- read.xls('Unlimited-time--Raw-data.xlsx', sheet=i) colnames(data) <- c("Time", "Grade", "Details") # Re-order the rows by "Time" order library(data.table) data <- data.table(data) data[order(data$Time)] plot(data$Time, data$Grade, xlab="Time duration [minutes]", ylab="Student's score [percentage]", main=paste("Student's grades as a function of time written, for test", i) ) # Now trim the first 2.5% and the last 2.5% of the data # Not used in the analysis for the CEEA paper, May/June 2015 if(trim.tails){ N <- dim(data)[1] trim.N <- ceiling(N*0.025) data <- data[-seq(1, trim.N), ] N <- dim(data)[1] data <- data[-seq(N, N-trim.N+1, by=-1), ] } if(robust.reg){ library(MASS) model <- rlm(data$Grade ~ data$Time) }else{ model <- lm(data$Grade ~ data$Time) } print("---------------------") print(i) print(summary(model)) library(car) qqPlot(model) # Ensure the residuals are in fact all normally distributed # Code based on that from: http://www.statmethods.net/advstats/bootstrapping.html library(boot) coefficient <- function(formula, data, indices) { d <- data[indices,] fit <- lm(formula, data=d) return(as.numeric(fit$coefficients[2])) } SE <- function(formula, data, indices) { d <- data[indices,] fit <- lm(formula, data=d) return(summary(fit)$sigma) } # Bootstrap with 10000 replications. # Change the "statistic" from "coefficient" if you'd like to generate # different bootstrap statistics (e.g. for standard error) results <- boot(data=data, statistic=coefficient, R=10000, formula=Grade~Time) hist(results$t, xlab=expression('Slope coefficient, b'[T]), main=paste("Bootstrap slope coefficient for test", i) ) print(boot.ci(results, type="bca")) # get 95% confidence interval abline(v=boot.ci(results, type="bca")$bca[4], lwd=3, col="blue", lty=3) abline(v=boot.ci(results, type="bca")$bca[5], lwd=3, col="blue", lty=3) #print(confint(model)) #plot(data$Grade ~ data$Time) #lines(lowess(data$Time,data$Grade)) #abline(model) } # Consider case #4 (worst case) # ------ data <- read.xls('Unlimited-time--Raw-data.xlsx', sheet=4) colnames(data) <- c("Time", "Grade", "Details") # Re-order the rows by "Time" order library(data.table) data <- data.table(data) data[order(data$Time)] # Boxplot of the earliest 10 next to the last 10 early <- data[1:10,]$Grade late <- data[-seq(1,(dim(data)[1]-10)),]$Grade middle <- data[seq(11,(dim(data)[1]-10)),]$Grade boxplot(early, middle, late, ylim=c(40,100), names=c("121 to 169 mins", "169 to 240 min", "240 to 259 mins"), main="Boxplot for test 04: first 10, the middle, last 10") # Consider case #2 (neutral case) # ------ data <- read.xls('Unlimited-time--Raw-data.xlsx', sheet=2) colnames(data) <- c("Time", "Grade", "Details") # Re-order the rows by "Time" order library(data.table) data <- data.table(data) data[order(data$Time)] # Boxplot of the earliest 10 next to the last 10 early <- data[1:10,]$Grade middle <- data[seq(11,(dim(data)[1]-10)),]$Grade late <- data[-seq(1,(dim(data)[1]-10)),]$Grade boxplot(early, middle, late, ylim=c(40,100), names=c("104 to 135 mins", "135 to 247 min", "247 to 308 mins"), main="Boxplot for test 02: first 10, the middle, last 10")