# Demonstration of resampling statistics for independent observations # This begins with a standard t test on independent groups. # Then we see one way to use a randomization test to do the same thing # without worrying overmuch about the shape of the distribution. # Then we see a very elegant way to accomplish nearly the same thing, but the # R code is far from transparent. # We are using a data file where the means are roughly equal--The moon illusion # with eyes elevated and eyes level. I have added some observations so that the sampling # distribution of the differences is not close to normal. datafile <- read.table("http://www.uvm.edu/~dhowell/methods8/DataFiles/Tab18-2.dat", header = TRUE) attach(datafile) Group <- factor(Group) cat("The names of the variables are ",names(datafile), "\n") means <- tapply(dv, Group, mean) sd <- tapply(dv, Group, sd) n.s <- tapply(dv, Group, length) samp.mean.diff <- means[1] - means[2] cat("The sample mean differences is ", samp.mean.diff , "\n") # Plot a histogram of the differences par(mfrow = c(2,1)) grp1 <- dv[Group == 1] grp2 <- dv[Group == 2] hist(grp1, main = "Group 1 = Success") xv <- seq(min(grp1),max(grp1),.1) yv <- dnorm(xv, mean = mean(grp1), sd = sd(grp1)) par (new = TRUE) plot(xv, yv, type = "l", col = "blue", xlim = c(min(grp1), max(grp1)), xlab = "", yaxt = "n", ylab = "") hist(grp2, main = "Group 2 = Fail") xv <- seq(min(grp2),max(grp2),.1) yv <- dnorm(xv, mean = mean(grp2), sd = sd(grp2)) par (new = TRUE) plot(xv, yv, type = "l", col = "blue", xlim = c(min(grp2), max(grp2)), xlab = "", yaxt = "n", ylab = "") cat("NOTE: Group 1 (Success) is clearly not normally distributed") ############################################################################################################# # First we will run a standard t test t.result <- t.test(dv~ Group, paired = FALSE, conf.level = .95) print(t.result) cat("The t-test gives a probability under the null of ",t.result$p.value,"\n\n") # Now to continue using randomization test # Leave Group as is, but randomly reorder the dv and get mean difference nreps = 1000 meandiff <- numeric(nreps) for (i in 1:nreps) { newdata <- sample(dv, size = length(dv), replace = FALSE) newmeans <- tapply(newdata, Group, mean) meandiff[i] <- newmeans[1] - newmeans[2] } # We want a two-tailed test, so we can work with absolute values. absdiff <- abs(meandiff) prob <- length(absdiff[absdiff >= abs(samp.mean.diff)] )/nreps cat("The randomization test gives a probability under the null of " , prob, "\n\n") ####################################################################################################################### # In theory we could do something like with did with paired samples and compute all possible permutations of # the dv. However that would be factorial(67) = 3.647111e+94, which is about 3.6 with 94 0s after it. Not feasible!! # I know that many of those have the same (rearranged) data in the same group, but that still leaves 9.3649e+15, # which is still unreasonable even if I knew how to draw only permutations with scores changing groups. #################################################################################### # Finally, let's use the standard Wilcoxon Rank-Sum Test' nonpar <- wilcox.test(dv~Group, alternative = "two.sided", paired = FALSE, conf.int = TRUE) cat("For the Wilcoxon test the p value under the null is ", nonpar$p.value,"\n\n") cat(" Ignore warning about ties--there are bound to be ties with this much data \n\n") cat("This is remarkable agreement, but we have a large data set, even if one group is badly skewed", "\n\n")