# Demonstration of resampling statistics for paired observations

# This begins with a standard t test on paired differences.
# Then we see one way to use a randomization test to do the same thing
# without worrying overmuch about the shape of the distribution.
# Then we see a very elegant way to accomplish nearly the same thing, but the
# R code is far from transparent.

# We are using a data file where the means are roughly equal--The moon illusion
# with eyes elevated and eyes level. I have added some observations so that the sampling
# distribution of the differences is not close to normal.

datafile <- read.table("http://www.uvm.edu/~dhowell/methods8/DataFiles/MoonEyes.dat", header = TRUE)
attach(datafile)
cat("The names of the variables are ",names(datafile), "\n")
diff <- Elevated - Level
sample.mean <- mean(diff)
cat("The sample mean differences is ", sample.mean, "\n")
# Plot a histogram of the differences
hist(diff)
xv <- seq(-3,3,.01)
yv <- dnorm(xv)
par (new = TRUE)
plot(xv, yv, type = "l", col = "blue", xlim = c(-2.5, 0.5), xlab = "", yaxt = "n", ylab = "")

# First we will run a standard t test

t.result <- t.test(Elevated, Level, paired = TRUE, conf.level = .95)
print(t.result)
cat("The t-test gives a probability under the null of ",t.result$p.value,"\n\n")

# Now to continue
nreps <- 2^14          # Normally I would use 10,000, but I want the number of samples here to
                       # match the number of possible permutations
means.random <- numeric(nreps)     # Set to hold all obtained means




####################################################################################
# There are two ways to run a randomization (permutation) test. When there are a great many 
# possible arrangements of the data, it is easiest to take a (very large) random sample of them
# and then assume that this set is a good reflection of what you would have if you took all 
# possible sets. That is the common approach.

# The other way is to actually calculate all possible permutations, using each one once. I would
# call that a "permutation" test. It is exact. This turns out to be a lot simpler, at least in this
# case, than you might think.

# We will use both approaches.

# Now we will randomly assign the sign of the difference
unsigned.diff <- abs(diff)
n <- length(unsigned.diff)
for (i in 1:nreps) {
  signs <- sample(c(1, -1), size = n, replace = TRUE)  # Draw 14 obs randomly from 1, -1
  signed.diff <- unsigned.diff*signs
  means.random[i] <- mean(signed.diff)       # Calculate the mean with that arrangement of signs
}

# The question is what percentage of those differences are greater than or equal to
# plus or minus sample.mean
percent.above <- length(means.random[means.random >= abs(sample.mean)])/nreps
percent.below <- length(means.random[means.random <= abs(sample.mean)*(-1)])/nreps
percent.extreme <- percent.above + percent.below

cat("The randomization test gives a probability under the null of", percent.extreme, "\n\n")


####################################################################################
# Now we will do things a bit differently by taking all possible permutations of the
# signs rather than just a very large random sample of them. There are 2^14 = 16,384 of them. 
# This leads to a truly exact test.

# The code looks formidable, but type ?expand.grid to see what that does, and then
# submit the messy stuff a section at a time. You can see what is happening by printing out each
# result as it is calculated.

unsigned.Diff <- abs(diff)      # Convert the differences to positive values
                                # We already had this, but this makes the section stand alone

aa <- as.data.frame(matrix(rep(c(-1,1),14), nr =2))
permutations <- as.matrix(expand.grid(aa))
head(permutations)              # Notice how the rows change as you go down

# By matrix multiplication you can multiply the different patterns of +/- 1 by the
# vector of differences. This will give you the sum of the differences

allsums <- permutations %*% unsigned.diff
   #     We could have put all of that together in one unreadable line
   #     > allsums <- as.matrix(expand.grid(as.data.frame(matrix(rep(c(-1,1),14), nr
   #     + =2)))) %*% unsigned.diff        Thanks to Bert Gunter for giving this approach

# If we want means, divide by n = 14
allmeans <- allsums/n

percent.above2 <- length(allmeans[allmeans >= abs(sample.mean)])/nreps
percent.below2 <- length(allmeans[allmeans <= (abs(sample.mean)*(-1))])/nreps
percent.extreme2 <- percent.above2 + percent.below2
cat("The permutation test probability under the null is = ",percent.extreme2,"\n\n")

# Note that this returns almost exactly the same result as the randomization procedure.


####################################################################################
# Finally, let's use the standard Wilcoxon Signed-Ranks Matched-pairs Test'
# Ignore warning messages--or else modify data trivially to prevent ties
#      For example, Level[3] <- 2.03001; Level[14] = 2.75001
nonpar <- wilcox.test(Elevated, Level, alternative = "two.sided", paired = TRUE, conf.int = TRUE)
cat("For the Wilcoxon test the p value under the null is ", nonpar$p.value,"\n\n")

# The choice of test clearly makes a difference, though not for randomizations.