# Fisher's Exact Test

# Fisher's Exact Test for 2 X 2
# Optimal for designs with fixed marginals
# Data from Tea-Tasting example
# This can be changed to read from a file, but the matrix is small enough that
# it is easier to enter it directly.
# See "?fisher.test" for more information.

Bristol <- matrix(c(3,1,1,3), nrow = 2,
         dimnames = list(Row = c("1","2"), Col = c("1","2")))
# Remember that "matrix" reads down columns 
cat("The data matrix","\n")
print(Bristol)
cat("\n")
cat("Fisher's exact test--one-sided","\n") 
ft <- fisher.test(Bristol, alternative = "greater", conf.int = TRUE)  
# Ignore error message
#For a two-tailed test change to alternative = "two.sided"
#two.sided and conf.int apply only to 2x2
print(ft)
cat("Pearson's (two-sided) chi-square test without Yates' correction","\n")
pchisq <- chisq.test(Bristol, correct = FALSE)
print(pchisq)
cat("Pearson's (two-sided) chi-square test with Yates' correction","\n")
pchisq <- chisq.test(Bristol, correct = TRUE)
print(pchisq)
cat("Notice that the last two tests are two-sided while Fisher's test", "\n","is set up as one-sided.","\n")


###   Row Marginals Fixed

origdata <- matrix(c(35, 9, 60, 41), nrow = 2, byrow = TRUE)
         # Another possible set of data -- ignore error messages
         # origdata <- matrix(c(12,2,1,3,2,4,3,4,187,17,11,9,16,7,8,6), nrow = 2, byrow = TRUE)
# Establish sample size and number of rows and columns  (rs and cs)
N <- sum(origdata)
nr <- nrow(origdata)
nc <- ncol(origdata)
ncells <- nr*nc
# Establish row and column marginal totals
rowtots <- rowSums(origdata)
coltots <- colSums(origdata)
# Calculate expected cell probabilities based on original marginal totals
# and convert to vector
expected <- matrix(data = NA, nrow = nr, ncol = nc, byrow = TRUE)
for (i in 1:nr) {
  for (j in 1:nc) {
    expected[i,j] <- rowtots[i] * coltots[j] /N^2
  }
}
# Convert to vector  
expectedvector <- as.vector(t(expected))
# This can now be sampled

# Calculate chi-square on original data
obtchisq <- chisq.test(origdata, correct = F)$statistic    

nreps <- 10000                    
xx <- numeric(N)
results <- numeric(nreps)
greater <- 0                
cells <- numeric(ncells)
cells <- c(1:ncells) 
# Now resample nreps times

for (i in 1:nreps) {
  xx <- sample(cells,N,replace = TRUE, prob = expectedvector)
  vv <- matrix(0,1,ncells)
  for (j in 1:N) {
    b <- xx[j]
    vv[b] = vv[b] + 1
  }
  
  x <- matrix(vv,nrow = nr, byrow = TRUE)
  # x is now the contingency table for ith random sample
  
  results[i]<- chisq.test(x, correct = FALSE)$statistic
  greater <- ifelse(results[i] >= obtchisq, greater + 1, greater + 0)  
}

dfs <- (nr - 1)*(nc - 1)
cat("For a chi-square distribution the mean should be = ", dfs, ". It is actually ",mean(results, na.rm = TRUE),"\n")
cat("\n")
cat("For a chi-square distribution the var. should be = ", 2*dfs,". It is actually ",var(results, na.rm = "TRUE"), "\n")
cat("\n")

cat(" Look at the graphic for the distribution and a qq plot", "\n")
cat("\n")
par(mfrow=c(1,2))
hist(results, xlab = "Chi-Square", ylab = "Frequency", breaks = 50, main = "Chi-square distribution")
# Use the car library for the qq plot against a chi-square distrib.
#
library(car)
qqPlot(results, dist = "chisq", df = dfs)