# Fisher's Exact Test # Fisher's Exact Test for 2 X 2 # Optimal for designs with fixed marginals # Data from Tea-Tasting example # This can be changed to read from a file, but the matrix is small enough that # it is easier to enter it directly. # See "?fisher.test" for more information. Bristol <- matrix(c(3,1,1,3), nrow = 2, dimnames = list(Row = c("1","2"), Col = c("1","2"))) # Remember that "matrix" reads down columns cat("The data matrix","\n") print(Bristol) cat("\n") cat("Fisher's exact test--one-sided","\n") ft <- fisher.test(Bristol, alternative = "greater", conf.int = TRUE) # Ignore error message #For a two-tailed test change to alternative = "two.sided" #two.sided and conf.int apply only to 2x2 print(ft) cat("Pearson's (two-sided) chi-square test without Yates' correction","\n") pchisq <- chisq.test(Bristol, correct = FALSE) print(pchisq) cat("Pearson's (two-sided) chi-square test with Yates' correction","\n") pchisq <- chisq.test(Bristol, correct = TRUE) print(pchisq) cat("Notice that the last two tests are two-sided while Fisher's test", "\n","is set up as one-sided.","\n") ### Row Marginals Fixed origdata <- matrix(c(35, 9, 60, 41), nrow = 2, byrow = TRUE) # Another possible set of data -- ignore error messages # origdata <- matrix(c(12,2,1,3,2,4,3,4,187,17,11,9,16,7,8,6), nrow = 2, byrow = TRUE) # Establish sample size and number of rows and columns (rs and cs) N <- sum(origdata) nr <- nrow(origdata) nc <- ncol(origdata) ncells <- nr*nc # Establish row and column marginal totals rowtots <- rowSums(origdata) coltots <- colSums(origdata) # Calculate expected cell probabilities based on original marginal totals # and convert to vector expected <- matrix(data = NA, nrow = nr, ncol = nc, byrow = TRUE) for (i in 1:nr) { for (j in 1:nc) { expected[i,j] <- rowtots[i] * coltots[j] /N^2 } } # Convert to vector expectedvector <- as.vector(t(expected)) # This can now be sampled # Calculate chi-square on original data obtchisq <- chisq.test(origdata, correct = F)$statistic nreps <- 10000 xx <- numeric(N) results <- numeric(nreps) greater <- 0 cells <- numeric(ncells) cells <- c(1:ncells) # Now resample nreps times for (i in 1:nreps) { xx <- sample(cells,N,replace = TRUE, prob = expectedvector) vv <- matrix(0,1,ncells) for (j in 1:N) { b <- xx[j] vv[b] = vv[b] + 1 } x <- matrix(vv,nrow = nr, byrow = TRUE) # x is now the contingency table for ith random sample results[i]<- chisq.test(x, correct = FALSE)$statistic greater <- ifelse(results[i] >= obtchisq, greater + 1, greater + 0) } dfs <- (nr - 1)*(nc - 1) cat("For a chi-square distribution the mean should be = ", dfs, ". It is actually ",mean(results, na.rm = TRUE),"\n") cat("\n") cat("For a chi-square distribution the var. should be = ", 2*dfs,". It is actually ",var(results, na.rm = "TRUE"), "\n") cat("\n") cat(" Look at the graphic for the distribution and a qq plot", "\n") cat("\n") par(mfrow=c(1,2)) hist(results, xlab = "Chi-Square", ylab = "Frequency", breaks = 50, main = "Chi-square distribution") # Use the car library for the qq plot against a chi-square distrib. # library(car) qqPlot(results, dist = "chisq", df = dfs)