## Click Here for Code

####################### (No Fixed Marginals) ######## ######################################################## # Analysis of contingency tables conditional only on the total sample size # I have changed this to make the cell freq under H0 equal to (on average) the # expected frequencies with those marginal totals. # It is the only way that I can think of to set up a sampling scheme. BUT I am # not restricting the sampling to those marginal totals. # The qq plot fails if you change nreps away from 10000 because of the # way I create length of variables.I should fix that some day. #read in the data civilUnion <- matrix(c(35, 9, 60, 41), nrow = 2, byrow = TRUE) #data <- matrix(c(3,1,1,3), nrow = 2 ) #data <- read.table("DeathModified.dat", header = TRUE, nrows = 2) #data <- read.table("Visintainer.dat", header = TRUE) #data <- read.table("Darley.dat", header = TRUE) #data <- matrix(c(12,2,1,3,2,4,3,4,187,17,11,9,16,7,8,6), nrow = 2, byrow = TRUE) #data <- matrix(c(13,36,14,30), nrow = 2, byrow = TRUE) #rownames(oata) <- c("Drug","Placebo") #colnames(data) <- c("Relapse","Success") #data <- matrix(c(12,2,1,3,2,4,3,4,187,17,11,9,16,7,8,6), nrow = 2, byrow = TRUE) # Establish sample size and number of rows and columns (rs and cs) data <- civilUnion N <- sum(data) nr <- nrow(data) nc <- ncol(data) ncells <- nr*nc # Establish row and column marginal totals rowtots <- rowSums(data) coltots <- colSums(data) # Calculate expected cell probabilities based on original marginal totals # and convert to vector expected <- matrix(data = NA, nrow = nr, ncol = nc, byrow = TRUE) for (i in 1:nr) { for (j in 1:nc) { expected[i,j] <- rowtots[i] * coltots[j] /N^2 } } # Convert to vector expectedvector <- as.vector(t(expected)) # This can now be sampled # Calculate chi-square on original data chisq.test(data, correct = F) obtchisq <- chisq.test(data, correct = F)$statistic nreps <- 10000 xx <- numeric(N) results <- numeric(nreps) greater <- 0 cells <- numeric(ncells) cells <- c(1:ncells) # Now resample nreps times for (i in 1:nreps) { xx <- sample(cells,N,replace = TRUE, prob = expectedvector) vv <- matrix(0,1,ncells) for (j in 1:N) { b <- xx[j] vv[b] = vv[b] + 1 } x <- matrix(vv,nrow = nr, byrow = TRUE) # x is now the contingency table for ith random sample results[i]<- chisq.test(x, correct = FALSE)$statistic greater <- ifelse(results[i] >= obtchisq, greater + 1, greater + 0) } probGreater <- greater/nreps cat("With no fixed marginals p(chi-square) = ",probGreater) dfs <- (nr - 1)*(nc - 1) cat("For a chi-square distribution the mean should be = ", dfs, ". It is actually ",mean(results, na.rm = TRUE),"\n") cat("\n") cat("For a chi-square distribution the var. should be = ", 2*dfs,". It is actually ",var(results, na.rm = "TRUE"), "\n") cat("\n") cat(" Look at the graphic for the distribution and a qq plot", "\n") cat("\n") par(mfrow=c(2,2)) hist(results, xlab = "Chi-Square", ylab = "Frequency", breaks = 50, main = "Chi-square distribution") # # # qqPlot(results, dist = "chisq", df = dfs) #Using Monte-Carlo simulation cat("The following is the result of letting R do a simulation for chisq.test","\n") cat("\n") print(chisq.test(data, simulate.p.value = TRUE, B = 10000)) # cat("The following is the result of Fisher's exact test ","\n") cat("\n") print(fisher.test(data, conf.level = 0.95)) cat("\n") cat("Do not be alarmed by warnings if you have some small marginals. ", "\n") cat("They appear because I used the chi-square statistic as my indicator of disparity.", "\n")