#-------------------------------------------------------------------- # Examples of testing Goodness-of-fit to HWP #-------------------------------------------------------------------- # Data: Pgm locus genotypes in a sample of Aedes Aegypti mosquitos # Source: Weir B. (1996) Genetic Data Analysis. Sinauer #-------------------------------------------------------------------- # enter the genotype data n.AA <- 5 n.Aa <- 9 n.aa <- 26 a1 <- c( rep("a",n.aa), rep("A",n.Aa), rep("A",n.AA) ) a2 <- c( rep("a",n.aa), rep("a",n.Aa), rep("A",n.AA) ) obs.dat <- as.data.frame(cbind(a1,a2)) head(obs.dat) # compute table of observed genotype frequecies o <- numeric(3); names(o) <- c("aa" ,"Aa", "AA") o[1] <- n.aa o[2] <- n.Aa o[3] <- n.AA # compute observed allele frequencies n <- sum(o) #n = o1 + o2 + o3 p.A <- (sum(obs.dat$a1=="A")+sum(obs.dat$a2=="A"))/(2*n) p.a <- (sum(obs.dat$a1=="a")+sum(obs.dat$a2=="a"))/(2*n) # compute table of expected genotype frequecies e <- numeric(3); names(e) <- c("aa" ,"Aa", "AA") e[1] <- n * p.a^2 e[2] <- n * p.A * p.a * 2 e[3] <- n * p.A^2 # compute observed chi-square x2.obs <- sum((o-e)^2/e) # df = (# of categories - 1) - (# independent parameters estimated from the data) # df = (#pheno categories - 1) - (#alleles - 1) df <- (3-1) - (2-1) # print statistics for the original data o # Observed Counts e # Expected Counts x2.obs # Chi-square # p-value 1-pchisq(x2.obs,df) # Another way to get the Observed Table obs.tab <- table(paste(obs.dat$a1,obs.dat$a2,sep="")) o <- numeric(3); names(o) <- c("aa" ,"Aa", "AA") o[1] <- sum(obs.tab[names(obs.tab)=="aa"]) o[2] <- sum(obs.tab[names(obs.tab)=="Aa" | names(obs.tab)=="aA"]) o[3] <- sum(obs.tab[names(obs.tab)=="AA"]) o #-------------------------------------------------------------------- # Likelihood Ratio Test #-------------------------------------------------------------------- like.0 <- (p.a^2 )^n.aa * (2*p.a*p.A)^n.Aa * (p.A^2 )^n.AA #Under HWP like.1 <- (n.aa/n)^n.aa * (n.Aa/n )^n.Aa * (n.AA/n)^n.AA #No restrictions x2.lrt <- -2*log( like.0 / like.1 ) #[1] 5.187405 x2.lrt #-------------------------------------------------------------------- # Randomization Test (Permutation Version) #-------------------------------------------------------------------- # initialize a vector for the chi-squares from the permutations n.permu <- 1000 x2 <- numeric(n.permu) #Create an empty object for the 1000 results perm.dat <- NULL # begin the permutations for (i in 1:n.permu) { # shuffle the data temp <- sample(c(a1,a2)) perm.dat$a1 <- temp[1:n] perm.dat$a2 <- temp[(n+1):(2*n)] # compute table of genotype frequecies perm.tab <- table(paste(perm.dat$a1,perm.dat$a2,sep="")) o <- numeric(3); names(o) <- c("aa" ,"Aa", "AA") o[1] <- sum(perm.tab[names(perm.tab)=="aa"]) o[2] <- sum(perm.tab[names(perm.tab)=="Aa" | names(perm.tab)=="aA"]) o[3] <- sum(perm.tab[names(perm.tab)=="AA"]) # compute observed chi-square x2[i] <- sum((o-e)^2/e) } # plot a histogram of the permutation distribution hist(x2,col="lightskyblue") points(x2.obs,0,pch=8,cex=3) # compute the permutation p-value sum(x2 >= x2.obs) pvalue <- sum(x2 >= x2.obs)/n.permu pvalue #-------------------------------------------------------------------- # Plot of chi-squared dist with 1 d.f. #-------------------------------------------------------------------- x <- seq(0,5,by=.01) y <- dchisq(x,1) plot(x,y) plot(x,y,type="l") #-------------------------------------------------------------------- # Using the built-in chisq.test() function #-------------------------------------------------------------------- obs.mat <- matrix(c(5,4.5,4.5,26), nrow=2) #split the heterozygotes evenly results <- chisq.test(obs.mat, correct=F) #w/o Yates continuity correction attributes(results) results$expected results$exp #-------------------------------------------------------------------- # HWP & Population Structure #-------------------------------------------------------------------- obs.1 <- matrix(c(128, 32, 32, 8), nrow=2) #split the heterozygotes evenly obs.2 <- matrix(c(32, 48, 48, 72), nrow=2) #split the heterozygotes evenly obs.tot <- obs.1 + obs.2 obs.1 chisq.test(obs.1, correct=F)$expected chisq.test(obs.1, correct=F) chisq.test(obs.2, correct=F) chisq.test(obs.tot, correct=F)