--- title: "R Notebook" output: GLMs --- We will work with two classes of GLM's in this exercise: regressions models for Poisson and Binomial data. ### Poisson Regression Model We observed seedling counts in a set of quadrats placed in a forest. At each quadrat, we measure an integrated metric of light levels. We want to estimate a Poisson regression model for these data to determine the relationship between light levels and seedlng abundance. We can use the following code to simulate data in order to recapture model paramters: ```{r} light<-runif(1000) # Alt specification for light levels: light=c(0.10, 0.18, 0.26, 0.33, 0.41, 0.49, 0.57, 0.64, 0.72, 0.80) # Alt specification for light levels: light<-seq(0.1, 0.8,len=10) beta0<-1.0; beta1<-3.0 lam<-exp(beta0+beta1*light) # this is equivalent to ln(lambda)<-beta0+beta1*light nSeedlings<-rpois(n=length(lam),lambda=lam) seedlingData<-data.frame(light,nSeedlings) # if you want a data frame plot(light,nSeedlings) ``` ### 1. Fit the Poisson regression using maximum likelihood. The likelihood function: ```{r} nllPoi<-function(parVec,sdlgs,lght){ b0<-parVec[1] b1<-parVec[2] sdlgPred<-exp(b0+b1*lght) nllik<- -sum(dpois(x=sdlgs,lambda=sdlgPred,log=TRUE)) # cat("nllik= ",nllik,sep=" ",fill=T);cat(" ",sep=" ",fill=T) return(nllik) } ``` ```{r} parVec<-c(0.5,1.0) # Initial parameter values outPois<-optim(par=parVec,fn=nllPoi,method="L-BFGS-B",lower=c(-Inf,-Inf),upper=c(Inf,Inf),sdlgs=nSeedlings,lght=light) outPois$par # 1.01036 2.98204 outPois$val # nllik 2676.105 myAIC<-2*2 + 2*outPois$val # 5356.21 ``` Plotting Poisson regression fit to the data: ```{r} lightPred<-light[order(light)] plot(light,nSeedlings) lines(lightPred, exp(outPois$par[1]+outPois$par[2]*lightPred),col='red',lwd=2) ``` ### 2. Compare your maximum likelihood fit to the R glm function. ```{r} outglm<-glm(nSeedlings~light, family=poisson,data=seedlingData) summary(outglm) ``` ### 3. Compare your fit to Stan using brms library ```{r} library(brms) outStan <- brm(formula = nSeedlings ~ light, data = seedlingData, family = poisson()) summary(outStan) plot(outStan) plot(marginal_effects(outStan), points = TRUE) outStan1 <- brm(formula = nSeedlings ~ light, data = seedlingData, family = poisson(), prior = c(set_prior("normal(0,100)", class = "b")), warmup = 1000, iter = 2000, chains = 4) summary(outStan1) plot(outStan1) plot(marginal_effects(outStan1), points = TRUE) ``` ### 4. Compare your fit to Stan using rstanarm library ```{r} library(rstanarm) outrstanarm<-stan_glm(nSeedlings ~ light,data = seedlingData, family = poisson, iter=2000, warmup=1000, cores=4) summary(outrstanarm) plot(outrstanarm) ``` ### 5. Fit the Poisson regression using Rstan ```{r} library(rstan) myData<-list(nSeedlings=nSeedlings,light=light,N=length(light)) # resStan <- stan(model_code = 'poissonReg.stan', data = myData, # chains = 3, iter = 3000, warmup = 500, thin = 10) resStan <- stan(model_code = modelString, data = myData, chains = 3, iter = 3000, warmup = 500, thin = 10) summary(resStan,par=c('beta0','beta1')) ``` ```{r} library(coda) post_fit<-As.mcmc.list(resStan) plot(post_fit,par=c('beta0','beta1')) ``` ```{r} modelString<-"data { int N; real light[N]; int nSeedlings[N]; } parameters { real beta0; real beta1; } transformed parameters { real lp[N]; real mu[N]; for(i in 1:N){ lp[i] = beta0 + beta1 * light[i]; mu[i] = exp(lp[i]); } } model { nSeedlings ~ poisson(mu); }" ``` ### 6. Fit the Poisson regression using Rjags ```{r} library(rjags) jags <- jags.model('poissonReg.jags.txt', data = list('nSeedlings' = nSeedlings, 'light' = light, 'N' = length(light)), inits<-list( list('beta0'=1,'beta1'=2), list('beta0'=1,'beta1'=.2), list('beta0'=.1,'beta1'=2), list('beta0'=2,'beta1'=5)), n.chains = 4, n.adapt = 100) ``` ```{r} update(jags, 1000) jags.samples(jags, c('beta0', 'beta1'), 10000) ``` ```{r} library(coda) codaSamples<-coda.samples(jags, c('beta0','beta1'), 10000, 1) ``` ```{r} plot(codaSamples, trace = FALSE, density = TRUE) summary(codaSamples) traceplot(codaSamples) ``` ```{r} modelString<-" model { for (i in 1:N){ nSeedlings[i] ~ dpois(lam.hat[i]) log(lam.hat[i]) <- beta0 + beta1 * light[i] } beta0 ~ dnorm(0, .0001) beta1 ~ dnorm(0, .0001) } " writeLines(modelString, con='poissonReg.jags.txt') ``` ### *Binomial Regression Model* We observed seedling survival in a in a forest. At each seedlng, we measure an integrated metric of light levels. We want to estimate a logistic regression model for these data to determine the relationship between light levels and seedlng survival. The model will be of the form: logit(p)<-beta0 + beta1 * light seedling ~ binomial(n=1, p) Repeat steps 1-6 from the Poisson regression model above except for the binomial model. Please use the following code to simulate data in order to recapture model paramters (beta0 and beta1): Simulating data for a binomial regression. ```{r} beta0<- -1.0; beta1<-3.0 light<-runif(1000) pVect<-exp(beta0+beta1*light)/(1+exp(beta0+beta1*light)) range(pVect) seedlingSurv<-rbinom(n=length(light),size=1,prob=pVect) seedlingSurvData<-data.frame(light,seedlingSurv) # if you want a data frame plot(light,seedlingSurv) ``` ### 1. Fit the binomial regression using maximum likelihood. The likelihood function: ```{r} nllBin<-function(parVec,seedlingSurv,light){ b0<-parVec[1] b1<-parVec[2] pPred<-exp(b0+b1*light)/(1+exp(b0+b1*light)) nllik<- -sum(dbinom(x=seedlingSurv,size=1,prob=pPred,log=TRUE)) # cat("nllik= ",nllik,sep=" ",fill=T);cat(" ",sep=" ",fill=T) return(nllik) } ``` ```{r} parVec<-c(0.5,1.0) # Initial parameter values outBinom<-optim(par=parVec,fn=nllBin,method="L-BFGS-B",lower=c(-Inf,-Inf),upper=c(Inf,Inf), seedlingSurv=seedlingSurv,light=light) outBinom$par # -1.021732 3.086268 outBinom$val # nllik 593.8812 myAIC<-2*2 + 2*outBinom$val # 1191.762 ``` ```{r} lightPred<-light[order(light)] b0<-outBinom$par[1] b1<-outBinom$par[2] pPred<-exp(b0+b1*lightPred)/(1+exp(b0+b1*lightPred)) plot(light,seedlingSurv) lines(lightPred, pPred,col='red',lwd=2) ``` ### 2. Compare your maximum likelihood fit to the R glm function. ```{r} outglm<-glm(seedlingSurv~light, family=binomial(),data=seedlingSurvData) summary(outglm) ``` ### 3. Compare your fit to Stan using brms library ```{r} library(brms) outStan <- brm(formula = seedlingSurv ~ light, data = seedlingSurvData, family = bernoulli()) # or binomial() summary(outStan) plot(outStan) # plot(marginal_effects(outStan), points = TRUE) ``` ### 4. Compare your fit to Stan using rstanarm library ```{r} library(rstanarm) outrstanarm <- stan_glm(seedlingSurv ~ light, # cbind(agree, disagree) ~ education + gender, data = seedlingSurvData, family = binomial(link = "logit"), prior = normal(0,100), prior_intercept = normal(0,100)) # chains = CHAINS, cores = CORES, seed = SEED) summary(outrstanarm) plot(outrstanarm) prior_summary(outrstanarm) ``` ### 5. Fit the Binomial regression using Rstan ```{r} library(rstan) myData<-list(seedlingSurv=seedlingSurv,light=light,N=length(light)) # resStan <- stan(model_code = 'poissonReg.stan', data = myData, # chains = 3, iter = 3000, warmup = 500, thin = 10) resStan <- stan(model_code = modelString, data = myData, chains = 3, iter = 3000, warmup = 500, thin = 10) summary(resStan,par=c('beta0','beta1')) ``` ```{r} library(coda) post_fit<-As.mcmc.list(resStan) summary(post_fit) plot(post_fit,par=c('beta0','beta1')) ``` ```{r} modelString<-"data { int N; real light[N]; int seedlingSurv[N]; } parameters { real beta0; real beta1; } transformed parameters { real logitTheta[N]; real theta[N]; for(i in 1:N){ logitTheta[i] = beta0 + beta1 * light[i]; theta[i] = inv_logit(logitTheta[i]); } } model { beta0~normal(0,100); beta1~normal(0,100); seedlingSurv ~ bernoulli(theta); }" ``` ### 6. Fit the Binomial regression using Rjags ```{r} modelString<-" model { for (i in 1:N){ seedlingSurv[i] ~ dbern(theta.hat[i]) logit(theta.hat[i]) <- beta0 + beta1 * light[i] } beta0 ~ dnorm(0, .0001) beta1 ~ dnorm(0, .0001) } " writeLines(modelString, con='binomReg.jags.txt') ``` ```{r} library(rjags) jags <- jags.model('binomReg.jags.txt', data = list('seedlingSurv' = seedlingSurv, 'light' = light, 'N' = length(light)), inits<-list( list('beta0'=1,'beta1'=2), list('beta0'=1,'beta1'=0.2), list('beta0'=0.1,'beta1'=2), list('beta0'=2,'beta1'=5)), n.chains = 4, n.adapt = 100) ``` ```{r} update(jags, 1000) codaSamples<-coda.samples(jags, c('beta0','beta1'), 10000, 1) ### Sampling from jags # mySamples<-jags.samples(jags, # c('beta0', 'beta1'), # 10000) ``` ```{r} library(coda) plot(codaSamples, trace = FALSE, density = TRUE) summary(codaSamples) traceplot(codaSamples) ```