---
title: "R Notebook"
output: GLMs
---

We will work with two classes of GLM's in this exercise: regressions models for Poisson and Binomial data.

### Poisson Regression Model 

We observed seedling counts in a set of quadrats placed in a forest.  At each quadrat, we measure an integrated metric of light levels.  We want to estimate a Poisson regression model for these data to determine the relationship between light levels and seedlng abundance.

We can use the following code to simulate data in order to recapture model paramters:

```{r}

light<-runif(1000)
# Alt specification for light levels: light=c(0.10, 0.18, 0.26, 0.33, 0.41, 0.49, 0.57, 0.64, 0.72, 0.80)
# Alt specification for light levels: light<-seq(0.1, 0.8,len=10)

beta0<-1.0; beta1<-3.0
lam<-exp(beta0+beta1*light) # this is equivalent to ln(lambda)<-beta0+beta1*light
nSeedlings<-rpois(n=length(lam),lambda=lam)

seedlingData<-data.frame(light,nSeedlings) # if you want a data frame
plot(light,nSeedlings)
```


### 1. Fit the Poisson regression using maximum likelihood.

The likelihood function:

```{r}
nllPoi<-function(parVec,sdlgs,lght){
    b0<-parVec[1]
    b1<-parVec[2]
  sdlgPred<-exp(b0+b1*lght)
  nllik<- -sum(dpois(x=sdlgs,lambda=sdlgPred,log=TRUE))
  # cat("nllik= ",nllik,sep=" ",fill=T);cat(" ",sep=" ",fill=T)
  return(nllik)
}
```


```{r}
parVec<-c(0.5,1.0) # Initial parameter values 
outPois<-optim(par=parVec,fn=nllPoi,method="L-BFGS-B",lower=c(-Inf,-Inf),upper=c(Inf,Inf),sdlgs=nSeedlings,lght=light)
outPois$par # 1.01036 2.98204
outPois$val # nllik 2676.105
myAIC<-2*2 + 2*outPois$val # 5356.21

```

Plotting Poisson regression fit to the data:

```{r}


lightPred<-light[order(light)]
plot(light,nSeedlings)
lines(lightPred, exp(outPois$par[1]+outPois$par[2]*lightPred),col='red',lwd=2)
```

### 2. Compare your maximum likelihood fit to the R glm function.

```{r}
outglm<-glm(nSeedlings~light, family=poisson,data=seedlingData)
summary(outglm)
```


### 3.  Compare your fit to Stan using brms library

```{r}
library(brms)

outStan <- brm(formula = nSeedlings ~ light,
            data = seedlingData, family = poisson())

summary(outStan)
plot(outStan)
plot(marginal_effects(outStan), points = TRUE)

outStan1 <- brm(formula = nSeedlings ~ light,
            data = seedlingData, family = poisson(),
            prior = c(set_prior("normal(0,100)", class = "b")),
            warmup = 1000, iter = 2000, chains = 4)

summary(outStan1)
plot(outStan1)


plot(marginal_effects(outStan1), points = TRUE)
```


### 4. Compare your fit to Stan using rstanarm library


```{r}
library(rstanarm)
outrstanarm<-stan_glm(nSeedlings ~ light,data = seedlingData, family = poisson, iter=2000, warmup=1000, cores=4)

summary(outrstanarm)
plot(outrstanarm)

```


### 5. Fit the Poisson regression using Rstan


```{r}
library(rstan)
myData<-list(nSeedlings=nSeedlings,light=light,N=length(light))

# resStan <- stan(model_code = 'poissonReg.stan', data = myData,
#                chains = 3, iter = 3000, warmup = 500, thin = 10)

resStan <- stan(model_code = modelString, data = myData,
                chains = 3, iter = 3000, warmup = 500, thin = 10)
summary(resStan,par=c('beta0','beta1'))

```

```{r}
library(coda)
post_fit<-As.mcmc.list(resStan) 
plot(post_fit,par=c('beta0','beta1'))
```


```{r}
modelString<-"data {
  int<lower=0> N;
  real<lower=0,upper=1> light[N];
  int<lower=0> nSeedlings[N];
}

parameters {
  real beta0;
  real beta1;
}

transformed parameters {
  real lp[N];
  real <lower=0> mu[N];
  
  for(i in 1:N){
    lp[i] = beta0 + beta1 * light[i];
    mu[i] = exp(lp[i]);
  }
}

model {
  nSeedlings ~ poisson(mu);
}"
```

### 6. Fit the Poisson regression using Rjags


```{r}
library(rjags)
jags <- jags.model('poissonReg.jags.txt',
                   data = list('nSeedlings' = nSeedlings,
                               'light' = light,
                               'N' = length(light)),
                   inits<-list(
                       list('beta0'=1,'beta1'=2),
                       list('beta0'=1,'beta1'=.2),
                       list('beta0'=.1,'beta1'=2),
                       list('beta0'=2,'beta1'=5)),
                   n.chains = 4,
                   n.adapt = 100)
```

```{r}
update(jags, 1000)

jags.samples(jags,
             c('beta0', 'beta1'),
             10000)
```
```{r}
library(coda)
codaSamples<-coda.samples(jags, c('beta0','beta1'), 10000, 1)
```
```{r}
plot(codaSamples, trace = FALSE, density = TRUE)
summary(codaSamples)
traceplot(codaSamples)
```


```{r}
modelString<-"
model {
  for (i in 1:N){
    nSeedlings[i] ~ dpois(lam.hat[i])
    log(lam.hat[i]) <- beta0 + beta1 * light[i]
  }
  beta0 ~ dnorm(0, .0001)
  beta1 ~ dnorm(0, .0001)
}
"
writeLines(modelString, con='poissonReg.jags.txt')
```


### *Binomial Regression Model*

We observed seedling survival in a in a forest.  At each seedlng, we measure an integrated metric of light levels.  We want to estimate a logistic regression model for these data to determine the relationship between light levels and seedlng survival.  The model will be of the form:
logit(p)<-beta0 + beta1 * light
seedling ~ binomial(n=1, p)

Repeat steps 1-6 from the Poisson regression model above except for the binomial model.

Please use the following code to simulate data in order to recapture model paramters (beta0 and beta1):

Simulating data for a binomial regression. 

```{r}
beta0<- -1.0; beta1<-3.0
light<-runif(1000)
pVect<-exp(beta0+beta1*light)/(1+exp(beta0+beta1*light))
range(pVect)
seedlingSurv<-rbinom(n=length(light),size=1,prob=pVect)

seedlingSurvData<-data.frame(light,seedlingSurv) # if you want a data frame
plot(light,seedlingSurv)

```

### 1. Fit the binomial regression using maximum likelihood.

The likelihood function:

```{r}
nllBin<-function(parVec,seedlingSurv,light){
    b0<-parVec[1]
    b1<-parVec[2]
  pPred<-exp(b0+b1*light)/(1+exp(b0+b1*light))
  nllik<- -sum(dbinom(x=seedlingSurv,size=1,prob=pPred,log=TRUE))
  # cat("nllik= ",nllik,sep=" ",fill=T);cat(" ",sep=" ",fill=T)
  return(nllik)
}
```


```{r}
parVec<-c(0.5,1.0) # Initial parameter values 
outBinom<-optim(par=parVec,fn=nllBin,method="L-BFGS-B",lower=c(-Inf,-Inf),upper=c(Inf,Inf),
               seedlingSurv=seedlingSurv,light=light)
outBinom$par # -1.021732  3.086268
outBinom$val # nllik 593.8812
myAIC<-2*2 + 2*outBinom$val # 1191.762
```

```{r}
lightPred<-light[order(light)]
b0<-outBinom$par[1]
b1<-outBinom$par[2]
pPred<-exp(b0+b1*lightPred)/(1+exp(b0+b1*lightPred))
plot(light,seedlingSurv)
lines(lightPred, pPred,col='red',lwd=2)
```

### 2. Compare your maximum likelihood fit to the R glm function.

```{r}
outglm<-glm(seedlingSurv~light, family=binomial(),data=seedlingSurvData)
summary(outglm)
```


### 3.  Compare your fit to Stan using brms library

```{r}
library(brms)

outStan <- brm(formula = seedlingSurv ~ light,
            data = seedlingSurvData, family = bernoulli()) # or binomial()

summary(outStan)
plot(outStan)
# plot(marginal_effects(outStan), points = TRUE)

```

### 4. Compare your fit to Stan using rstanarm library


```{r}
library(rstanarm)

outrstanarm <- stan_glm(seedlingSurv ~ light,  # cbind(agree, disagree) ~ education + gender,
                              data = seedlingSurvData,
                              family = binomial(link = "logit"), 
                              prior = normal(0,100), 
                              prior_intercept = normal(0,100))
                              # chains = CHAINS, cores = CORES, seed = SEED)
summary(outrstanarm)
plot(outrstanarm)
prior_summary(outrstanarm)

```

### 5. Fit the Binomial regression using Rstan

```{r}
library(rstan)
myData<-list(seedlingSurv=seedlingSurv,light=light,N=length(light))

# resStan <- stan(model_code = 'poissonReg.stan', data = myData,
#                chains = 3, iter = 3000, warmup = 500, thin = 10)

resStan <- stan(model_code = modelString, data = myData,
                chains = 3, iter = 3000, warmup = 500, thin = 10)
summary(resStan,par=c('beta0','beta1'))
```


```{r}
library(coda)
post_fit<-As.mcmc.list(resStan) 
summary(post_fit)
plot(post_fit,par=c('beta0','beta1'))
```


```{r}
modelString<-"data {
  int<lower=0> N;
  real<lower=0,upper=1> light[N];
  int<lower=0,upper=1> seedlingSurv[N];
}

parameters {
  real beta0;
  real beta1;
}

transformed parameters {
  real logitTheta[N];
  real<lower=0,upper=1> theta[N];
  
  for(i in 1:N){
    logitTheta[i] = beta0 + beta1 * light[i];
    theta[i] = inv_logit(logitTheta[i]);
  }
}

model {
  beta0~normal(0,100);
  beta1~normal(0,100);
  seedlingSurv ~ bernoulli(theta);
}"
```


### 6. Fit the Binomial regression using Rjags

```{r}
modelString<-"
model {
  for (i in 1:N){
    seedlingSurv[i] ~ dbern(theta.hat[i])
    logit(theta.hat[i]) <- beta0 + beta1 * light[i]
  }
  beta0 ~ dnorm(0, .0001)
  beta1 ~ dnorm(0, .0001)
}
"
writeLines(modelString, con='binomReg.jags.txt')
```


```{r}
library(rjags)
jags <- jags.model('binomReg.jags.txt',
                   data = list('seedlingSurv' = seedlingSurv,
                               'light' = light,
                               'N' = length(light)),
                   inits<-list(
                       list('beta0'=1,'beta1'=2),
                       list('beta0'=1,'beta1'=0.2),
                       list('beta0'=0.1,'beta1'=2),
                       list('beta0'=2,'beta1'=5)),
                   n.chains = 4,
                   n.adapt = 100)
```


```{r}
update(jags, 1000)
codaSamples<-coda.samples(jags, c('beta0','beta1'), 10000, 1)
### Sampling from jags
# mySamples<-jags.samples(jags,
#              c('beta0', 'beta1'),
#              10000)
```


```{r}
library(coda)
plot(codaSamples, trace = FALSE, density = TRUE)
summary(codaSamples)
traceplot(codaSamples)
```