---
title: "Hierarchical model: Baseball batting averages"
output: html_notebook
---

```{r}
setwd("~/Documents/Web/Teaching/PBIO_294/Exercises/11.29.2017")
getwd()
```

K's code used in book to automate the analysis
This works but sort of blackboxish...let's parse the code further below.

```{r}
# Read the data
myData = read.csv('BattingAverage.csv')
# Load the relevant model into R's working memory:
source('Jags-Ybinom-XnomSsubjCcat-MbinomBetaOmegaKappa.R')
# Generate the MCMC chain:
mcmcCoda = genMCMC( data=myData,
                    zName='Hits', NName='AtBats', sName='Player',
                    cName='PriPos', numSavedSteps=11000, thinSteps=20 )
# Display diagnostics of chain, for specified parameters:
for ( parName in c('omega[1]','omega0','kappa[1]','kappa0', 'theta[1]') ) {
  diagMCMC( codaObject=mcmcCoda, parName=parName,
            saveName=fileNameRoot, saveType=graphFileType )
}
# Get summary statistics of chain:
summarylnfo = smryMCMC( mcmcCoda, compVal=NULL )
# Display posterior information:
plotMCMC( mcmcCoda, data=myData,
          zName='Hits', NName='AtBats', sName='Player', cName='PriPos',
          compVal=NULL,
          diffCList=list( c('Pitcher','Catcher'),
                          c('Catcher','1st Base') ),
          diffSList=list( c('Kyle Blanks','Bruce Chen'),
                          c('Mike Leake','Wandy Rodriguez'),
                          c('Andrew McCutchen','Brett Jackson'),
                          c('ShinSoo Choo','Ichiro Suzuki') ),
          compValDiff=0.0 )

```


Parsing K's code from above:

```{r}
# setwd("~/Documents/Web/Teaching/PBIO_294/Exercises/11.29.2017")
getwd()
```

```{r}
myData = read.csv('BattingAverage.csv')
data=myData
zName='Hits'
NName='AtBats'
sName='Player'
cName='PriPos'
z = data[[zName]]
N = data[[NName]]
s = data[[sName]]
c = data[[cName]]
Nsubj = length(unique(s))
Ncat =  length(unique(c))
  # Specify the data in a list, for later shipment to JAGS:
dataList = list(
  z = z ,
  N = N ,
  c = as.numeric(c) , # c in JAGS is numeric, in R is possibly factor
  Nsubj = Nsubj ,
  Ncat = Ncat
)
```

Note: alternative beta(a,b) parametization is beta( w (k-2)+1, (1-w) (k-2)+1 ) where w is mode and k is concentration.  mode w = (a − 1)/(a + b − 2) 


```{r}
  modelString = "
  model {

    # subjects, e.g., individual players
    for ( sIdx in 1:Nsubj ) {
      z[sIdx] ~ dbin( theta[sIdx] , N[sIdx] )
      theta[sIdx] ~ dbeta( omega[c[sIdx]]*(kappa[c[sIdx]]-2)+1 , 
                           (1-omega[c[sIdx]])*(kappa[c[sIdx]]-2)+1 ) 
    }
    
    # categories, e.g., positions
    for ( cIdx in 1:Ncat ) {
      omega[cIdx] ~ dbeta( omegaO*(kappaO-2)+1 , 
                           (1-omegaO)*(kappaO-2)+1 )
      kappa[cIdx] <- kappaMinusTwo[cIdx] + 2
      kappaMinusTwo[cIdx] ~ dgamma( 0.01 , 0.01 ) # mean=1 , sd=10 (generic vague)
    }
    omegaO ~ dbeta( 1.0 , 1.0 ) 
    #omegaO ~ dbeta( 1.025 , 1.075 ) # mode=0.25 , concentration=2.1
    kappaO <- kappaMinusTwoO + 2
    kappaMinusTwoO ~ dgamma( 0.01 , 0.01 )  # mean=1 , sd=10 (generic vague)

    # Contrasts
    c1Minusc3<-omega[1]-omega[3] # difference in modes of pitcher and 1st base positions
    c1Minusc3Prob<-step(c1Minusc3) # returns a 1 if c1Minusc3 > 0

    #kappaMinusTwoO ~ dgamma( 1.01005 , 0.01005012 )  # mode=1 , sd=100
    #kappaMinusTwoO ~ dgamma( 1.105125 , 0.1051249 )  # mode=1 , sd=10
    #kappaMinusTwoO ~ dgamma( 1.105125 , 0.01051249 )  # mode=10 , sd=100
  }
  " # close quote for modelString
  writeLines( modelString , con="TEMPmodel.txt" )
```


```{r}
require(rjags)
# require(runjags)
require(coda)

# parameters = c( "theta","omega","kappa","omegaO","kappaO") 
parameters = c( "omegaO","kappaO","c1Minusc3","c1Minusc3Prob","omega") 
adaptSteps = 500 # Number of steps to adapt the samplers
burnInSteps = 1000  
jagsModel = jags.model( "TEMPmodel.txt" , data=dataList )
update( jagsModel , n.iter=burnInSteps )
  

  # codaSamples<-coda.samples(jags, c('beta0','beta1'), 10000, 1)
codaSamples = coda.samples( jagsModel, variable.names=parameters, 
                                n.iter=10000, 
                                thin=5 )
plot(codaSamples, trace = FALSE, density = TRUE)
# plot(post_fit,par=c('beta0','beta1')) 
HPDinterval(codaSamples)
summary(codaSamples)
traceplot(codaSamples)
  
# jagsModel = jags.model( "TEMPmodel.txt" , data=dataList , inits=initsList , 
#                             n.chains=nChains , n.adapt=adaptSteps )
# codaSamples = coda.samples( jagsModel, variable.names=parameters, 
#                                 n.iter=ceiling(numSavedSteps*thinSteps/nChains), 
#                                 thin=thinSteps )

```


*Things to do:*

1.  Add gamma hyperpriors to the concentration parameter k for the position categories.  This will allow us to examine the disribution of the concentration parameter k for the postion categories.

```{r}
  modelString = "
  model {

    # subjects, e.g., individual players
    for ( sIdx in 1:Nsubj ) {
      z[sIdx] ~ dbin( theta[sIdx] , N[sIdx] )
      theta[sIdx] ~ dbeta( omega[c[sIdx]]*(kappa[c[sIdx]]-2)+1 , 
                           (1-omega[c[sIdx]])*(kappa[c[sIdx]]-2)+1 ) 
    }
    
    # categories, e.g., positions
    for ( cIdx in 1:Ncat ) {
      omega[cIdx] ~ dbeta( omegaO*(kappaO-2)+1 , 
                           (1-omegaO)*(kappaO-2)+1 )
      kappa[cIdx] <- kappaMinusTwo[cIdx] + 2
      kappaMinusTwo[cIdx] ~ dgamma( Sk , Rk ) # was 0.01, 0.01 -> mean=1 , sd=10 (generic vague) *** NEW
    }
    omegaO ~ dbeta( 1.0 , 1.0 ) 
    #omegaO ~ dbeta( 1.025 , 1.075 ) # mode=0.25 , concentration=2.1
    kappaO <- kappaMinusTwoO + 2
    kappaMinusTwoO ~ dgamma( 0.01 , 0.01 )  # mean=1 , sd=10 (generic vague)
    Sk ~ dgamma( 0.01 , 0.01 )  # mean=1 , sd=10 (generic vague) *** NEW
    Rk ~ dgamma( 0.01 , 0.01 )  # mean=1 , sd=10 (generic vague) *** NEW

    # Contrasts
    c1Minusc3<-omega[1]-omega[3] # difference in modes of pitcher and 1st base positions
    c1Minusc3Prob<-step(c1Minusc3) # returns a 1 if c1Minusc3 > 0

    #kappaMinusTwoO ~ dgamma( 1.01005 , 0.01005012 )  # mode=1 , sd=100
    #kappaMinusTwoO ~ dgamma( 1.105125 , 0.1051249 )  # mode=1 , sd=10
    #kappaMinusTwoO ~ dgamma( 1.105125 , 0.01051249 )  # mode=10 , sd=100
  }
  " # close quote for modelString
  writeLines( modelString , con="TEMPmodel.txt" )
```

```{r}
require(rjags)
# require(runjags)
require(coda)

# parameters = c( "theta","omega","kappa","omegaO","kappaO") 
parameters = c( "omegaO","kappaO","c1Minusc3","c1Minusc3Prob","omega","Sk","Rk") 
adaptSteps = 500 # Number of steps to adapt the samplers
burnInSteps = 1000  
jagsModel = jags.model( "TEMPmodel.txt" , data=dataList )
update( jagsModel , n.iter=burnInSteps )
  

  # codaSamples<-coda.samples(jags, c('beta0','beta1'), 10000, 1)
codaSamples = coda.samples( jagsModel, variable.names=parameters, 
                                n.iter=10000, 
                                thin=5 )
plot(codaSamples, trace = FALSE, density = TRUE)
# plot(post_fit,par=c('beta0','beta1')) 
HPDinterval(codaSamples)
summary(codaSamples)
traceplot(codaSamples)
  
# jagsModel = jags.model( "TEMPmodel.txt" , data=dataList , inits=initsList , 
#                             n.chains=nChains , n.adapt=adaptSteps )
# codaSamples = coda.samples( jagsModel, variable.names=parameters, 
#                                 n.iter=ceiling(numSavedSteps*thinSteps/nChains), 
#                                 thin=thinSteps )

```


2.  Add a comparison for two specific players to decide if their batting averages are significantly different.


```{r}
  modelString = "
  model {

    # subjects, e.g., individual players
    for ( sIdx in 1:Nsubj ) {
      z[sIdx] ~ dbin( theta[sIdx] , N[sIdx] )
      theta[sIdx] ~ dbeta( omega[c[sIdx]]*(kappa[c[sIdx]]-2)+1 , 
                           (1-omega[c[sIdx]])*(kappa[c[sIdx]]-2)+1 ) 
    }
    
    # categories, e.g., positions
    for ( cIdx in 1:Ncat ) {
      omega[cIdx] ~ dbeta( omegaO*(kappaO-2)+1 , 
                           (1-omegaO)*(kappaO-2)+1 )
      kappa[cIdx] <- kappaMinusTwo[cIdx] + 2
      kappaMinusTwo[cIdx] ~ dgamma( Sk , Rk ) # was 0.01, 0.01 -> mean=1 , sd=10 (generic vague) *** NEW
    }
    omegaO ~ dbeta( 1.0 , 1.0 ) 
    #omegaO ~ dbeta( 1.025 , 1.075 ) # mode=0.25 , concentration=2.1
    kappaO <- kappaMinusTwoO + 2
    kappaMinusTwoO ~ dgamma( 0.01 , 0.01 )  # mean=1 , sd=10 (generic vague)
    Sk ~ dgamma( 0.01 , 0.01 )  # mean=1 , sd=10 (generic vague) *** NEW
    Rk ~ dgamma( 0.01 , 0.01 )  # mean=1 , sd=10 (generic vague) *** NEW

    # Contrasts
    c1Minusc3<-omega[1]-omega[3] # difference in modes of pitcher and 1st base positions
    c1Minusc3Prob<-step(c1Minusc3) # returns a 1 if c1Minusc3 > 0

    p1Minusp2<-theta[1]-theta[2] # difference in batting average of player 1 and player 2
    p1Minusp2Prob<-step(p1Minusp2) # returns a 1 if p1Minusp2 > 0

    #kappaMinusTwoO ~ dgamma( 1.01005 , 0.01005012 )  # mode=1 , sd=100
    #kappaMinusTwoO ~ dgamma( 1.105125 , 0.1051249 )  # mode=1 , sd=10
    #kappaMinusTwoO ~ dgamma( 1.105125 , 0.01051249 )  # mode=10 , sd=100
  }
  " # close quote for modelString
  writeLines( modelString , con="TEMPmodel.txt" )
```

```{r}
require(rjags)
# require(runjags)
require(coda)

# parameters = c( "theta","omega","kappa","omegaO","kappaO") 
parameters = c( "omegaO","kappaO","c1Minusc3","c1Minusc3Prob","omega","Sk","Rk","p1Minusp2","p1Minusp2Prob") 
adaptSteps = 500 # Number of steps to adapt the samplers
burnInSteps = 1000  
jagsModel = jags.model( "TEMPmodel.txt" , data=dataList )
update( jagsModel , n.iter=burnInSteps )
  

  # codaSamples<-coda.samples(jags, c('beta0','beta1'), 10000, 1)
codaSamples = coda.samples( jagsModel, variable.names=parameters, 
                                n.iter=10000, 
                                thin=5 )
plot(codaSamples, trace = FALSE, density = TRUE)
# plot(post_fit,par=c('beta0','beta1')) 
HPDinterval(codaSamples)
summary(codaSamples)
traceplot(codaSamples)
  
# jagsModel = jags.model( "TEMPmodel.txt" , data=dataList , inits=initsList , 
#                             n.chains=nChains , n.adapt=adaptSteps )
# codaSamples = coda.samples( jagsModel, variable.names=parameters, 
#                                 n.iter=ceiling(numSavedSteps*thinSteps/nChains), 
#                                 thin=thinSteps )

```

3.  Code this model in Stan.