- JAGS
- background
- model specification syntax
- workflow
- tips & tricks
Bayes rule for data analysis:
\[\underbrace{P(\theta \, | \, D)}_{posterior} \propto \underbrace{P(\theta)}_{prior} \times \underbrace{P(D \, | \, \theta)}_{likelihood}\]
normalizing constant:
\[ \int P(\theta') \times P(D \mid \theta') \, \text{d}\theta' = P(D) \]
easy to solve analytically only in special cases, e.g., when the prior is conjugate to the likelihood
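For instance, in the conjugate beta-binomial case used in the running example below, the posterior is available in closed form:
\[ \theta \sim \text{Beta}(a, b), \quad k \sim \text{Binomial}(\theta, N) \quad \Rightarrow \quad \theta \mid k \sim \text{Beta}(a + k, \, b + N - k) \]
With \(a = b = 1\), \(k = 7\) and \(N = 24\) this gives the \(\text{Beta}(8, 18)\) posterior that is overlaid on the density plot further below.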
Markov Chain Monte Carlo
get a sequence of samples \(x_1, \dots, x_n\) s.t. their distribution approximates the target distribution \(P(\theta \mid D)\)
MCMC algorithms
assessing quality of sample chains
BUGS project (1989 – present)
WinBUGS (1997 - 2007)
Just Another Gibbs Sampler, coded by Martyn Plummer
R interfaces: rjags, R2jags
require('rjags') # also loads `coda` package
modelString = "
model{
theta ~ dbeta(1,1)
k ~ dbinom(theta, N)
}"
# prepare for JAGS
dataList = list(k = 7, N = 24)
# set up and run model
jagsModel = jags.model(file = textConnection(modelString),
                       data = dataList,
                       n.chains = 2)
update(jagsModel, n.iter = 5000)
codaSamples = coda.samples(jagsModel,
                           variable.names = c("theta"),
                           n.iter = 5000)
ms = ggmcmc::ggs(codaSamples) # tidy data frame of samples
ms %>% group_by(Parameter) %>% # requires dplyr
  summarise(mean = mean(value),
            HDIlow = coda::HPDinterval(coda::as.mcmc(value))[1],
            HDIhigh = coda::HPDinterval(coda::as.mcmc(value))[2])
## # A tibble: 1 × 4
##   Parameter      mean    HDIlow   HDIhigh
##      <fctr>     <dbl>     <dbl>     <dbl>
## 1     theta 0.3087857 0.1436357 0.4823556
tracePlot = ggmcmc::ggs_traceplot(ms)
densPlot = ggmcmc::ggs_density(ms) +
  stat_function(fun = function(x) dbeta(x, 8, 18), color = "black")
gridExtra::grid.arrange(tracePlot, densPlot, ncol = 2)
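Beyond eyeballing the trace plot, chain quality can be checked numerically with the coda package (loaded above via rjags); a minimal sketch:
coda::gelman.diag(codaSamples)   # R-hat: values close to 1 indicate convergence across the 2 chains
coda::effectiveSize(codaSamples) # effective sample size, discounted for autocorrelation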
- ifelse, step … commands for boolean switching
- for loops for vector construction
# declare size of variables if needed
var ... ;
# do some data massaging (usually done in R)
data{
...
}
# specify model
model{
...
}
var myData[5,100], myMu[5], myTau[5];
data{
  N = sum(counts) # counts is input to JAGS from R
  indVector = c(3,2,4) # works like in R
  specialConditions = counts[indVector] # like R; new in JAGS 4
}
model{
  for (i in 1:dim(myData)[1]){
    for (j in 1:dim(myData)[2]){
      myData[i,j] ~ dnorm(myMu[i], myTau[i])
    }
  }
  for (i in 1:5){
    tmp[i] ~ dbeta(1,1)
    myMu[i] = 100 + tmp[i] - 0.5
    myTau[i] ~ dunif(0, 1000)
  }
}
Not allowed in JAGS (no transformations on the left-hand side of ~, no user-defined distributions):
x ~ dbeta(1,1) - 0.5
myCustomDistribution = ... # something fancy
x ~ myCustomDistribution(0,1)
Your turn: what's this model good for?
obs is a vector of 100 observations (real numbers)
model{
  mu ~ dunif(-2,2)
  sigma2 ~ dunif(0, 100) # 'var' is a reserved word in JAGS
  tau = 1/sigma2
  for (i in 1:100){
    obs[i] ~ dnorm(mu,tau)
  }
}
NB: JAGS uses precision \(\tau = 1/\sigma^2\), not standard deviation \(\sigma\) in dnorm
model{
  mu ~ dunif(-2,2)
  sigma2 ~ dunif(0, 100)
  tau = 1/sigma2
  for (i in 1:100){
    obs[i] ~ dnorm(mu,tau)
  }
}
\[\mu \sim \text{Unif}(-2,2)\] \[\sigma^2 \sim \text{Unif}(0,100)\] \[obs_i \sim \text{Norm}(\mu, \sigma)\]
set.seed(1789)
fakeData = rnorm(200, mean = 0, sd = 1)
f = function(mu, sigma){
  if (sigma <= 0){ # density is zero outside the support of sigma
    return(0)
  }
  priorMu = dunif(mu, min = -4, max = 4)
  priorSigma = dunif(sigma, min = 0, max = 4)
  likelihood = prod(dnorm(fakeData, mean = mu, sd = sigma))
  return(priorMu * priorSigma * likelihood) # unnormalized posterior
}
samplesMH = MH(f, # `MH` is a custom sampler, not from a package; see the sketch below
               iterations = 60000,
               chains = 2,
               burnIn = 10000) # outputs mcmc.list from `coda` package
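The MH function is not part of any package loaded here; it is assumed to be defined in the accompanying course materials. A minimal sketch of such a Metropolis sampler, assuming a symmetric Gaussian random-walk proposal (the proposalSD value is an illustrative choice):
library(coda)
MH = function(f, iterations = 60000, chains = 2, burnIn = 10000, proposalSD = 0.1){
  chainList = vector("list", chains)
  for (ch in 1:chains){
    samples = matrix(NA, nrow = iterations, ncol = 2,
                     dimnames = list(NULL, c("mu", "sigma")))
    current = c(runif(1, -1, 1), runif(1, 0.5, 2)) # random but valid start values
    for (i in 1:iterations){
      proposal = current + rnorm(2, mean = 0, sd = proposalSD)
      # accept with probability min(1, f(proposal) / f(current))
      if (runif(1) < f(proposal[1], proposal[2]) / f(current[1], current[2])){
        current = proposal
      }
      samples[i, ] = current
    }
    chainList[[ch]] = mcmc(samples[(burnIn + 1):iterations, ])
  }
  mcmc.list(chainList)
}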
modelString = "
model{
mu ~ dunif(-4,4)
sigma ~ dunif(0,4)
tau = 1/sigma^2
for (i in 1:length(obs)){
obs[i] ~ dnorm(mu,tau)
}
}"
jagsModel = jags.model(file = textConnection(modelString),
                       data = list(obs = fakeData),
                       n.chains = 2)
update(jagsModel, n.iter = 5000)
samplesJAGS = coda.samples(jagsModel,
                           variable.names = c("mu", "sigma"),
                           n.iter = 5000)
library(ggmcmc); library(gridExtra) # attach for the unqualified calls below
grid.arrange(ggs_density(ggs(samplesMH)), ggs_density(ggs(samplesJAGS)))
grid.arrange(ggs_traceplot(ggs(samplesMH)) + theme(plot.background = element_blank()),
             ggs_traceplot(ggs(samplesJAGS)) + theme(plot.background = element_blank()))
# function from Kruschke, defined in 'DBDA2E-utilities.R';
# it uses base graphics, so ggplot2 layers like theme() cannot be added
DbdaAcfPlot(samplesMH)
DbdaAcfPlot(samplesJAGS)
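If Kruschke's utilities are not at hand, coda provides equivalent autocorrelation diagnostics:
coda::autocorr.plot(samplesJAGS) # autocorrelation by lag, one panel per chain and variable
coda::autocorr.diag(samplesJAGS) # autocorrelation at selected lags, as a table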
develop step by step & monitor each new intermediate variable
modelString = "
model{
mu ~ dnorm(0,1)
}"
jagsModel = jags.model(file = textConnection(modelString),
data = list(obs = fakeData),
n.chains = 2, n.adapt = 10)
update(jagsModel, n.iter = 10)
codaSamples = coda.samples(jagsModel, variable.names = c("mu"), n.iter = 5000)
ggs_density(ggs(codaSamples))
model{
  thetaPost ~ dbeta(1,1)
  thetaPrior ~ dbeta(1,1)
  # generate data from prior distribution
  priorPredictive ~ dbin(thetaPrior, n)
  # here `thetaPost` is conditioned on observed data!!
  kObs ~ dbin(thetaPost, n)
  # generate data from posterior
  posteriorPredictive ~ dbin(thetaPost, n)
}
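A sketch of how to run this model from R, assuming the model code above is stored in predModelString (a hypothetical name) and reusing the running example's data (kObs = 7, n = 24):
predModel = jags.model(file = textConnection(predModelString),
                       data = list(kObs = 7, n = 24),
                       n.chains = 2)
update(predModel, n.iter = 5000)
predSamples = coda.samples(predModel,
                           variable.names = c("priorPredictive", "posteriorPredictive"),
                           n.iter = 5000)
summary(predSamples)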
prior predictive
\[ P(D) = \int P(\theta) \ P(D \mid \theta) \ \text{d}\theta \]
posterior predictive
\[ P(D \mid D') = \int P(\theta \mid D') \ P(D \mid \theta) \ \text{d}\theta \]
- x <= y, x != y, x || y as usual (see manual)
- ifelse, equals and step for boolean conditioning

model{
  flag ~ dbern(0.5)
  parameter1 = ifelse(flag, 0, -100)
  parameter2 = ifelse(flag, 1, 100)
}
What is this model trying to achieve? And why does it not work?
model{
  flag ~ dbern(0.5)
  SOMEPDF = ifelse(flag, dnorm, dunif)
  parameter1 = ifelse(flag, 0, -100)
  parameter2 = ifelse(flag, 1, 100)
  for(i in 1:length(obs)){
    obs[i] ~ SOMEPDF(parameter1, parameter2)
  }
}
JAGS has no function-valued nodes, so a distribution cannot be selected with ifelse. The "ones trick" works around this: compute the chosen density explicitly and feed it through a Bernoulli likelihood (the density values must lie in [0,1]; rescale by a constant if not).
data{
  for (i in 1:length(obs)){
    ones[i] = 1 # create a vector of ones
  }
}
model{
  flag ~ dbern(0.5)
  parameter1 = ifelse(flag, 0, -100)
  parameter2 = ifelse(flag, 1, 100)
  for(i in 1:length(obs)){
    theta[i] = ifelse(flag,
                      dnorm(obs[i], parameter1, parameter2),
                      dunif(obs[i], parameter1, parameter2))
    ones[i] ~ dbern(theta[i])
  }
}
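A sketch of running the "ones trick" model, assuming the data and model blocks above are stored together in onesTrickModelString (a hypothetical name) and reusing fakeData from earlier:
onesModel = jags.model(file = textConnection(onesTrickModelString),
                       data = list(obs = fakeData),
                       n.chains = 2)
update(onesModel, n.iter = 5000)
flagSamples = coda.samples(onesModel, variable.names = c("flag"), n.iter = 5000)
summary(flagSamples) # posterior mean of `flag` = probability of the normal component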
Friday
Tuesday
totally obligatory:
- install R and JAGS (optional and recommended: RStudio)
- R packages: R2jags, rjags and runjags
- Code/ParameterEstimation/Binomial/Rate_1_jags.R