set.seed(2018)
fakeData = rnorm(200, mean = 0, sd = 1)
ggplot(tibble(x = fakeData), aes(x)) + geom_density()
c(mean = mean(fakeData), sd = sd(fakeData))
##       mean         sd
## 0.07321794 1.01468222
likelihood_function = function(mu, sigma){
  # unnormalized posterior: prior times likelihood
  if (sigma <= 0){
    return(0)
  }
  priorMu = dunif(mu, min = -4, max = 4)
  priorSigma = dunif(sigma, min = 0, max = 4)
  likelihood = prod(dnorm(fakeData,
                          mean = mu,
                          sd = sigma))
  return(priorMu * priorSigma * likelihood)
}
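As a quick sanity check, one can evaluate this unnormalized posterior directly; note that multiplying 200 densities yields very small numbers, which is why serious implementations usually work with log probabilities:
likelihood_function(mu = 0, sigma = 1)   # tiny positive number near the true values
likelihood_function(mu = 0, sigma = -1)  # 0, since sigma is outside its support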
MH = function(f, iterations = 50, chains = 2, burnIn = 0){
  out = array(0, dim = c(chains, iterations - burnIn, 2))
  dimnames(out) = list("chain" = 1:chains,
                       "iteration" = 1:(iterations - burnIn),
                       "variable" = c("mu", "sigma"))
  for (c in 1:chains){
    # random starting position, drawn from the priors
    mu = runif(1, min = -4, max = 4)
    sigma = runif(1, min = 0, max = 4)
    for (i in 1:iterations){
      # symmetric random-walk proposal
      muNext = mu + runif(1, min = -1.25, max = 1.25)
      sigmaNext = sigma + runif(1, min = -0.25, max = 0.25)
      rndm = runif(1, 0, 1)
      # accept if f(proposal) >= f(current) * rndm; this always accepts
      # proposals that improve on the current position
      if (f(muNext, sigmaNext) >= f(mu, sigma) * rndm) {
        mu = muNext
        sigma = sigmaNext
      }
      if (i > burnIn){   # record only post-burn-in samples
        out[c, i - burnIn, 1] = mu
        out[c, i - burnIn, 2] = sigma
      }
    }
  }
  return(coda::mcmc.list(coda::mcmc(out[1,,]),
                         coda::mcmc(out[2,,])))  # note: hard-coded for 2 chains
}
MH_fit_coda = MH(likelihood_function, iterations = 60000,
chains = 2, burnIn = 10000)
MH_fit = ggmcmc::ggs(MH_fit_coda) # casting into tibble
MH_fit
## # A tibble: 200,000 x 4
##    Iteration Chain Parameter value
##        <int> <int> <fct>     <dbl>
##  1         1     1 mu        0.124
##  2         2     1 mu        0.124
##  3         3     1 mu        0.124
##  4         4     1 mu        0.124
##  5         5     1 mu        0.124
##  6         6     1 mu        0.124
##  7         7     1 mu        0.124
##  8         8     1 mu        0.124
##  9         9     1 mu        0.124
## 10        10     1 mu        0.124
## # ... with 199,990 more rows
MH_fit %>% group_by(Parameter) %>%
  summarize(HDI_lower = HDInterval::hdi(value, 0.95)[[1]],
            mean = mean(value),
            HDI_upper = HDInterval::hdi(value, 0.95)[[2]])
## # A tibble: 2 x 4
##   Parameter HDI_lower   mean HDI_upper
##   <fct>         <dbl>  <dbl>     <dbl>
## 1 mu          -0.0710 0.0727     0.206
## 2 sigma        0.920  1.02       1.12
ggplot(MH_fit, aes(x = value)) + geom_density() +
  facet_grid(~ Parameter, scales = "free")
BUGS project (1989 - present)
WinBUGS (1997 - 2007)
Stan:
  actively developed by Andrew Gelman, Bob Carpenter, and others
  uses HMC and NUTS, but can also do variational Bayes and MAP estimation
  performance optimized for certain kinds of hierarchical models (e.g., hierarchical GLMs)
  written in C++
  cross-platform
  interfaces: command line, Python, Julia, R, Matlab, Stata, Mathematica
  many additional packages/goodies
Content of file binomial.stan
data {                 // declare data variables
  int<lower=0> N ;
  int<lower=0,upper=N> k ;
}
parameters {           // declare parameter variables
  real<lower=0,upper=1> theta ;
}
model {                // prior and likelihood
  theta ~ beta(1,1) ;
  k ~ binomial(N, theta) ;
}
Content of file binomial.r
# package to communicate with Stan
require('rstan')
# prepare data for Stan
dataList = list(k = 7, N = 24)
# fit the model to the data
stan_fit = stan(file = 'binomial.stan',
                data = dataList)
[figure: the model as a Bayesian network]
homework: run this model on your own computer!
the stanfit object:
stan_fit
## Inference for Stan model: binomial.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
##         mean se_mean   sd   2.5%    25%    50%    75%  97.5% n_eff Rhat
## theta   0.31    0.00 0.09   0.15   0.25   0.31   0.37   0.49  1452    1
## lp__  -16.54    0.02 0.70 -18.54 -16.69 -16.27 -16.10 -16.05  2042    1
##
## Samples were drawn using NUTS(diag_e) at Sun Dec 2 13:47:48 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
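Beyond the printed summary, the posterior draws can be pulled out of the stanfit object for further processing; a minimal sketch (the quantile call is just one possible follow-up, not from the slides):
theta_draws = rstan::extract(stan_fit)$theta  # vector of all post-warmup draws
quantile(theta_draws, probs = c(0.025, 0.5, 0.975))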
plot(stan_fit, plotfun = "dens") +
  geom_line(data = tibble(theta = seq(0, .75, length.out = 1000),
                          dens = dbeta(theta, dataList$k + 1,
                                       dataList$N - dataList$k + 1)),
            aes(x = theta, y = dens),
            color = "skyblue", size = 1)
idea: approximate the posterior distribution by a well-behaved parametric function
concretely: find a parametric function \(F(\theta \mid \phi)\) and look for the best-fitting parameters:
\[\phi^* = \arg\min_{\phi} \text{KL}(F(\theta \mid \phi) \ \ || \ \ P(\theta \mid D) ) \]
where KL is the Kullback-Leibler divergence:
\[ \text{KL}(P || Q) = \int P(x) \log \frac{P(x)}{Q(x)} \ \text{d}x \]
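As a toy illustration of this idea (a sketch under simplifying assumptions, not a full variational scheme): approximate the Beta(8, 18) posterior from the coin example by a Gaussian \(F(\theta \mid m, s)\), minimizing a grid approximation of \(\text{KL}(F \ || \ P)\) with a local optimizer started near the posterior mode:
# sketch: fit a Gaussian to the Beta(8, 18) posterior by minimizing KL(F || P)
theta_grid = seq(0.001, 0.999, length.out = 1000)
kl_divergence = function(phi) {
  f = dnorm(theta_grid, mean = phi[1], sd = exp(phi[2]))  # exp() keeps sd positive
  p = dbeta(theta_grid, 8, 18)
  # Riemann-sum approximation of the KL integral; f == 0 contributes nothing
  sum(ifelse(f > 0, f * log(f / p), 0)) * diff(theta_grid[1:2])
}
phi_opt = optim(c(0.3, log(0.1)), kl_divergence)$par
c(mean = phi_opt[1], sd = exp(phi_opt[2]))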
the MAP is the most likely parameter vector under the posterior distribution:
\[ \theta^* = \arg\max_{\theta} P(\theta \mid D) \]
for flat (possibly improper) priors the MAP is the MLE
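This is easy to verify for the coin example: under the flat Beta(1,1) prior the posterior is Beta(k+1, N-k+1), and its mode coincides with the MLE \(k/N\). A small sketch:
k = 7; N = 24
map = optimize(function(theta) dbeta(theta, k + 1, N - k + 1),
               interval = c(0, 1), maximum = TRUE)$maximum
c(MAP = map, MLE = k / N)  # both are approximately 0.292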
convergence/representativeness:
  compare how similar several independent sample chains are
  look at the temporal development of single chains
efficiency:
  autocorrelation and effective sample size (see below)
library('coda')      # base R style
library('ggmcmc')    # tidyverse style
library('bayesplot') # tidyverse style
coda and ggmcmc basically do the same thing, but they differ in, say, aesthetics; rstan has its own tools for diagnostics and relies on bayesplot for plotting.
Content of file Gaussian.stan
data {
  vector[200] y ;
}
parameters {
  real mu ;
  real<lower=0> sigma ;
}
model {
  y ~ normal(mu, sigma) ;
}
dataList = list(y = fakeData)
stan_fit = stan(file = 'Gaussian.stan',
                data = dataList)
plot(stan_fit, plotfun = "dens")
MH_fit = ggmcmc::ggs(MH(likelihood_function, iterations = 60000,
                        chains = 2, burnIn = 10000))
ggplot(MH_fit, aes(x = value)) + geom_density() +
  facet_grid(~ Parameter, scales = "free")
ggmcmc::ggs_traceplot(ggmcmc::ggs(stan_fit))
ggmcmc::ggs_traceplot(MH_fit)
trace plots from multiple chains should look like:
a bunch of hairy caterpillars madly in love with each other
burn-in (warm-up): remove the initial chunk of the sample sequence to discard effects of the (random) starting position
thinning: only consider every \(i\)-th sample to reduce autocorrelation
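Both operations are available in coda; a small sketch on the MH_fit_coda object from above (whose burn-in was already discarded inside MH):
MH_fit_thinned = window(MH_fit_coda, thin = 20)  # keep only every 20th sample
coda::effectiveSize(MH_fit_thinned)              # fewer samples, but less autocorrelated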
the \(\hat{R}\) statistic (potential scale reduction factor):
summary(stan_fit)$summary[c("mu", "sigma"),"Rhat"]
##       mu    sigma
## 1.000069 1.000540
coda::gelman.diag(MH_fit_coda)
## Potential scale reduction factors:
##
##       Point est. Upper C.I.
## mu             1          1
## sigma          1          1
##
## Multivariate psrf
##
## 1
ggmcmc::ggs_autocorrelation(ggmcmc::ggs(stan_fit))
ggmcmc::ggs_autocorrelation(MH_fit)
effective sample size (ESS), where \(ACF(k)\) is the autocorrelation at lag \(k\):
\[\text{ESS} = \frac{N}{ 1 + 2 \sum_{k=1}^{\infty} ACF(k)}\]
# remember: total samples 4000
summary(stan_fit)$summary[c("mu", "sigma"), "n_eff"]
##       mu    sigma
## 3162.536 3114.108
# remember: total samples 100,000
coda::effectiveSize(MH_fit_coda)
##       mu    sigma
## 2585.473 2361.593
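To connect these numbers back to the formula, here is a rough hand computation for one chain and one parameter (a crude sketch: coda::effectiveSize uses a more careful spectral estimate, so the results will differ somewhat):
mu_chain = as.matrix(MH_fit_coda[[1]])[, "mu"]            # first chain, parameter mu
rho = acf(mu_chain, lag.max = 500, plot = FALSE)$acf[-1]  # ACF(1), ..., ACF(500)
length(mu_chain) / (1 + 2 * sum(rho[rho > 0]))            # truncate the sum at negative ACF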
Tuesday
[important!] install Stan via the R package rstan, following these instructions
run the model from the slide "example: inferring a coin's bias with Stan" on your own computer (files for download)