\[ \definecolor{firebrick}{RGB}{178,34,34} \newcommand{\red}[1]{{\color{firebrick}{#1}}} \] \[ \definecolor{mygray}{RGB}{178,34,34} \newcommand{\mygray}[1]{{\color{mygray}{#1}}} \] \[ \newcommand{\set}[1]{\{#1\}} \] \[ \newcommand{\tuple}[1]{\langle#1\rangle} \] \[\newcommand{\States}{{T}}\] \[\newcommand{\state}{{t}}\] \[\newcommand{\pow}[1]{{\mathcal{P}(#1)}}\]
Simple model
data { // declare data variables
  int<lower=0> N ;
  int<lower=0,upper=N> k ;
}
parameters { // declare parameter variables
  real<lower=0,upper=1> theta ;
}
model { // prior and likelihood
  theta ~ beta(1,1) ;
  k ~ binomial(N, theta) ;
}
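A minimal sketch of how a model like this could be fit from R with rstan; the file name and data values are illustrative assumptions, not part of the slides.
library(rstan)
# illustrative data: 7 successes in 10 trials
stan_data <- list(N = 10, k = 7)
# assuming the model above is saved as 'simple_model.stan'
stan_fit <- stan(file = "simple_model.stan", data = stan_data,
                 iter = 2000, chains = 4)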
model as Bayesian network
Hierarchical model
data {
  int<lower=0> N ;
  int<lower=0,upper=N> k ;
}
parameters {
  real<lower=0> a ;
  real<lower=0> b ;
  real<lower=0,upper=1> theta ;
}
model {
  a ~ gamma(1,1) ;
  b ~ gamma(1,1) ;
  theta ~ beta(a,b) ;
  k ~ binomial(N, theta) ;
}
model as Bayesian network
data {
  int<lower=0> N ; int<lower=0,upper=N> k ;
}
parameters {
  real<lower=0> a ;
  real<lower=0> b ;
  real<lower=0,upper=1> theta ;
}
model {
  a ~ gamma(1,1) ;
  b ~ gamma(1,1) ;
  theta ~ beta(a,b) ;
  k ~ binomial(N, theta) ;
}
generated quantities {
  real<lower=0> a_prior ;
  real<lower=0> b_prior ;
  real<lower=0,upper=1> theta_prior ;
  a_prior = gamma_rng(1,1) ;
  b_prior = gamma_rng(1,1) ;
  theta_prior = beta_rng(a_prior,b_prior) ;
}
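Since the `*_prior` variables are just extra columns of the posterior draws, they can be extracted alongside the fitted parameters; a sketch, assuming the fit object is called stan_fit as in the plot() calls below.
draws <- rstan::extract(stan_fit, pars = c("theta", "theta_prior"))
summary(draws$theta)        # posterior draws for theta
summary(draws$theta_prior)  # prior draws for theta, from generated quantities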
plot(stan_fit, plotfun = 'dens', ncol = 1,
pars = c("a", "b", "theta"))
\[ \begin{align*} P(\theta, a, b \, | \, D) & \propto P(\theta, a, b) \ P(D \, | \, \theta, a, b)\\ & = P(a) \ P(b) \ P(\theta \mid a, b) \ P(D \, | \, \theta) \end{align*} \]
\[ \begin{align*} P(\theta, s \, | \, D) & \propto P(\theta, s) \ P(D \, | \, \theta, s)\\ & = P(s) \ \underbrace{P(\theta \mid s)}_{\text{degenerate!}} \ P(D \, | \, \theta) \end{align*} \]
CAVEAT: this is tricky to implement in Stan (why?)
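To spell out what "degenerate" means here (an illustrative reading, in the spirit of the latent mixture model discussed later): conditional on the discrete indicator \(s\), \(\theta\) is either a point mass or a proper distribution, e.g.
\[ P(\theta \mid s) = \begin{cases} \delta(\theta - 0.5) & \text{if } s = 0 \quad \text{(point mass at chance level)}\\ \text{Beta}(\theta \mid a, b) & \text{if } s = 1 \end{cases} \]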
Lee & Wagenmakers (2013)
// (...)
model {
  a ~ gamma(1,1) ;
  b ~ gamma(1,1) ;
  theta ~ beta(a,b) ;
  k ~ binomial(N, theta) ;
}
generated quantities {
  // (...)
  a_prior = gamma_rng(1,1) ;
  b_prior = gamma_rng(1,1) ;
  theta_prior = beta_rng(a_prior,b_prior) ;
}
plot(stan_fit, plotfun = 'dens', ncol = 1,
pars = c("a_prior", "b_prior", "theta_prior"))
parameters {
  real<lower=0,upper=1> mu ;
  real<lower=1,upper=100> v ;
  real<lower=0,upper=1> theta ;
}
transformed parameters {
  real<lower=0> a ; real<lower=0> b ;
  a = mu*v ; b = (1-mu)*v ;
}
model {
  mu ~ beta(1,1) ; theta ~ beta(a,b) ;
  k ~ binomial(N,theta) ;
}
generated quantities {
  real<lower=0,upper=1> mu_prior ;
  real<lower=1,upper=100> v_prior ;
  real<lower=0> a_prior ; real<lower=0> b_prior ;
  real<lower=0,upper=1> theta_prior ;
  mu_prior = beta_rng(1,1) ;
  v_prior = uniform_rng(1,100) ;
  a_prior = mu_prior * v_prior ;
  b_prior = (1 - mu_prior) * v_prior ;
  theta_prior = beta_rng(a_prior, b_prior) ;
}
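Why this parameterization works (a one-line check, not on the original slide): with \(a = \mu v\) and \(b = (1-\mu)v\), the Beta prior on \(\theta\) has mean \(\mu\), and \(v = a + b\) acts as a concentration ("prior sample size") parameter.
\[ \begin{align*} \mathbb{E}[\theta \mid \mu, v] & = \frac{a}{a+b} = \frac{\mu v}{\mu v + (1-\mu)\, v} = \mu, & a + b & = v \end{align*} \]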
puzzle: where is the prior for \(v\)?
plot(stan_fit,
plotfun="dens",
nrow=2)
think: mixed effects regression
(Kruschke, chapter 9.2.4)
library(dplyr)    # for %>%, group_by(), summarize()
library(ggplot2)  # for the plots below
TTdata = readr::read_csv('data/04_therapeutic_touchers.csv') %>%
  group_by(s) %>%
  summarize(k = sum(y), N = length(y))
# k - number of successes per subject
# N - number of trials per subject
head(TTdata)
## # A tibble: 6 x 3
## s k N
## <chr> <int> <int>
## 1 S01 1 10
## 2 S02 2 10
## 3 S03 3 10
## 4 S04 3 10
## 5 S05 3 10
## 6 S06 3 10
ggplot(TTdata, aes(x = s, y = k/N)) +
geom_bar(stat='identity')
data {
  int<lower=0> N[28] ;
  int<lower=0> k[28] ;
}
parameters {
  real<lower=0,upper=1> mu ;
  real<lower=1,upper=100> v ;
  real<lower=0,upper=1> theta[28] ;
}
transformed parameters {
  real<lower=0> a ;
  real<lower=0> b ;
  a = mu * v ;
  b = (1 - mu) * v ;
}
model {
  mu ~ beta(1,1) ;
  theta ~ beta(a,b) ;
  k ~ binomial(N,theta) ;
}
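A sketch of passing the data prepared above to this model; the file name is an assumption, and stan_fit_TT matches the object name used in the post-processing below.
stan_fit_TT <- rstan::stan(
  file = "TT_hierarchical.stan",            # assumed file name for the model above
  data = list(N = TTdata$N, k = TTdata$k),  # 28 per-subject trial and success counts
  iter = 2000, chains = 4
)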
library(ggmcmc)  # for ggs()
# long-format posterior samples, dropping the group-level parameters
samples = ggs(stan_fit_TT) %>% filter(! Parameter %in% c("mu", "v", "a", "b"))
# extract the subject index from parameter names like "theta[7]"
samples$subject = with(samples, factor(regmatches(Parameter, regexpr("[0-9]+", Parameter)), levels = 1:28))
ggplot(samples, aes(x = subject, y = value)) + geom_boxplot() + ylab("bias level (posterior)")
problem
(sometimes) practical solution
15 people answered 40 exam questions. Here are their success rates:
latent discrete mixture model
Lee & Wagenmakers, Chapter 6.1
data {
  int<lower=1> p ;
  int<lower=0> k[p] ;
  int<lower=1> n ;
}
transformed data {
  real psi ;
  psi = .5 ;
}
parameters {
  real<lower=.5,upper=1> phi ;
}
transformed parameters {
  // per-subject log (prior weight x likelihood) for the two mixture components
  vector[2] log_alpha[p] ;
  for (i in 1:p) {
    log_alpha[i,1] = log(.5) + binomial_lpmf(k[i] | n, phi) ;
    log_alpha[i,2] = log(.5) + binomial_lpmf(k[i] | n, psi) ;
  }
}
model {
  // marginalize out the discrete indicator z_i
  for (i in 1:p)
    target += log_sum_exp(log_alpha[i]) ;
}
generated quantities {
  // posterior draws of the discrete indicator, recovered from log_alpha
  int<lower=0,upper=1> z[p] ;
  for (i in 1:p) {
    z[i] = bernoulli_rng(softmax(log_alpha[i])[1]) ;
  }
}
log score
\[\log P(k_i, n, z_i, \phi) = \log \text{Bern}(z_i \mid 0.5) + \log \text{Binom}(k_i, n, \text{ifelse}(z_i=1, \phi, 0.5))\]
marginalizing out
\[ \begin{align*} P(k_i, n, \phi) & = \sum_{z_i \in \set{0,1}} \text{Bern}(z_i \mid 0.5) \ \text{Binom}(k_i, n, \text{ifelse}(z_i=1, \phi, 0.5)) \\ & = \text{Bern}(0 \mid 0.5) \ \text{Binom}(k_i, n, 0.5) + \\ & \ \ \ \ \ \ \ \ \ \ \text{Bern}(1 \mid 0.5) \ \text{Binom}(k_i, n, \phi) \\ & = \underbrace{0.5 \ \text{Binom}(k_i, n, 0.5)}_{\alpha_0} + \underbrace{0.5 \ \text{Binom}(k_i, n, \phi)}_{\alpha_1} \\ \log P(k_i, n, \phi) & = \log \sum_{j=0}^1 \exp \log \alpha_j \end{align*}\]
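The same computation for a single subject in R, to make the log-sum-exp step concrete (k_i, n and phi are placeholder values, not from the slides):
k_i <- 30 ; n <- 40 ; phi <- 0.8  # illustrative values
log_alpha <- c(
  log(0.5) + dbinom(k_i, size = n, prob = 0.5, log = TRUE),  # z_i = 0: chance performance
  log(0.5) + dbinom(k_i, size = n, prob = phi, log = TRUE)   # z_i = 1: success rate phi
)
log_marginal <- log(sum(exp(log_alpha)))  # what Stan's log_sum_exp(log_alpha) computes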
posterior
data & model