You will need a working installation of rmarkdown in order to ‘knit’ the HTML output (Ctrl/Cmd + Shift + K in RStudio) to produce an HTML file.
Required packages: rmarkdown (comes with RStudio), tidyverse (or ggplot2, dplyr, purrr, tibble), TeachingDemos, rstan, ggmcmc, bridgesampling, polspline.
As always, load the required packages and change the seed to your last name.
library(TeachingDemos)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.3.0
## ✔ tibble 2.0.1 ✔ dplyr 0.7.8
## ✔ tidyr 0.8.2 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.3.0
## ── Conflicts ────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(rstan)
## Loading required package: StanHeaders
## rstan (Version 2.18.2, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
##
## Attaching package: 'rstan'
## The following object is masked from 'package:tidyr':
##
## extract
library(ggmcmc)
library(bridgesampling)
library(polspline)
# set cores to use to the total number of cores (minimally 4)
# NOTE(review): max(detectCores(), 4) requests at least 4 cores even on a
# machine with fewer physical cores -- confirm this mild oversubscription is
# intended so that all 4 Stan chains run in parallel.
options(mc.cores = max(parallel::detectCores(), 4))
# save a compiled version of the Stan model file
rstan_options(auto_write = TRUE)
# replace the placeholder with your own last name; char2seed() (TeachingDemos)
# derives a reproducible RNG seed from the string
lastname <- "YOURLASTNAME"
char2seed(lastname)
# Load required packages. Load order matters: rstan is attached after the
# tidyverse, so rstan::extract masks tidyr::extract (see the attach messages
# above).
library(TeachingDemos)
library(tidyverse)
library(rstan)
library(ggmcmc)
library(bridgesampling)
library(polspline)
# set cores to use to the total number of cores (minimally 4)
options(mc.cores = max(parallel::detectCores(), 4))
# save a compiled version of the Stan model file
rstan_options(auto_write = TRUE)
# seed the RNG from the solution's chosen "last name" for reproducibility
lastname <- "bayes"
char2seed(lastname)
Here is some data from a forgetting experiment. t
is the time point, and y
is the proportion of people who correctly recalled the stimulus.
# Observed recall data from the forgetting experiment:
# proportion of correct recalls at each retention interval.
y <- c(0.94, 0.77, 0.40, 0.26, 0.24, 0.16)
# Retention intervals (time points t at which recall was tested).
t <- c(1, 3, 6, 9, 12, 18)
# Convert proportions to counts of correct recalls out of N = 100.
obs <- 100 * y
# Collect everything in the structure passed to the Stan models below.
forgetting_data <- tibble(
  obs = obs,
  N = 100,
  t = t
)
Below is the code for the power model of forgetting we discussed in class: \[P(\text{recall} | t; c, d) = ct^{-d}\] where \(0 < c, d < 1.5\)
Implement an alternative model based on this exponential model for each participant:
\[P(\text{recall} | t; a, b) = a \exp(-bt)\] where \(0 < a, b < 1.5\)
# Stan program for the power model, supplied as a string; rstan compiles it
# from `model_code` further below. Counts are binomial out of 100 with
# recall probability theta[i] = c * t[i]^(-d), and uniform(0, 1.5) priors
# on c and d.
power_model_str <- "
// power model of forgetting data
data {
int obs[6] ;
real t[6] ;
}
parameters {
real<lower=0,upper=1.5> c ;
real<lower=0,upper=1.5> d ;
}
transformed parameters {
real theta[6];
for (i in 1:6) {
theta[i] = c * t[i]^(-d) ;
}
}
model {
// increment target log score with
// probability of current samples from prior
target += uniform_lpdf(c | 0, 1.5);
target += uniform_lpdf(d | 0, 1.5);
// increment target log score with
// likelihood of data given current parameter samples
target += binomial_lpmf(obs | 100, theta);
}
"
# Exercise template: the exponential model's Stan code is left for the
# student to fill in (see the solution block further below).
exponential_model_str <- "
// exponential model of forgetting data
// STAN CODE HERE
"
# SOLUTION:
# Power model: P(recall | t; c, d) = c * t^(-d), uniform(0, 1.5) priors on
# c and d, binomial likelihood for the observed counts out of 100.
power_model_str <- "
// power model of forgetting data
data {
int obs[6] ;
real t[6] ;
}
parameters {
real<lower=0,upper=1.5> c ;
real<lower=0,upper=1.5> d ;
}
transformed parameters {
real theta[6];
for (i in 1:6) {
theta[i] = c * t[i]^(-d) ;
}
}
model {
// increment target log score with
// probability of current samples from prior
target += uniform_lpdf(c | 0, 1.5);
target += uniform_lpdf(d | 0, 1.5);
// increment target log score with
// likelihood of data given current parameter samples
target += binomial_lpmf(obs | 100, theta);
}
"
# Exponential model: P(recall | t; a, b) = a * exp(-b * t), with the same
# prior ranges and binomial likelihood structure as the power model.
exponential_model_str <- "
// exponential model of forgetting data
data {
int obs[6] ;
real t[6] ;
}
parameters {
real<lower=0,upper=1.5> a ;
real<lower=0,upper=1.5> b ;
}
transformed parameters {
real theta[6];
for (i in 1:6) {
theta[i] = a * exp(-b * t[i]) ;
}
}
model {
// increment target log score with
// probability of current samples from prior
target += uniform_lpdf(a | 0, 1.5);
target += uniform_lpdf(b | 0, 1.5);
// increment target log score with
// likelihood of data given current parameter samples
target += binomial_lpmf(obs | 100, theta);
}
"
For both models, complete the code below to draw samples from the posterior distribution. NB: you should use a large amount of samples (as indicated) so that later bridge sampling is reliable. We use the control
parameter here to fine-tune the inference for these models. Remember to set eval = TRUE
.
# complete the code below
# NOTE: this template is intentionally incomplete (`pars = ,` and the bare
# `data =` do not parse); fill in the gaps before evaluating, or see the
# solution chunk below.
stanfit_exponential <- stan(# specify model
iter = 50000,
warmup = 10000,
control = list(adapt_delta = 0.999),
pars = , # replace NA with parameters to inspect
data = ## provide data
)
stanfit_power <- stan(# specify model
iter = 50000,
warmup = 10000,
control = list(adapt_delta = 0.999),
pars = , # replace NA with parameters to inspect
data = ## provide data
)
# SOLUTION:
# Fit both models with rstan. The long runs (4 chains x 40000 post-warmup
# draws) keep the later bridge-sampling estimates reliable; the high
# adapt_delta forces smaller sampler steps to avoid divergent transitions.
stanfit_exponential <- stan(
  model_code = exponential_model_str,
  data = forgetting_data,
  pars = c("a", "b"),
  iter = 50000,
  warmup = 10000,
  control = list(adapt_delta = 0.999)
)
stanfit_power <- stan(
  model_code = power_model_str,
  data = forgetting_data,
  pars = c("c", "d"),
  iter = 50000,
  warmup = 10000,
  control = list(adapt_delta = 0.999)
)
Show a summary for each fit
# YOUR CODE HERE
# SOLUTION:
# show() prints the default stanfit summary: posterior mean, MC standard
# error, sd, quantiles, effective sample size (n_eff) and Rhat per parameter.
show(stanfit_exponential)
## Inference for Stan model: ebcc794d3a0629c00c6cbdfedb95d92f.
## 4 chains, each with iter=50000; warmup=10000; thin=1;
## post-warmup draws per chain=40000, total post-warmup draws=160000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## a 1.06 0 0.03 0.99 1.04 1.06 1.08 1.12 48108 1
## b 0.13 0 0.01 0.11 0.12 0.13 0.14 0.15 48505 1
## lp__ -23.78 0 1.00 -26.46 -24.16 -23.47 -23.07 -22.81 44400 1
##
## Samples were drawn using NUTS(diag_e) at Fri Mar 1 22:48:15 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
show(stanfit_power)
## Inference for Stan model: 91ad2c5bbd7f83d5685ce9546dab87f1.
## 4 chains, each with iter=50000; warmup=10000; thin=1;
## post-warmup draws per chain=40000, total post-warmup draws=160000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## c 0.95 0 0.02 0.90 0.93 0.95 0.96 0.98 57181 1
## d 0.50 0 0.03 0.43 0.47 0.50 0.52 0.56 68283 1
## lp__ -30.69 0 1.00 -33.38 -31.08 -30.38 -29.98 -29.72 47769 1
##
## Samples were drawn using NUTS(diag_e) at Fri Mar 1 22:49:28 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
Make traceplots for the samples for both models
# YOUR CODE HERE
# SOLUTION:
# Traceplots of the posterior draws per chain; well-mixed, overlapping
# chains indicate convergence. ggs() converts the stanfit object to the
# tidy format ggmcmc's plotting functions expect.
ggs_traceplot(ggs(stanfit_exponential))
ggs_traceplot(ggs(stanfit_power))
Plot the density for posterior samples for both models
# YOUR CODE HERE
# SOLUTION:
# Marginal posterior densities of each model's parameters, using rstan's
# built-in density plotfun.
plot(stanfit_exponential, plotfun = "dens")
plot(stanfit_power, plotfun = "dens")
Approximate the marginal likelihood of each model using the bridgesampling
package (Hint: use bridgesampling::bridge_sampler()
as we did in the lecture, and use option silent = T
to suppress output in the markdown).
# YOUR CODE HERE
# SOLUTION:
# approximate marginal likelihoods with bridge sampling
# silent = TRUE suppresses the per-iteration output in the knitted document,
# as the exercise text requests. (Also: use TRUE/FALSE, never the
# reassignable shorthands T/F -- the original `silent = F` both violated
# that and left the iteration printout in the knit.)
bridge_exponential <- bridge_sampler(samples = stanfit_exponential, silent = TRUE)
## Warning: 46 of the 80000 log_prob() evaluations on the proposal draws
## produced -Inf/Inf.
bridge_power <- bridge_sampler(samples = stanfit_power, silent = TRUE)
## Warning: 225 of the 80000 log_prob() evaluations on the proposal draws
## produced -Inf/Inf.
Determine the error of the estimated marginal likelihoods using bridgesampling::error_measures
.
# YOUR CODE HERE
# SOLUTION:
# get approximate error measures for marginal likelihood estimates
# re2 = approximate relative mean-squared error of the estimate,
# cv = coefficient of variation, percentage = cv as a percentage.
# Both errors are tiny, so the bridge-sampling estimates are reliable.
error_measures(bridge_exponential)
## $re2
## [1] 1.42645e-07
##
## $cv
## [1] 0.0003776838
##
## $percentage
## [1] "0.0378%"
error_measures(bridge_power)
## $re2
## [1] 2.794152e-07
##
## $cv
## [1] 0.0005285974
##
## $percentage
## [1] "0.0529%"
The previous results only give you an estimate for the marginal likelihood of the data given each model. But you can use these estimates to compute the Bayes factor in favour of the exponential model.
# YOUR CODE HERE
# SOLUTION:
# approximate Bayes factor
# Ratio of the two bridge-sampling marginal likelihood estimates;
# x1 = exponential model (first argument), x2 = power model.
bayes_factor(bridge_exponential, bridge_power)
## Estimated Bayes factor in favor of x1 over x2: 1220.39183
Interpret the Bayes factor. What is the value? What does it mean?
ANSWER:
SOLUTION: The Bayes factor is about 1220 in favor of the exponential model. This means that a rational agent should update their beliefs by a factor of about 1220 in favor of the exponential model, compared to the power model. This is massive empirical evidence in favor of the exponential model.
Use the Savage-Dickey method to compute the Bayes factor in favor of the exponential model, when compared against a special case (properly nested model) of the exponential model where a = 1.
Fill in the required code:
# NB: we take only a subset of the samples because otherwise polspline will not work
post_samples <- filter(ggs(stanfit_exponential),
Parameter == "a" & Iteration <= 1000)$value
# log-spline density estimate of the posterior of a on its support [0, 1.5]
fit.posterior <- polspline::oldlogspline(post_samples,
ubound=1.5, lbound=0)
# height of the estimated posterior density at the nested value a = 1
posterior_a <- polspline::doldlogspline(1, fit.posterior)
# calculate the Bayes factor via the Savage-Dickey method
# YOUR CODE HERE
# SOLUTION:
# NB: we take only a subset of the samples because otherwise polspline will not work
post_samples <- filter(ggs(stanfit_exponential),
Parameter == "a" & Iteration <= 1000)$value
# log-spline density estimate of the posterior of a on its support [0, 1.5]
fit.posterior <- polspline::oldlogspline(post_samples,
ubound=1.5, lbound=0)
# height of the posterior distribution at a = 1 (blue)
posterior_a <- polspline::doldlogspline(1, fit.posterior)
# height of the prior distribution at a = 1 (red)
prior_a <- dunif(x = 1, min = 0, max = 1.5)
# plot posterior density, with the flat prior as a dashed line and the two
# Savage-Dickey density heights at a = 1 marked as points
stanfit_exponential %>%
ggs %>% filter(Parameter == "a") %>%
ggplot(aes(x = value)) + geom_density() +
geom_hline(yintercept = prior_a, linetype = "dashed") +
geom_point(x = 1, y = posterior_a, color = "blue") +
geom_point(x = 1, y = prior_a, color = "red")
# calculate the Bayes factor via the Savage-Dickey method in favor of nested model
# NOTE(review): posterior_a / prior_a is the Savage-Dickey ratio in favor of
# the NESTED model (a = 1), i.e. BF01; its reciprocal would favor the full
# exponential model.
savage_dickey_bf <- posterior_a / prior_a
paste0("Approximate BF: ",
round(savage_dickey_bf, 3))
## [1] "Approximate BF: 3.114"
Interpret the result
Is this strong or mild evidence in favor of one of the two models?
ANSWER:
SOLUTION: A Bayes factor of about 3 is mild evidence for the nested model (a = 1). As can be seen in the graph, after conditioning on the observed data, the posterior density at a = 1 has increased relative to the prior density.
We want to approximate the posterior predictive distribution. For this, we draw samples from the posterior, using basically the same scripts as above for posterior estimation. Additionally we generate a “sample imaginary prediction” of data that we’d expect to see in an exact repetition of the experiment, for each sampled vector of parameter values. To do so, we use the generated parameters
block at the end of the Stan model code.
Complete the pseudo-code below for the exponential and the power model with actual Stan code which does that. (Hint: use command binomial_rng
to take samples from a binomial distribution; check the Stan manual for its syntax.)
# Exercise templates: extend both models with a `generated quantities` block
# that draws a posterior predictive sample of the data on every iteration.
# The pseudo-code inside the strings is to be replaced by the student.
power_pred_model_str <- "
// Stan code here
// fill in the gaps and replace the pseudo-code
data {
// as before
}
parameters {
// as before
}
transformed parameters {
// as before
}
model {
// as before
}
## PSEUDO-CODE!
generated quantities {
// fill in for yourself
// DECLARE VARIABLE 'post_predict_sample' WITH SAME SHAPE AS THE DATA
// post_predict_sample = SAMPLE FROM BINOMIAL DISTRIBUTION, WITH CURRENT PARAMETER ESTIMATES
}
"
exponential_pred_model_str <- "
// Stan code here
// fill in the gaps and replace the pseudo-code
data {
// as before
}
parameters {
// as before
}
transformed parameters {
// as before
}
model {
// as before
}
## PSEUDO-CODE!
generated quantities {
// fill in for yourself
// DECLARE VARIABLE 'post_predict_sample' WITH SAME SHAPE AS THE DATA
// post_predict_sample = SAMPLE FROM BINOMIAL DISTRIBUTION, WITH CURRENT PARAMETER ESTIMATES
}
"
# SOLUTION:
# Both models are extended with a `generated quantities` block that draws a
# replicated data set (binomial_rng) from the current theta on every
# iteration -- these draws form the posterior predictive distribution.
# NOTE(review): the Stan comments below contain typos ("incement",
# "incermenten"); they are harmless and left untouched here because changing
# the model string would invalidate rstan's auto_write compilation cache.
power_pred_model_str <- "
// power model of forgetting data
data {
int obs[6] ;
real t[6] ;
}
parameters {
real<lower=0,upper=1.5> c ;
real<lower=0,upper=1.5> d ;
}
transformed parameters {
real theta[6];
for (i in 1:6) {
theta[i] = c * t[i]^(-d) ;
}
}
model {
// increment target log score with
// probability of current samples from prior
target += uniform_lpdf(c | 0, 1.5);
target += uniform_lpdf(d | 0, 1.5);
// incement target log score with
// likelihood of data given current parameter samples
target += binomial_lpmf(obs | 100, theta);
}
generated quantities {
int post_predict_sample[6];
post_predict_sample = binomial_rng(100, theta);
}
"
exponential_pred_model_str <- "
// exponential model of forgetting data
data {
int obs[6] ;
real t[6] ;
}
parameters {
real<lower=0,upper=1.5> a ;
real<lower=0,upper=1.5> b ;
}
transformed parameters {
real theta[6];
for (i in 1:6) {
theta[i] = a * exp(-b * t[i]) ;
}
}
model {
// increment target log score with
// probability of current samples from prior
target += uniform_lpdf(a | 0, 1.5);
target += uniform_lpdf(b | 0, 1.5);
// incermenten target log score with
// likelihood of data given current parameter samples
target += binomial_lpmf(obs | 100, theta);
}
generated quantities {
int post_predict_sample[6];
post_predict_sample = binomial_rng(100, theta);
}
"
Plot the data and a visual posterior predictive check
Use the samples from variable post_predict_sample
to generate plots for a visual posterior predictive check, which should be similar to what we had in the lecture. Use y limits of 0 to 100.
Hint: use geom_point()
for the real data and geom_pointrange()
for the predicted values and 95% credible intervals.
# YOUR CODE HERE
# SOLUTION:
# Refit the exponential model, keeping only the posterior predictive draws
# from the generated quantities block.
stanfit_exponential_pred <- stan(
  model_code = exponential_pred_model_str,
  data = forgetting_data,
  pars = c("post_predict_sample"),
  iter = 50000,
  warmup = 10000,
  control = list(adapt_delta = 0.999)
)
# Posterior predictive summary per time point (mean and 95% credible interval).
exp_summary <- as_tibble(
  summary(stanfit_exponential_pred, pars = c("post_predict_sample"))$summary
)
# Visual posterior predictive check: observed counts (black points) against
# predicted means with 95% credible intervals (blue point ranges).
ggplot(forgetting_data, aes(x = t, y = obs)) +
  geom_point() +
  geom_pointrange(
    x = t,
    y = exp_summary$mean,
    ymin = exp_summary$`2.5%`,
    ymax = exp_summary$`97.5%`,
    color = "skyblue",
    alpha = 0.5
  ) +
  ylim(0, 100)
# Same posterior predictive check for the power model.
stanfit_power_pred <- stan(
  model_code = power_pred_model_str,
  data = forgetting_data,
  pars = c("post_predict_sample"),
  iter = 50000,
  warmup = 10000,
  control = list(adapt_delta = 0.999)
)
# Posterior predictive summary per time point (mean and 95% credible interval).
power_summary <- as_tibble(
  summary(stanfit_power_pred, pars = c("post_predict_sample"))$summary
)
# Observed counts (black points) against predicted means with 95% credible
# intervals (red point ranges).
ggplot(forgetting_data, aes(x = t, y = obs)) +
  geom_point() +
  geom_pointrange(
    x = t,
    y = power_summary$mean,
    ymin = power_summary$`2.5%`,
    ymax = power_summary$`97.5%`,
    color = "red",
    alpha = 0.5
  ) +
  ylim(0, 100)
To compute the posterior predictive \(p\)-value most efficiently (in the sense of having to write least code; not necessarily in the sense of fastest run time), we will expand the Stan code even more. Based on your previous code which computes samples from the posterior predictive distribution, expand the Stan code even more to also generate two more quantities. The first is the likelihood of the observed data under the currently sampled parameter values. The second is the likelihood of the post_predict_sample
under the currently sampled parameter value. The posterior predictive \(p\)-value is approximated by the mean of the variable post_pred_p_value_flag
in the pseudo-code below.
Complete the code accordingly.
# Exercise templates for the posterior predictive p-value: the generated
# quantities block additionally computes the likelihood of the observed and
# replicated data and a 0/1 comparison flag whose posterior mean
# approximates the p-value.
power_ppp_model_str <- "
// fill in the gaps and replace the pseudo-code
data {
// as before
}
parameters {
// as before
}
transformed parameters {
// as before
}
model {
// as before
}
## PSEUDO-CODE!
generated quantities {
// fill in for yourself
// DECLARE VARIABLE 'post_predict_sample' WITH SAME SHAPE AS THE DATA
// post_predict_sample = SAMPLE FROM BIOMIAL DISTRIBUTION, WITH CURRENT PARAMETER ESTIMATES
// likelihood_data = binomial_lpmf( ... FILL ME ... )
// likelihood_post_predict_sample = binomial_lpmf( ... FILL ME ... )
// post_pred_p_value_flag = likelihood_data >= likelihood_post_predict_sample;
}
"
exponential_ppp_model_str <- "
// fill in the gaps and replace the pseudo-code
data {
// as before
}
parameters {
// as before
}
transformed parameters {
// as before
}
model {
// as before
}
## PSEUDO-CODE!
generated quantities {
// fill in for yourself
// DECLARE VARIABLE 'post_predict_sample' WITH SAME SHAPE AS THE DATA
// post_predict_sample = SAMPLE FROM BIOMIAL DISTRIBUTION, WITH CURRENT PARAMETER ESTIMATES
// likelihood_data = binomial_lpmf( ... FILL ME ... )
// likelihood_post_predict_sample = binomial_lpmf( ... FILL ME ... )
// post_pred_p_value_flag = likelihood_data >= likelihood_post_predict_sample;
}
"
# Solution models for the posterior predictive p-value. Each generated
# quantities block draws a replicated data set, computes the log-likelihood
# of the observed and replicated data under the current parameter draw, and
# flags whether the observed data are at least as likely; the flag's
# posterior mean approximates the posterior predictive p-value.
# NOTE(review): the flag is named `post_predict_p_value_flag` here, whereas
# the exercise text calls it `post_pred_p_value_flag`; the fitting code
# below uses the name declared here, so the code is internally consistent.
power_model_ppp_str <- "
// power model of forgetting data
data {
int obs[6] ;
real t[6] ;
}
parameters {
real<lower=0,upper=1.5> c ;
real<lower=0,upper=1.5> d ;
}
transformed parameters {
real theta[6];
for (i in 1:6) {
theta[i] = c * t[i]^(-d) ;
}
}
model {
// increment target log score with
// probability of current samples from prior
target += uniform_lpdf(c | 0, 1.5);
target += uniform_lpdf(d | 0, 1.5);
// increment target log score with
// likelihood of data given current parameter samples
target += binomial_lpmf(obs | 100, theta);
}
generated quantities {
// declare variables
int post_predict_sample[6];
real likelihood_data;
real likelihood_post_predict_sample;
int post_predict_p_value_flag;
// generate predictions
post_predict_sample = binomial_rng(100, theta);
// calculate likelihoods
likelihood_data = binomial_lpmf(obs | 100, theta);
likelihood_post_predict_sample = binomial_lpmf(post_predict_sample | 100, theta);
// compare likelihoods
post_predict_p_value_flag = likelihood_data >= likelihood_post_predict_sample;
}
"
# The exponential version accumulates the log-likelihood in an explicit
# loop; the vectorized binomial_lpmf calls in the power model above return
# the same sum over elements, so the two styles are equivalent.
exponential_model_ppp_str <- "
// exponential model of forgetting data
data {
int obs[6] ;
real t[6] ;
}
parameters {
real<lower=0,upper=1.5> a ;
real<lower=0,upper=1.5> b ;
}
transformed parameters {
real theta[6];
for (i in 1:6) {
theta[i] = a * exp(-b * t[i]) ;
}
}
model {
// increment target log score with
// probability of current samples from prior
target += uniform_lpdf(a | 0, 1.5);
target += uniform_lpdf(b | 0, 1.5);
// increment target log score with
// likelihood of data given current parameter samples
target += binomial_lpmf(obs | 100, theta);
}
generated quantities {
// declare variables
int post_predict_sample[6];
real likelihood_data;
real likelihood_post_predict_sample;
int post_predict_p_value_flag;
// generate predictions
post_predict_sample = binomial_rng(100, theta);
// calculate likelihoods
likelihood_data = 0;
for (n in 1:6) {
likelihood_data = likelihood_data + binomial_lpmf(obs[n] | 100, theta[n]);
}
likelihood_post_predict_sample = 0;
for (n in 1:6) {
likelihood_post_predict_sample = likelihood_post_predict_sample + binomial_lpmf(post_predict_sample[n] | 100, theta[n]);
}
// compare likelihoods
post_predict_p_value_flag = likelihood_data >= likelihood_post_predict_sample;
}
"
Retrieve estimates of the posterior predictive \(p\)-values for both the exponential and power models.
# YOUR CODE HERE
# Run the stan models and retrieve an estimate of the posterior predictive
# p-value for both exponential and power model.
# The posterior mean of the 0/1 flag is the approximate posterior
# predictive p-value.
stanfit_exponential_ppp <- stan(
  model_code = exponential_model_ppp_str,
  data = forgetting_data,
  pars = c("post_predict_p_value_flag"),
  iter = 50000,
  warmup = 10000,
  control = list(adapt_delta = 0.999)
)
exponential_p_summary <- as_tibble(
  summary(stanfit_exponential_ppp, pars = c("post_predict_p_value_flag"))$summary
)
exponential_p_summary$mean
## [1] 0.0670625
# Same procedure for the power model.
stanfit_power_ppp <- stan(
  model_code = power_model_ppp_str,
  data = forgetting_data,
  pars = c("post_predict_p_value_flag"),
  iter = 50000,
  warmup = 10000,
  control = list(adapt_delta = 0.999)
)
## Warning: There were 1 divergent transitions after warmup. Increasing adapt_delta above 0.999 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: Examine the pairs() plot to diagnose sampling problems
power_p_summary <- as_tibble(
  summary(stanfit_power_ppp, pars = c("post_predict_p_value_flag"))$summary
)
power_p_summary$mean
## [1] 0.00015
Can either of the models be criticised based on these values? If so, which and why?
ANSWER:
SOLUTION: Yes, the power model can be criticised: the observed data appear surprising under it, as its posterior predictive p-value (≈ 0.00015) falls below the conventional alpha level of 0.05, whereas the exponential model's (≈ 0.067) does not.
End of assignment