In this demo, we illustrate how to fit models with several states in which the assignment of individuals to states is difficult to make with certainty. We start by breeding states then continue with disease states and end by illustrating how to incorporate individual heterogeneity in capture-recapture models.
library(tidyverse)
library(nimble)
library(MCMCvis)
Let’s get back to the analysis of the Sooty shearwater data. For some individuals, we have some uncertainty in assigning individuals in the breeding or non-breeding state (code 3 in the data).
titis <- read_csv2("titis_with_uncertainty.csv", col_names = FALSE)
ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
Rows: 1013 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ";"
dbl (7): X1, X2, X3, X4, X5, X6, X7
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
y <- as.matrix(titis)
head(y)
X1 X2 X3 X4 X5 X6 X7
[1,] 0 0 0 0 0 0 3
[2,] 0 0 0 0 0 0 3
[3,] 0 0 0 0 0 0 3
[4,] 0 0 0 0 0 0 3
[5,] 0 0 0 0 0 0 3
[6,] 0 0 0 0 0 0 3
Let’s write down the model code.
multievent <- nimbleCode({
# -------------------------------------------------
# Parameters:
# phiB: survival probability state B
# phiNB: survival probability state NB
# psiBNB: transition probability from B to NB
# psiNBB: transition probability from NB to B
# pB: recapture probability B
# pNB: recapture probability NB
# piB prob. of being in initial state breeder
# betaNB prob to ascertain the breeding status of an individual encountered as non-breeder
# betaB prob to ascertain the breeding status of an individual encountered as breeder
# -------------------------------------------------
# States (z):
# 1 alive B
# 2 alive NB
# 3 dead
# Observations (y):
# 1 = non-detected
# 2 = seen and ascertained as breeder
# 3 = seen and ascertained as non-breeder
# 4 = not ascertained
# -------------------------------------------------
# priors
phiB ~ dunif(0, 1)
phiNB ~ dunif(0, 1)
psiBNB ~ dunif(0, 1)
psiNBB ~ dunif(0, 1)
pB ~ dunif(0, 1)
pNB ~ dunif(0, 1)
piB ~ dunif(0, 1)
betaNB ~ dunif(0, 1)
betaB ~ dunif(0, 1)
# vector of initial stats probs
delta[1] <- piB # prob. of being in initial state B
delta[2] <- 1 - piB # prob. of being in initial state NB
delta[3] <- 0 # prob. of being in initial state dead
# probabilities of state z(t+1) given z(t)
gamma[1,1] <- phiB * (1 - psiBNB)
gamma[1,2] <- phiB * psiBNB
gamma[1,3] <- 1 - phiB
gamma[2,1] <- phiNB * psiNBB
gamma[2,2] <- phiNB * (1 - psiNBB)
gamma[2,3] <- 1 - phiNB
gamma[3,1] <- 0
gamma[3,2] <- 0
gamma[3,3] <- 1
# probabilities of y(t) given z(t)
omega[1,1] <- 1 - pB # Pr(alive B t -> non-detected t)
omega[1,2] <- pB * betaB # Pr(alive B t -> detected B t)
omega[1,3] <- 0 # Pr(alive B t -> detected NB t)
omega[1,4] <- pB * (1 - betaB) # Pr(alive B t -> detected U t)
omega[2,1] <- 1 - pNB # Pr(alive NB t -> non-detected t)
omega[2,2] <- 0 # Pr(alive NB t -> detected B t)
omega[2,3] <- pNB * betaNB # Pr(alive NB t -> detected NB t)
omega[2,4] <- pNB * (1 - betaNB) # Pr(alive NB t -> detected U t)
omega[3,1] <- 1 # Pr(dead t -> non-detected t)
omega[3,2] <- 0 # Pr(dead t -> detected N t)
omega[3,3] <- 0 # Pr(dead t -> detected NB t)
omega[3,4] <- 0 # Pr(dead t -> detected U t)
omega.init[1,1] <- 0 # Pr(alive B t = 1 -> non-detected t = 1)
omega.init[1,2] <- betaB # Pr(alive B t = 1 -> detected B t = 1)
omega.init[1,3] <- 0 # Pr(alive B t = 1 -> detected NB t = 1)
omega.init[1,4] <- 1 - betaB # Pr(alive B t = 1 -> detected U t = 1)
omega.init[2,1] <- 0 # Pr(alive NB t = 1 -> non-detected t = 1)
omega.init[2,2] <- 0 # Pr(alive NB t = 1 -> detected B t = 1)
omega.init[2,3] <- betaNB # Pr(alive NB t = 1 -> detected NB t = 1)
omega.init[2,4] <- 1 - betaNB # Pr(alive NB t = 1 -> detected U t = 1)
omega.init[3,1] <- 1 # Pr(dead t = 1 -> non-detected t = 1)
omega.init[3,2] <- 0 # Pr(dead t = 1 -> detected N t = 1)
omega.init[3,3] <- 0 # Pr(dead t = 1 -> detected NB t = 1)
omega.init[3,4] <- 0 # Pr(dead t = 1 -> detected U t = 1)
# likelihood
for (i in 1:N){
# latent state at first capture
z[i,first[i]] ~ dcat(delta[1:3])
y[i,first[i]] ~ dcat(omega.init[z[i,first[i]],1:4])
for (t in (first[i]+1):K){
# z(t) given z(t-1)
z[i,t] ~ dcat(gamma[z[i,t-1],1:3])
# y(t) given z(t)
y[i,t] ~ dcat(omega[z[i,t],1:4])
}
}
})
Get the data of first capture.
get.first <- function(x) min(which(x != 0))
first <- apply(y, 1, get.first)
Constants in a list.
my.constants <- list(first = first,
K = ncol(y),
N = nrow(y))
Data in a list.
my.data <- list(y = y + 1)
Initial values.
zinit <- y
zinit[zinit==3] <- sample(c(1,2), sum(zinit==3), replace = TRUE)
for (i in 1:nrow(y)) {
for (j in 1:ncol(y)) {
if (j > first[i] & y[i,j]==0) {zinit[i,j] <- which(rmultinom(1, 1, c(1/2,1/2))==1)}
if (j < first[i]) {zinit[i,j] <- 0}
}
}
zinit <- as.matrix(zinit)
initial.values <- function(){list(phiNB = runif(1, 0, 1),
phiB = runif(1, 0, 1),
psiNBB = runif(1, 0, 1),
psiBNB = runif(1, 0, 1),
pNB = runif(1, 0, 1),
pB = runif(1, 0, 1),
piB = runif(1, 0, 1),
betaB = runif(1, 0, 1),
betaNB = runif(1, 0, 1),
z = zinit)}
Parameters to be monitored.
parameters.to.save <- c("phiB",
"phiNB",
"psiNBB",
"psiBNB",
"piB",
"pB",
"pNB",
"betaNB",
"betaB")
MCMC details.
n.iter <- 5000
n.burnin <- 2500
n.chains <- 2
Run nimble.
mcmc.multievent <- nimbleMCMC(code = multievent,
constants = my.constants,
data = my.data,
inits = initial.values,
monitors = parameters.to.save,
niter = n.iter,
nburnin = n.burnin,
nchains = n.chains)
defining model...
building model...
setting data and initial values...
running calculate on model (any error reports that follow may simply reflect missing values in model variables) ...
checking model sizes and dimensions...
checking model calculations...
model building finished.
compiling... this may take a minute. Use 'showCompilerOutput = TRUE' to see C++ compilation details.
compilation finished.
running chain 1...
|-------------|-------------|-------------|-------------|
|-------------------------------------------------------|
running chain 2...
|-------------|-------------|-------------|-------------|
|-------------------------------------------------------|
Let’s have a look to the results. Breeders were mostly assigned as uncertain, while non-breeders were positively assigned.
MCMCsummary(mcmc.multievent, round = 2)
mean sd 2.5% 50% 97.5% Rhat n.eff
betaB 0.19 0.01 0.16 0.19 0.22 1.01 213
betaNB 0.75 0.05 0.64 0.75 0.85 1.04 52
pB 0.57 0.03 0.51 0.57 0.62 1.02 219
pNB 0.60 0.03 0.53 0.60 0.66 1.03 172
phiB 0.81 0.02 0.78 0.81 0.84 1.02 389
phiNB 0.84 0.02 0.80 0.84 0.88 1.01 304
piB 0.71 0.03 0.65 0.71 0.75 1.02 166
psiBNB 0.22 0.02 0.18 0.22 0.28 1.01 230
psiNBB 0.24 0.05 0.15 0.24 0.34 1.09 86
Caterpillar plot of the parameter estimates.
MCMCplot(mcmc.multievent)
We now turn to the analysis of data from a study of the dynamics of conjunctivitis in the house finch (Carpodacus mexicanus Müller). A challenge in modeling disease dynamics involves the estimation of recovery and infection rates, which can be complicated when disease state is uncertain (seen at distance in the house finch case study). In the data we have 0 for a non-detection, 1 for a detection of a healthy individual, 2 for the detection of a sick individual and 3 for an individual for which we cannot say whether it is sane or ill.
y <- as.matrix(read.table("hofi_2000.txt"))
head(y)
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13
[1,] 0 0 0 0 0 0 0 0 3 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0 0 1
[3,] 0 0 0 0 0 0 3 0 0 0 0 0 1
[4,] 0 0 0 0 0 0 0 0 0 0 1 0 0
[5,] 0 0 0 0 0 0 3 3 1 0 0 0 1
[6,] 0 1 0 0 0 0 0 0 0 0 0 0 0
Write the model. The model is similar to the model of the previous section.
hmm.disease <- nimbleCode({
# priors
phiH ~ dunif(0, 1) # prior survival healthy
phiI ~ dunif(0, 1) # prior survival ill
psiIH ~ dunif(0, 1) # prior transition ill -> healthy
psiHI ~ dunif(0, 1) # prior transition healthy -> ill
pH ~ dunif(0, 1) # prior detection healthy
pI ~ dunif(0, 1) # prior detection ill
pi ~ dunif(0, 1) # prob init state healthy
betaH ~ dunif(0,1)
betaI ~ dunif(0,1)
# HMM ingredients
delta[1] <- pi # Pr(healthy t = 1) = pi
delta[2] <- 1 - pi # Pr(ill t = 1) = 1 - pi
delta[3] <- 0 # Pr(dead t = 1) = 0
gamma[1,1] <- phiH * (1 - psiHI) # Pr(H t -> H t+1)
gamma[1,2] <- phiH * psiHI # Pr(H t -> I t+1)
gamma[1,3] <- 1 - phiH # Pr(alive t -> dead t+1)
gamma[2,1] <- phiI * psiIH # Pr(I t -> H t+1)
gamma[2,2] <- phiI * (1 - psiIH) # Pr(I t -> I t+1)
gamma[2,3] <- 1 - phiI # Pr(alive t -> dead t+1)
gamma[3,1] <- 0 # Pr(dead t -> alive t+1)
gamma[3,2] <- 0 # Pr(dead t -> alive t+1)
gamma[3,3] <- 1 # Pr(dead t -> dead t+1)
omega[1,1] <- 1 - pH # Pr(H t -> non-detected t)
omega[1,2] <- pH * betaH # Pr(H t -> detected H t)
omega[1,3] <- 0 # Pr(H t -> detected I t)
omega[1,4] <- pH * (1 - betaH) # Pr(H t -> detected U t)
omega[2,1] <- 1 - pI # Pr(I t -> non-detected t)
omega[2,2] <- 0 # Pr(I t -> detected H t)
omega[2,3] <- pI * betaI # Pr(I t -> detected I t)
omega[2,4] <- pI * (1 - betaI) # Pr(I t -> detected U t)
omega[3,1] <- 1 # Pr(dead t -> non-detected t)
omega[3,2] <- 0 # Pr(dead t -> detected H t)
omega[3,3] <- 0 # Pr(dead t -> detected I t)
omega[3,4] <- 0 # Pr(dead t -> detected U t)
omega.init[1,1] <- 0 # Pr(H t = 1 -> non-detected t = 1)
omega.init[1,2] <- betaH # Pr(H t = 1 -> detected H t = 1)
omega.init[1,3] <- 0 # Pr(H t = 1 -> detected I t = 1)
omega.init[1,4] <- 1 - betaH # Pr(H t = 1 -> detected U t = 1)
omega.init[2,1] <- 0 # Pr(I t = 1 -> non-detected t = 1)
omega.init[2,2] <- 0 # Pr(I t = 1 -> detected H t = 1)
omega.init[2,3] <- betaI # Pr(I t = 1 -> detected I t = 1)
omega.init[2,4] <- 1 - betaI # Pr(I t = 1 -> detected U t = 1)
omega.init[3,1] <- 1 # Pr(dead t = 1 -> non-detected t = 1)
omega.init[3,2] <- 0 # Pr(dead t = 1 -> detected H t = 1)
omega.init[3,3] <- 0 # Pr(dead t = 1 -> detected I t = 1)
omega.init[3,4] <- 0 # Pr(dead t = 1 -> detected U t = 1)
# likelihood
for (i in 1:N){
z[i,first[i]] ~ dcat(delta[1:3])
y[i,first[i]] ~ dcat(omega.init[z[i,first[i]], 1:4])
for (j in (first[i]+1):K){
z[i,j] ~ dcat(gamma[z[i,j-1], 1:3])
y[i,j] ~ dcat(omega[z[i,j], 1:4])
}
}
})
Get the date of first capture.
first <- apply(y, 1, function(x) min(which(x !=0)))
Constants in a list.
my.constants <- list(N = nrow(y), K = ncol(y), first = first)
Data in a list.
my.data <- list(y = y + 1)
Initial values.
zinit <- y
zinit[zinit==3] <- sample(c(1,2), sum(zinit==3), replace = TRUE)
for (i in 1:nrow(y)) {
for (j in 1:ncol(y)) {
if (j > first[i] & y[i,j]==0) {zinit[i,j] <- which(rmultinom(1, 1, c(1/2,1/2))==1)}
if (j < first[i]) {zinit[i,j] <- 0}
}
}
zinit <- as.matrix(zinit)
initial.values <- function() list(phiH = runif(1, 0, 1),
phiI = runif(1, 0, 1),
pH = runif(1, 0, 1),
pI = runif(1, 0, 1),
pi = runif(1, 0, 1),
betaH = runif(1, 0, 1),
betaI = runif(1, 0, 1),
psiHI = runif(1, 0, 1),
psiIH = runif(1, 0, 1),
z = zinit)
Parameters to be monitored.
parameters.to.save <- c("phiH",
"phiI",
"pH",
"pI",
"pi",
"betaH",
"betaI",
"psiHI",
"psiIH")
MCMC details.
n.iter <- 40000
n.burnin <- 25000
n.chains <- 2
Run nimble.
out <- nimbleMCMC(code = hmm.disease,
constants = my.constants,
data = my.data,
inits = initial.values,
monitors = parameters.to.save,
niter = n.iter,
nburnin = n.burnin,
nchains = n.chains)
save(out, file = "house_finch.RData")
Inspect the results.
MCMCsummary(out, round = 2)
mean sd 2.5% 50% 97.5% Rhat n.eff
betaH 0.99 0.01 0.97 0.99 1.00 1.00 1301
betaI 0.05 0.01 0.03 0.05 0.08 1.00 6458
pH 0.75 0.15 0.45 0.77 0.98 1.27 156
pI 0.25 0.02 0.21 0.25 0.29 1.11 452
phiH 0.69 0.04 0.62 0.69 0.77 1.03 454
phiI 0.99 0.01 0.96 0.99 1.00 1.06 525
pi 0.96 0.01 0.93 0.96 0.98 1.00 3769
psiHI 0.80 0.06 0.66 0.81 0.88 1.20 298
psiIH 0.17 0.04 0.12 0.16 0.27 1.26 196
Let’s have a look to the trace plots, and the estimated posterior distribution of the detection probabilities in healthy and ill states.
MCMCtrace(out, pdf = FALSE, params = c("pH", "pI"))
What about the assignment probabilities? The non‐infected birds were positively identified, while most infected encounters were classified as unknown.
MCMCtrace(out, pdf = FALSE, params = c("betaH", "betaI"))
And survival?
MCMCtrace(out, pdf = FALSE, params = c("phiH", "phiI"))
What about the infection and recovery rates?
MCMCtrace(out, pdf = FALSE, params = c("psiHI", "psiIH"))
Last, the proportion of new marked individuals that are in the healthy state.
MCMCtrace(out, pdf = FALSE, params = c("pi"))
For our last example, we will see how to incorporate individual heterogeneity in capture-recapture models using finite mixtures. We will use a case study on gray wolves. The data were kindly provided by the French Office for Biodiversity.
y <- as.matrix(read.table("wolf.txt"))
# 1 seen
# 0 not seen
Let’s write the model. Only thing to notice is that we have two alive states to represent two classes of individuals. We choose to have the detection probabilities dependent on the states and constant survival.
hmm.phipmix <- nimbleCode({
# priors
phi ~ dunif(0, 1) # prior survival
p1 ~ dunif(0, 1) # prior detection
p2 ~ dunif(0, 1) # prior detection
pi ~ dunif(0, 1) # prob init state 1
# HMM ingredients
gamma[1,1] <- phi # Pr(alive 1 t -> alive 1 t+1)
gamma[1,2] <- 0 # Pr(alive 1 t -> alive 2 t+1) // no transition
gamma[1,3] <- 1 - phi # Pr(alive t -> dead t+1)
gamma[2,1] <- 0 # Pr(alive 1 t -> alive 1 t+1) // no transition
gamma[2,2] <- phi # Pr(alive 1 t -> alive 2 t+1)
gamma[2,3] <- 1 - phi # Pr(alive t -> dead t+1)
gamma[3,1] <- 0 # Pr(dead t -> alive 1 t+1)
gamma[3,2] <- 0 # Pr(dead t -> alive 1 t+1)
gamma[3,3] <- 1 # Pr(dead t -> dead t+1)
delta[1] <- pi # Pr(alive t = 1) = pi
delta[2] <- 1 - pi # Pr(alive t = 1) = 1 - pi
delta[3] <- 0 # Pr(dead t = 1) = 0
omega[1,1] <- 1 - p1 # Pr(alive state 1 t -> non-detected t)
omega[1,2] <- p1 # Pr(alive state 1 t -> detected t)
omega[2,1] <- 1 - p2 # Pr(alive state 2 t -> non-detected t)
omega[2,2] <- p2 # Pr(alive state 2 t -> detected t)
omega[3,1] <- 1 # Pr(dead t -> non-detected t)
omega[3,2] <- 0 # Pr(dead t -> detected t)
omega.init[1,1] <- 0 # Pr(alive state 1 t -> non-detected t)
omega.init[1,2] <- 1 # Pr(alive state 1 t -> detected t)
omega.init[2,1] <- 0 # Pr(alive state 2 t -> non-detected t)
omega.init[2,2] <- 1 # Pr(alive state 2 t -> detected t)
omega.init[3,1] <- 1 # Pr(dead t -> non-detected t)
omega.init[3,2] <- 0 # Pr(dead t -> detected t)
# likelihood
for (i in 1:N){
z[i,first[i]] ~ dcat(delta[1:3])
y[i,first[i]] ~ dcat(omega.init[z[i,first[i]], 1:2])
for (j in (first[i]+1):K){
z[i,j] ~ dcat(gamma[z[i,j-1], 1:3])
y[i,j] ~ dcat(omega[z[i,j], 1:2])
}
}
})
Get the date of first capture.
first <- apply(y, 1, function(x) min(which(x != 0)))
Constants in a list.
my.constants <- list(N = nrow(y),
K = ncol(y),
first = first)
Data in a list.
my.data <- list(y = y + 1)
Initial values.
zinit <- y
for (i in 1:nrow(y)) {
for (j in first[i]:ncol(y)) {
if (j == first[i]) zinit[i,j] <- which(rmultinom(1, 1, c(1/2,1/2))==1) # pick alive state
if (j > first[i]) zinit[i,j] <- zinit[i,j-1] # because no transition, state remains the same
}
}
zinit <- as.matrix(zinit)
initial.values <- function() list(phi = runif(1,0,1),
p1 = runif(1,0,1),
p2 = runif(1,0,1),
pi = runif(1,0,1),
z = zinit)
Parameters to be monitored.
parameters.to.save <- c("phi", "p1", "p2", "pi")
MCMC details.
n.iter <- 10000
n.burnin <- 5000
n.chains <- 2
Run nimble.
mcmc.phipmix <- nimbleMCMC(code = hmm.phipmix,
constants = my.constants,
data = my.data,
inits = initial.values,
monitors = parameters.to.save,
niter = n.iter,
nburnin = n.burnin,
nchains = n.chains)
defining model...
building model...
setting data and initial values...
running calculate on model (any error reports that follow may simply reflect missing values in model variables) ...
checking model sizes and dimensions...
checking model calculations...
model building finished.
compiling... this may take a minute. Use 'showCompilerOutput = TRUE' to see C++ compilation details.
compilation finished.
running chain 1...
|-------------|-------------|-------------|-------------|
|-------------------------------------------------------|
running chain 2...
|-------------|-------------|-------------|-------------|
|-------------------------------------------------------|
Numerical summaries.
MCMCsummary(mcmc.phipmix, round = 2)
mean sd 2.5% 50% 97.5% Rhat n.eff
p1 0.46 0.08 0.30 0.46 0.63 1.01 244
p2 0.34 0.13 0.14 0.32 0.63 1.01 112
phi 0.82 0.06 0.71 0.82 0.93 1.02 202
pi 0.50 0.13 0.27 0.49 0.76 1.01 122
Let’s have a look to the trace and posterior distribution of the proportion of individuals in each class, as well as the detection probabilities.
MCMCtrace(mcmc.phipmix, pdf = FALSE, params = c("pi", "p1", "p2"))
Last, survival.
MCMCtrace(mcmc.phipmix, pdf = FALSE, params = c("phi"))