I need to have in one place some code to simulate data and fit spatial capture-recapture models, for both closed and open populations.
The reference book for spatial capture-capture (SCR) models is Spatial Capture-Recapture by Royle, Chandler, Sollmann and Gardner. Andy Royle has nice slides for an introduction to SCR models, and a video.
We will go the Bayesian way. Richard Chandler provides nice slides for Bayesian inference in closed population SCR models, and open population SCR models.
To fit SCR models, we will use Nimble
developed by Perry de Valpine and colleagues. We might implement a few tricks by Daniel Turek to improve and reach convergence faster.
Load Nimble
.
library(nimble)
We use some code shared by Jose Jimenez on the SCR Google group and adapted by Perry de Valpine. A formal description of the model we consider is given in the references given above.
The traps.
tr <- seq(15, 85, length = 10)
traps <- data.frame(X = rep(tr, each = length(tr)),
Y = rep(tr, times = length(tr))) # 100 coord. traps
Visualize.
viz_traps <- traps %>%
ggplot(aes(x = X, y = Y)) +
geom_point(pch = 3) +
xlim(0, 100) +
ylim(0, 100)
viz_traps
Generate population.
set.seed(10)
xlim <- c(0, 100)
ylim <- c(0, 100) # area 100 * 100 = 1e4
A <- (xlim[2] - xlim[1]) * (ylim[2] - ylim[1])/10000
A
## [1] 1
mu <- 50 # density
N <- rpois(1, mu*A) # generate population
N
## [1] 50
Generate activity centers.
s <- data.frame(s.x = runif(N, xlim[1], xlim[2]),
s.y = runif(N, ylim[1], ylim[2]))
Visualize.
viz_traps_ac <- viz_traps +
geom_point(data = s, aes(x = s.x, y = s.y), pch = 16, color = "red")
viz_traps_ac
Generate detections.
sigma <- 5
lambda0 <- 0.4
J <- nrow(traps) # nb of traps
K <- 5 # nb capture occasions
yy <- array(NA, c(N, J, K))
for(j in 1:J) {
dist <- sqrt((traps$X[j] - s$s.x)^2 + (traps$Y[j] - s$s.y)^2)
lambda <- lambda0 * exp(-dist^2 / (2 * sigma^2))
for(k in 1:K) {
yy[,j,k] <- rpois(N, lambda)
}
}
n <- apply(yy, c(2,3), sum)
Plot detections.
tot <- apply(n, 1, sum)
dat <- data.frame(traps, tot = tot)
viz_traps_ac +
geom_point(data = dat, aes(x = X, y = Y, size = tot), alpha = 0.3) +
scale_size(range = c(0, 20)) +
labs(x = "",
y = "",
size = "# detections")
Define the model.
code <- nimbleCode({
sigma ~ dunif(0, 10)
lam0 ~ dunif(0, 5)
psi ~ dbeta(1, 1)
for(i in 1:M) {
z[i] ~ dbern(psi)
s[i,1] ~ dunif(xlim[1], xlim[2])
s[i,2] ~ dunif(ylim[1], ylim[2])
dist[i,1:J] <- (s[i,1] - X[1:J,1])^2 + (s[i,2] - X[1:J,2])^2
lam[i,1:J] <- exp(-dist[i,1:J] / (2 * sigma^2)) * z[i]
}
for(j in 1:J){
bigLambda[j] <- lam0 * sum(lam[1:M,j])
for(k in 1:K) {
n[j,k] ~ dpois(bigLambda[j])
}
}
N <- sum(z[1:M])
})
Define constants, data and inits.
M <- 200
constants <- list(M = M,
K = K,
J = J)
n1 <- apply(n, 1, sum)
data <- list(n = n,
X = traps,
xlim = xlim,
ylim = ylim)
s <- cbind(runif(M, xlim[1], xlim[2]),
runif(M, ylim[1], ylim[2]))
z <- rep(1, M)
inits <- list(sigma = 0.5,
lam0 = 0.1,
s = s,
z = z,
psi = 0.5)
Build R
model (not compiled yet).
Rmodel <- nimbleModel(code = code,
constants = constants,
data = data,
inits = inits)
Check whether the model is fully initialized. If you failed at providing initial values for some parameters (e.g. \(\psi\)), you’ll get NAs.
Rmodel$calculate()
## [1] -8089.009
Now compile the model in C++
.
Cmodel <- compileNimble(Rmodel)
The R
and C
models are exactly the same versions of the model.
calculate(Cmodel)
## [1] -8089.009
You can simulate from prior.
Cmodel$simulate('lam0')
calculate(Cmodel)
## [1] -7669.788
Specify MCMC.
conf <- configureMCMC(Rmodel,
monitors = c("N", "lam0", "psi", "sigma"))
## ===== Monitors =====
## thin = 1: N, lam0, psi, sigma
## ===== Samplers =====
## RW sampler (402)
## - sigma
## - lam0
## - s[] (400 elements)
## conjugate sampler (1)
## - psi
## binary sampler (200)
## - z[] (200 elements)
Build an executable MCMC.
Rmcmc <- buildMCMC(conf)
Compile in C++
.
Cmcmc <- compileNimble(Rmcmc, project = Cmodel)
Run compiled model (do not run the uncompiled model with runMCMC(Rmcmc,100)
).
samples <- runMCMC(Cmcmc,100)
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
Explore.
dim(samples)
## [1] 100 4
colnames(samples)
## [1] "N" "lam0" "psi" "sigma"
samplesSummary(samples)
## Mean Median St.Dev. 95%CI_low 95%CI_upp
## N 126.4300000 123.5000000 19.7638866 95.9500000 185.3500000
## lam0 1.0657507 1.0453553 0.1245833 0.7794961 1.2568560
## psi 0.6426589 0.6284448 0.1080914 0.4638732 0.9672855
## sigma 2.2640864 2.2800982 0.1605662 1.7889305 2.4354455
Run compiled model. Takes 10-15 minutes on my machine.
samplesList <- runMCMC(Cmcmc,
niter = 10000,
nburnin = 5000,
nchains = 2)
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
samples <- rbind(samplesList[[1]],
samplesList[[2]])
str(samples)
## num [1:10000, 1:4] 31 29 27 23 22 22 19 24 28 29 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:4] "N" "lam0" "psi" "sigma"
Calculate ESS effective sample size.
library(coda)
apply(samples, 2, effectiveSize)
## N lam0 psi sigma
## 61.64045 143.41820 58.91601 44.60710
Produce trace an density plots.
library(basicMCMCplots)
chainsPlot(samplesList,
var = c("N", "sigma", "lam0"))
Display summary stats. Compare to the values used to simulate data, in particuler \(N = 50\), \(\sigma = 5\) and \(\lambda_0 = 0.4\).
summary(samples)
## N lam0 psi sigma
## Min. : 9.00 Min. :0.2070 Min. :0.02896 Min. :2.773
## 1st Qu.: 38.00 1st Qu.:0.4230 1st Qu.:0.19116 1st Qu.:3.965
## Median : 55.00 Median :0.5031 Median :0.27948 Median :4.504
## Mean : 61.08 Mean :0.5151 Mean :0.30760 Mean :4.619
## 3rd Qu.: 80.00 3rd Qu.:0.5903 3rd Qu.:0.40230 3rd Qu.:5.076
## Max. :174.00 Max. :1.3284 Max. :0.87699 Max. :9.349
We use some code shared by Beth Gardner and colleagues in their paper State space and movement specification in open population spatial capture–recapture models. A formal description of the model we consider is given in the paper. We consider a model with constant activity centers.
Function to calculate the distance between multiple points.
e2dist <- function(x, y){
i <- sort(rep(1:nrow(y), nrow(x)))
dvec <- sqrt((x[, 1] - y[i, 1])^2 + (x[, 2] - y[i, 2])^2)
matrix(dvec, nrow = nrow(x), ncol = nrow(y), byrow = F)
}
simJS.fn <- function(N, phi0, lam0, M, T, grid, xl, xu, yl, yu, sigma, K){ # M is total ever alive
ntraps <- dim(grid)[1]
nreps <- K
lam0 <- rep(lam0, T)
phi<- rep(phi0, T)
pmat <- lam <- list()
gamma <- NULL
gamma[1] <- N/M
sx <- runif(M, xl, xu)
sy <- runif(M, yl, yu)
z <- r <- al <- matrix(0, nrow = M, ncol = T)
r[,1] <- rbinom(M, 1, gamma[1])
z[,1] <- r[,1]
for (t in 2:T){
# survival
surv <- rbinom(M, 1, z[,t-1] * phi[t])
# recruitment
al[,t] <- apply(matrix(z[,1:(t-1)], nrow = M, byrow = FALSE), 1, sum) > 0
idx <- 1 - as.numeric(al[,t])
gamma[t] <- (N - sum(surv)) / sum(idx)
if (gamma[t] < 0) gamma[t] <- 0
r[,t] <- rbinom(M, idx, gamma[t])
z[,t] <- surv + r[,t]
}
S <- cbind(sx, sy)
dmat <- e2dist(S, grid)
psi<- exp(-(1 / (2 * sigma * sigma)) * dmat * dmat)
for (t in 1:T){
lam[[t]] <- lam0[t] * psi
pmat[[t]] <- 1 - exp(-lam[[t]])
}
y <- array(0, dim = c(M, ntraps, T))
for (t in 1:T){
yfull <- array(0, dim = c(M, ntraps, K))
for (i in 1:M){
for (k in 1:K){
yfull[i, 1:ntraps, k] <- rbinom(ntraps, 1, pmat[[t]][i,] * z[i,t])
}
}
y[, 1:ntraps, t] <- apply(yfull, 1:2, sum)
}
ycapt <- y[which(rowSums(y[,,])>0), , ]
list(y = ycapt,
z = z,
r = r,
gamma = gamma,
N = apply(z, 2, sum),
R = apply(r, 2, sum),
SX = sx,
SY = sy)
}
Set up the basic trap array in a 7x7 grid.
gridx <- seq(-3, 3, 1)
grid <- as.matrix(expand.grid(gridx, gridx))
J <- dim(grid)[1]
Set the upper and lower x and y coordinates for the state space.
xl <- -5
yl <- -5
xu <- 5
yu <- 5
Parameters specification.
T <- K <- 5 # number of years / seasons
sigma <- 0.5
lam0 <- 0.5
N <- 40
M <- 150
phi0 <- 0.75
tau <- 0.5
Mc <- 150 # upper M for data augmentation
Simulate data.
simdat <- simJS.fn(N = N,
phi0 = phi0,
lam0 = lam0,
M = Mc,
T = T,
grid = grid,
xl = xl,
xu = xu,
yl = yl,
yu = yu,
sigma = sigma,
K = K)
str(simdat)
## List of 8
## $ y : num [1:45, 1:49, 1:5] 0 0 0 0 0 0 0 0 0 0 ...
## $ z : num [1:150, 1:5] 0 0 0 0 1 0 0 0 0 1 ...
## $ r : num [1:150, 1:5] 0 0 0 0 1 0 0 0 0 1 ...
## $ gamma: num [1:5] 0.2667 0.1071 0.125 0.0645 0.1047
## $ N : num [1:5] 38 36 38 41 39
## $ R : num [1:5] 38 8 11 7 8
## $ SX : num [1:150] -3.49 -2.23 -3.88 -2.26 2.88 ...
## $ SY : num [1:150] 1.67 -1.57 -4.9 -3.71 1.65 ...
Visualise traps.
viz_traps <- grid %>%
as_tibble() %>%
ggplot(aes(x = Var1, y = Var2)) +
geom_point(pch = 3) +
xlim(-5, 5) +
ylim(-5, 5)
viz_traps
Add activity centers.
viz_traps_ac <- viz_traps +
geom_point(data = data.frame(s.x = simdat$SX, s.y = simdat$SY),
aes(x = s.x, y = s.y), pch = 16, color = "red")
viz_traps_ac
Define the model.
code <- nimbleCode({
# set priors for sigma2, lam0 (encounter rate), gamma, and phi
sigma ~ dunif(0, 10)
sigma2 <- sigma * sigma
lam0 ~ dunif(0, 5)
phi ~ dunif(0, 1) # survival
for(t in 1:T){ # T = 10 years
gamma[t] ~ dunif(0, 1) # recruitment
}
for (i in 1:M){ # loop over M individuals (includes the augmented data)
ncaps[i] <- sum(z[i,1:T])
alive[i] <- 1 - equals(ncaps[i],0)
z[i,1] ~ dbin(gamma[1], 1)
SX[i] ~ dunif(xl, xu) # set priors for the X and Y coordinates of each individual
SY[i] ~ dunif(yl, yu)
for(j in 1:J) { # loop over all traps of that year
D2[i,j] <- pow(SX[i] - trapmat[j,1], 2) + pow(SY[i] - trapmat[j,2], 2)
g[i,j] <- lam0 * exp(-D2[i,j] / (2 * sigma2))
pmean[i,j] <- 1 - exp(-g[i,j])
for (t in 1:T){
tmp[t,i,j] <- z[i,t] * pmean[i,j]
y[t,i,j] ~ dbin(tmp[t,i,j], K) # K is the number of days a trap was operational
}
}
a[i,1] <- (1 - z[i,1])
a[i,2] <- (1-z[i,1]) * (1-z[i,2])
a[i,3] <- (1-z[i,1]) * (1-z[i,2]) * (1-z[i,3])
a[i,4] <- (1-z[i,1]) * (1-z[i,2]) * (1-z[i,3]) * (1-z[i,4])
gammatmp[i,2] <- gamma[2] * a[i,1]
mu[i,2] <- (phi * z[i,1]) + gammatmp[i,2]
z[i,2] ~ dbern(mu[i,2])
gammatmp[i,3] <- gamma[3] * a[i,2]
mu[i,3] <- (phi * z[i,2]) + gammatmp[i,3]
z[i,3] ~ dbern(mu[i,3])
gammatmp[i,4] <- gamma[4] * a[i,3]
mu[i,4] <- (phi * z[i,3]) + gammatmp[i,4]
z[i,4] ~ dbern(mu[i,4])
gammatmp[i,5] <- gamma[5] * a[i,4]
mu[i,5] <- (phi * z[i,4]) + gammatmp[i,5]
z[i,5] ~ dbern(mu[i,5])
R[i,1] <- z[i,1]
R[i,2] <- (1 - z[i,1]) * z[i,2]
R[i,3] <- (1 - z[i,1]) * (1 - z[i,2]) * z[i,3]
R[i,4] <- (1 - z[i,1]) * (1 - z[i,2]) * (1 - z[i,3]) * z[i,4]
R[i,5] <- (1 - z[i,1]) * (1 - z[i,2]) * (1 - z[i,3]) * (1 - z[i,4]) * z[i,5]
}
N1 <- sum(z[1:M,1])
N2 <- sum(z[1:M,2])
N3 <- sum(z[1:M,3])
N4 <- sum(z[1:M,4])
N5 <- sum(z[1:M,5])
Nalive <- sum(alive[1:M])
R1 <- sum(R[1:M,1])
R2 <- sum(R[1:M,2])
R3 <- sum(R[1:M,3])
R4 <- sum(R[1:M,4])
R5 <- sum(R[1:M,5])
})
Function to create initial coordinates of activity centers for each individual.
Sin <- function(T = T, M = M, xl = xl, xu = xu, yl = yl, yu = yu, ntot = ntot){
SX <- SY <- matrix(NA, nrow = M, ncol = 1)
for(i in 1:M){
for (t in 1:T){
SX[i] <- runif(1, xl, xu)
SY[i] <- runif(1, yl, yu)
traps <- which(dataug[i,,t] > 0)
if(length(traps) > 0){
SX[i] <- mean(grid[traps,1])
SY[i] <- mean(grid[traps,2])
}
}
}
return(list(SX, SY))
}
Data, inits and parameters to monitored.
ntot <- dim(simdat$y)[1] # total ever observed in this simulated dataset
# add Mc-ntot zero encounter histories (data augmentation)
dataug <- array(0, dim = c(Mc, J, T))
dataug[1:ntot, , ] <- simdat$y
dataugTMJ <- aperm(dataug, c(3,1,2))
# create intial values for z state
zinit <- matrix(0,nrow = Mc, ncol = T)
zinit[1:ntot,] <- 1
constants <- list(M = Mc,
K = K,
J = J,
T = T)
data <- list(y = dataugTMJ,
xl = xl,
xu = xu,
yl = yl,
yu = yu,
trapmat = as.matrix(grid))
inits <- list(phi = runif(1),
gamma = runif(T, 0, 1),
sigma = runif(1, 1, 2),
z = zinit,
lam0 = runif(1),
SY = as.vector(Sin(T = T, M = Mc, xl = xl, xu = xu, yl = yl, yu = yu, ntot = ntot)[[2]]),
SX = as.vector(Sin(T = T, M = Mc, xl = xl, xu = xu, yl = yl, yu = yu, ntot = ntot)[[1]]))
Build R
model (not compiled yet).
Rmodel <- nimbleModel(code = code,
constants = constants,
data = data,
inits = inits)
Check whether the model is fully initialized. If you failed at providing initial values for some parameters, you’ll get NAs.
Rmodel$calculate()
## [1] -4250.743
Now compile the model in C++
.
Cmodel <- compileNimble(Rmodel)
The R
and C
models are exactly the same versions of the model.
calculate(Cmodel)
## [1] -4250.743
Specify MCMC.
conf <- configureMCMC(Rmodel,
monitors = c("N1", "N2", "N3", "N4", "N5",
"R1", "R2", "R3", "R4", "R5",
"lam0", "phi", "sigma", "gamma"))
## ===== Monitors =====
## thin = 1: N1, N2, N3, N4, N5, R1, R2, R3, R4, R5, lam0, phi, sigma, gamma
## ===== Samplers =====
## RW sampler (308)
## - sigma
## - lam0
## - phi
## - gamma[] (5 elements)
## - SX[] (150 elements)
## - SY[] (150 elements)
## binary sampler (750)
## - z[] (750 elements)
Build an executable MCMC.
Rmcmc <- buildMCMC(conf)
Compile in C++
.
Cmcmc <- compileNimble(Rmcmc, project = Cmodel)
Run compiled model (do not run the uncompiled model with runMCMC(Rmcmc,100)
).
samples <- runMCMC(Cmcmc,100)
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
Explore.
dim(samples)
## [1] 100 18
colnames(samples)
## [1] "N1" "N2" "N3" "N4" "N5" "R1"
## [7] "R2" "R3" "R4" "R5" "gamma[1]" "gamma[2]"
## [13] "gamma[3]" "gamma[4]" "gamma[5]" "lam0" "phi" "sigma"
samplesSummary(samples)
## Mean Median St.Dev. 95%CI_low 95%CI_upp
## N1 29.59000000 29.50000000 3.84312320 24.000000000 38.0000000
## N2 28.69000000 28.00000000 3.87349189 23.000000000 37.5250000
## N3 28.29000000 28.00000000 4.27181271 24.000000000 37.5250000
## N4 25.93000000 26.00000000 3.39713189 21.000000000 33.0000000
## N5 29.12000000 28.00000000 5.98698926 23.000000000 52.5250000
## R1 29.59000000 29.50000000 3.84312320 24.000000000 38.0000000
## R2 7.16000000 7.00000000 2.09241053 4.000000000 12.5250000
## R3 6.57000000 6.00000000 2.06096476 4.000000000 12.1500000
## R4 2.97000000 3.00000000 1.33677586 1.000000000 6.0000000
## R5 8.69000000 7.00000000 5.01854138 4.000000000 26.5250000
## gamma[1] 0.16919976 0.17661999 0.06453979 0.113927531 0.4013108
## gamma[2] 0.11063352 0.08167757 0.07486256 0.081677572 0.3033200
## gamma[3] 0.08553335 0.06135237 0.10735876 0.050281573 0.4129394
## gamma[4] 0.06957256 0.04133114 0.07849900 0.008983692 0.2510197
## gamma[5] 0.18088091 0.04534513 0.24459565 0.045345125 0.9406812
## lam0 0.21497361 0.18036729 0.07355855 0.180367286 0.4878812
## phi 0.78080431 0.75782749 0.05923465 0.652677801 0.9385426
## sigma 1.03025412 1.06901044 0.29021636 0.547231590 1.4427899
Run compiled model. Takes 10-15 minutes on my machine.
samplesList <- runMCMC(Cmcmc,
niter = 10000,
nburnin = 5000,
nchains = 2)
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
## |-------------|-------------|-------------|-------------|
## |-------------------------------------------------------|
samples <- rbind(samplesList[[1]],
samplesList[[2]])
str(samples)
## num [1:10000, 1:18] 48 41 48 45 51 48 45 50 49 42 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:18] "N1" "N2" "N3" "N4" ...
Calculate ESS effective sample size.
library(coda)
apply(samples, 2, effectiveSize)
## N1 N2 N3 N4 N5 R1 R2 R3
## 717.2841 482.4110 411.3577 499.8184 621.4969 717.2841 1098.0484 944.7996
## R4 R5 gamma[1] gamma[2] gamma[3] gamma[4] gamma[5] lam0
## 1386.0613 713.9504 787.0948 811.2436 769.2386 855.3241 569.3414 646.4931
## phi sigma
## 805.7276 451.5871
Produce trace an density plots.
library(basicMCMCplots)
chainsPlot(samplesList,
var = c("N1", "N2", "N3", "N4", "N5"))
chainsPlot(samplesList,
var = c("R1", "R2", "R3", "R4", "R5"))
chainsPlot(samplesList,
var = c("sigma", "phi", "lam0"))
Display summary stats. Compare to the values used to simulate data, in particuler \(N = 40\), \(\sigma = 0.5\), \(\phi = 0.75\) and \(\lambda_0 = 0.5\).
samplesSummary(samples)
## Mean Median St.Dev. 95%CI_low 95%CI_upp
## N1 43.63980000 43.00000000 5.52813820 34.00000000 55.0000000
## N2 40.39300000 40.00000000 4.97127975 31.00000000 51.0000000
## N3 40.16660000 40.00000000 4.87719419 32.00000000 51.0000000
## N4 36.76950000 36.00000000 4.98448134 28.00000000 48.0000000
## N5 37.75720000 37.00000000 5.23774680 29.00000000 49.0000000
## R1 43.63980000 43.00000000 5.52813820 34.00000000 55.0000000
## R2 8.10210000 8.00000000 3.30396235 3.00000000 16.0000000
## R3 9.21150000 9.00000000 3.19802916 4.00000000 16.0000000
## R4 4.38620000 4.00000000 2.19516091 1.00000000 10.0000000
## R5 8.78010000 8.00000000 3.45987587 3.00000000 17.0000000
## gamma[1] 0.29496053 0.29449161 0.05206358 0.19969686 0.3995589
## gamma[2] 0.08476703 0.07849382 0.04036266 0.02159974 0.1810982
## gamma[3] 0.10161386 0.09572947 0.04358690 0.03020756 0.2032622
## gamma[4] 0.06000837 0.05362958 0.03497782 0.01181903 0.1451888
## gamma[5] 0.11406401 0.10689076 0.05480415 0.02945924 0.2416850
## lam0 0.46816190 0.46611279 0.04109518 0.39465776 0.5543210
## phi 0.76918276 0.77226987 0.04784245 0.66787149 0.8542278
## sigma 0.50507284 0.50455148 0.01744205 0.47277816 0.5419839
Plot estimated pop size.
res <- samplesSummary(samples)
res <- cbind(param = rownames(res), res)
res %>%
as_tibble() %>%
janitor::clean_names() %>%
mutate(mean = round(as.numeric(mean)),
low = round(as.numeric(x95_percent_ci_low)),
up = round(as.numeric(x95_percent_ci_upp))) %>%
filter(str_detect(param, "N")) %>%
mutate(year = row_number()) %>%
ggplot() +
aes(x = year, y = mean) +
geom_point() +
geom_errorbar(aes(ymin = low, ymax = up), width=.1)
Add formal description of the models.
Add code to plot map of estimated density and activity centers.
Improve convergence.
Provide Jags implementation (?).
Provide secr and oSCR implementation for max likelihood counterpart (?).