Load some packages:
library(tidyverse) # manipulate and visualize data
theme_set(theme_light(base_size = 16)) # set theme for visualisation and font size
library(unmarked) # occupancy modelling
library(sf) # spatial dataviz
We use data from Mapping and explaining wolf recolonization in France using dynamic occupancy models and opportunistic data by Louvrier et al. in Ecography available at https://onlinelibrary.wiley.com/doi/abs/10.1111/ecog.02874. Detections and non-detections data were collected between 1994 and 2017 (23 years or primary occasions). Occasions are months December, January, February and March (4 visits or secondary occasions). A 100km\(^2\) grid was overlaid over France, and cells were considered as sites. See paper for more details.
First detection/non-detection data:
<- readRDS("data/wolf_louvrier.rds")
y dim(y)
## [1] 3450 92
There are > 3000 cells in rows. In columns we have 23 years times 4 visits, arranged in visits within years: Visit 1 to visit 4 for year 1994 in the first four columns, then visit 1 to visit 4 for year 1995, and so on.
Now we read in some covariates. First the survey effort which is (more or less) the number of observers per cell:
<- readRDS("data/effort_louvrier.rds") effort
The effort covariate is a yearly site-level covariate, with values that do not vary between visits within a year, but that do vary between years:
dim(effort)
## [1] 3450 23
We have some environmental covariates as well, all site-level covariates:
<- readRDS("data/envcov_louvrier.rds") envcov
The 7 covariates are specifically in columns: farmland cover, altitude, distance to closest barrier, road density, forest cover, high altitude, rock cover:
colnames(envcov)
## [1] "p_agri" "p_alti" "p_dbarr" "p_road" "p_forest" "p_halt" "p_rock"
Last, we have two additional yearly site-level covariates which capture dispersal abilities of wolves, namely the number of occupied neighboring cells at short (10km) and long (150km) distances:
# short distance
<- readRDS("data/shortd_spatialautocorr.rds")
SDAC # long distance
<- readRDS("data/longd_spatialautocorr.rds") LDAC
There are other yearly site-level covariates we’d like to consider. First a year effect:
<- matrix(rep(c('01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23'),nrow(y)),
year nrow = nrow(y),
ncol = 23,
byrow = TRUE)
head(year)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
## [1,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14"
## [2,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14"
## [3,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14"
## [4,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14"
## [5,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14"
## [6,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14"
## [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23]
## [1,] "15" "16" "17" "18" "19" "20" "21" "22" "23"
## [2,] "15" "16" "17" "18" "19" "20" "21" "22" "23"
## [3,] "15" "16" "17" "18" "19" "20" "21" "22" "23"
## [4,] "15" "16" "17" "18" "19" "20" "21" "22" "23"
## [5,] "15" "16" "17" "18" "19" "20" "21" "22" "23"
## [6,] "15" "16" "17" "18" "19" "20" "21" "22" "23"
And a linear trend over time:
<- matrix(rep(1:23,nrow(y)),
trendyear nrow=nrow(y),
ncol=23,
byrow=TRUE)
head(trendyear)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
## [1,] 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## [2,] 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## [3,] 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## [4,] 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## [5,] 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## [6,] 1 2 3 4 5 6 7 8 9 10 11 12 13 14
## [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23]
## [1,] 15 16 17 18 19 20 21 22 23
## [2,] 15 16 17 18 19 20 21 22 23
## [3,] 15 16 17 18 19 20 21 22 23
## [4,] 15 16 17 18 19 20 21 22 23
## [5,] 15 16 17 18 19 20 21 22 23
## [6,] 15 16 17 18 19 20 21 22 23
These two covariates differ in the way we treat the year effect, either as a factor or a numeric variables.
Last, we’ll need a covariate to assess a visit (or a month) effect, that varies by sites, visits and years, usually referred to as ‘survey-covariate’ or ‘observation covariate’:
<- array(0,c(3450,4,23))
occ for (i in 1:3450){
for (j in 1:23){
1:4,j] <- c('1','2','3','4') # dec, jan, feb, mar
occ[i,if (effort[i,j] == 0) occ[i,1:4,j] <- NA
}
}<- occ[,,1]
occbis for (i in 2:23){
<- cbind(occbis,occ[,,i])
occbis
}tail(occbis) # 3450 x 92 ou 92 est 4 occ x 23 ans
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [3445,] NA NA NA NA NA NA NA NA NA NA NA NA NA
## [3446,] NA NA NA NA NA NA NA NA NA NA NA NA NA
## [3447,] NA NA NA NA NA NA NA NA NA NA NA NA NA
## [3448,] NA NA NA NA NA NA NA NA NA NA NA NA NA
## [3449,] NA NA NA NA NA NA NA NA NA NA NA NA NA
## [3450,] NA NA NA NA NA NA NA NA NA NA NA NA NA
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
## [3445,] NA NA NA NA NA NA NA NA NA NA NA NA
## [3446,] NA NA NA NA NA NA NA NA NA NA NA NA
## [3447,] NA NA NA NA NA NA NA NA NA NA NA NA
## [3448,] NA NA NA NA NA NA NA NA NA NA NA NA
## [3449,] NA NA NA NA NA NA NA NA NA NA NA NA
## [3450,] NA NA NA NA NA NA NA NA NA NA NA NA
## [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37]
## [3445,] NA NA NA NA NA NA NA NA NA NA NA NA
## [3446,] NA NA NA NA NA NA NA NA NA NA NA NA
## [3447,] NA NA NA NA NA NA NA NA NA NA NA NA
## [3448,] NA NA NA NA NA NA NA NA NA NA NA NA
## [3449,] NA NA NA NA NA NA NA NA NA NA NA NA
## [3450,] NA NA NA NA NA NA NA NA NA NA NA NA
## [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49]
## [3445,] NA NA NA NA NA NA NA NA NA NA NA NA
## [3446,] NA NA NA NA NA NA NA NA NA NA NA NA
## [3447,] NA NA NA NA NA NA NA NA NA NA NA NA
## [3448,] NA NA NA NA NA NA NA NA NA NA NA NA
## [3449,] NA NA NA NA NA NA NA NA NA NA NA NA
## [3450,] NA NA NA NA NA NA NA NA NA NA NA NA
## [,50] [,51] [,52] [,53] [,54] [,55] [,56] [,57] [,58] [,59] [,60] [,61]
## [3445,] NA NA NA NA NA NA NA NA NA NA NA "1"
## [3446,] NA NA NA NA NA NA NA NA NA NA NA NA
## [3447,] NA NA NA NA NA NA NA NA NA NA NA NA
## [3448,] NA NA NA NA NA NA NA NA NA NA NA NA
## [3449,] NA NA NA NA NA NA NA NA NA NA NA NA
## [3450,] NA NA NA NA NA NA NA NA NA NA NA NA
## [,62] [,63] [,64] [,65] [,66] [,67] [,68] [,69] [,70] [,71] [,72] [,73]
## [3445,] "2" "3" "4" "1" "2" "3" "4" "1" "2" "3" "4" "1"
## [3446,] NA NA NA "1" "2" "3" "4" NA NA NA NA "1"
## [3447,] NA NA NA NA NA NA NA NA NA NA NA "1"
## [3448,] NA NA NA NA NA NA NA NA NA NA NA "1"
## [3449,] NA NA NA NA NA NA NA NA NA NA NA "1"
## [3450,] NA NA NA NA NA NA NA NA NA NA NA "1"
## [,74] [,75] [,76] [,77] [,78] [,79] [,80] [,81] [,82] [,83] [,84] [,85]
## [3445,] "2" "3" "4" NA NA NA NA NA NA NA NA "1"
## [3446,] "2" "3" "4" NA NA NA NA "1" "2" "3" "4" "1"
## [3447,] "2" "3" "4" "1" "2" "3" "4" "1" "2" "3" "4" NA
## [3448,] "2" "3" "4" "1" "2" "3" "4" "1" "2" "3" "4" NA
## [3449,] "2" "3" "4" NA NA NA NA "1" "2" "3" "4" NA
## [3450,] "2" "3" "4" NA NA NA NA NA NA NA NA NA
## [,86] [,87] [,88] [,89] [,90] [,91] [,92]
## [3445,] "2" "3" "4" "1" "2" "3" "4"
## [3446,] "2" "3" "4" "1" "2" "3" "4"
## [3447,] NA NA NA "1" "2" "3" "4"
## [3448,] NA NA NA "1" "2" "3" "4"
## [3449,] NA NA NA "1" "2" "3" "4"
## [3450,] NA NA NA "1" "2" "3" "4"
Site-level covariates first:
<- data.frame(
sites.covs forest = envcov[,"p_forest"], # forest cover
agr = envcov[,"p_agri"], # farmland cover
rock = envcov[,"p_rock"], # rock cover
halt = envcov[,"p_halt"], # high altitude
alt = envcov[,"p_alti"], # altitude
dbarr = envcov[,"p_dbarr"], # distance to closest barrier
acc = envcov[,"p_road"]) # road density
Yearly site-level covariates:
<- list(
yearly.site.covs effort = effort, # effort
year = year, # year effect
trendyear = trendyear, # linear trend over time
SDAC = SDAC, # short distance
LDAC = LDAC) # long distance
Observation covariates:
<- list(OCC = occbis) obs.covs
Organise detection/non-detection data and covariates in a
data.frame
that can be used by unmarked
:
<- unmarkedMultFrame(y = y,
umf siteCovs = sites.covs,
yearlySiteCovs = yearly.site.covs,
obsCovs = obs.covs,
numPrimary = 23)
Summarize data:
summary(umf)
## unmarkedFrame Object
##
## 3450 sites
## Maximum number of observations per site: 92
## Mean number of observations per site: 57.91
## Number of primary survey periods: 23
## Number of secondary survey periods: 4
## Sites with at least one detection: 547
##
## Tabulation of y observations:
## 0 1 2 <NA>
## 196086 3215 503 117596
##
## Site-level covariates:
## forest agr rock halt
## Min. :-1.218838 Min. :-1.50457 Min. :-0.253118 Min. :-0.172297
## 1st Qu.:-0.939161 1st Qu.:-0.83057 1st Qu.:-0.253118 1st Qu.:-0.172297
## Median :-0.102772 Median : 0.04001 Median :-0.253118 Median :-0.172297
## Mean : 0.003797 Mean :-0.01065 Mean : 0.006638 Mean : 0.004287
## 3rd Qu.: 0.754320 3rd Qu.: 0.82406 3rd Qu.:-0.253118 3rd Qu.:-0.172297
## Max. : 3.087256 Max. : 1.89463 Max. : 8.074589 Max. :11.368946
## alt dbarr acc
## Min. :-0.99359 Min. :-1.153377 Min. :-1.732584
## 1st Qu.:-0.61255 1st Qu.:-0.825046 1st Qu.:-0.682261
## Median :-0.33687 Median :-0.248552 Median : 0.329160
## Mean : 0.01199 Mean : 0.002765 Mean :-0.005365
## 3rd Qu.: 0.34066 3rd Qu.: 0.573661 3rd Qu.: 0.757069
## Max. : 4.41822 Max. : 3.898308 Max. : 1.846292
##
## Observation-level covariates:
## OCC
## 1 : 49951
## 2 : 49951
## 3 : 49951
## 4 : 49951
## NA's:117596
##
## Yearly-site-level covariates:
## effort year trendyear SDAC
## Min. :0.0000 01 : 3450 Min. : 1 Min. :0.000
## 1st Qu.:0.0000 02 : 3450 1st Qu.: 6 1st Qu.:0.000
## Median :0.1278 03 : 3450 Median :12 Median :0.000
## Mean :0.6243 04 : 3450 Mean :12 Mean :0.209
## 3rd Qu.:0.7983 05 : 3450 3rd Qu.:18 3rd Qu.:0.000
## Max. :8.0075 06 : 3450 Max. :23 Max. :8.000
## (Other):58650
## LDAC
## Min. :-1.0912
## 1st Qu.:-0.6831
## Median :-0.4912
## Mean : 0.0171
## 3rd Qu.: 0.5300
## Max. : 2.5113
##
Fit a model with all parameters constant,but detection which is a function of effort:
<- colext(
fm0 psiformula = ~ 1, # initial occupancy
gammaformula = ~ 1, # colonization
epsilonformula = ~ 1, # extinction
pformula = ~ effort, # detection
data = umf, # data
control = list(trace = 1)) # display number of iterations by blocks of 10
## initial value 37465.552381
## iter 10 value 12706.122901
## iter 20 value 11195.999294
## iter 30 value 10963.405378
## final value 10963.302306
## converged
Inspect results:
fm0
##
## Call:
## colext(psiformula = ~1, gammaformula = ~1, epsilonformula = ~1,
## pformula = ~effort, data = umf, control = list(trace = 1))
##
## Initial:
## Estimate SE z P(>|z|)
## -6.17 0.673 -9.16 5.25e-20
##
## Colonization:
## Estimate SE z P(>|z|)
## -4.01 0.0509 -78.6 0
##
## Extinction:
## Estimate SE z P(>|z|)
## -1.3 0.0653 -19.9 5.89e-88
##
## Detection:
## Estimate SE z P(>|z|)
## (Intercept) -2.236 0.0819 -27.3 3.70e-164
## effort 0.543 0.0276 19.7 1.37e-86
##
## AIC: 21936.6
Model coefficients (or parameters):
coef(fm0)
## psi(Int) col(Int) ext(Int) p(Int) p(effort)
## -6.1658594 -4.0053675 -1.2988148 -2.2358314 0.5434858
Get parameter estimates on natural scale. Start with colonization:
backTransform(fm0, type = "col")
## Backtransformed linear combination(s) of Colonization estimate(s)
##
## Estimate SE LinComb (Intercept)
## 0.0179 0.000895 -4.01 1
##
## Transformation: logistic
Then extinction:
backTransform(fm0, type = "ext")
## Backtransformed linear combination(s) of Extinction estimate(s)
##
## Estimate SE LinComb (Intercept)
## 0.214 0.011 -1.3 1
##
## Transformation: logistic
And initial occupancy:
backTransform(fm0, type = "psi")
## Backtransformed linear combination(s) of Initial estimate(s)
##
## Estimate SE LinComb (Intercept)
## 0.0021 0.00141 -6.17 1
##
## Transformation: logistic
Finally, we get detection as a function of effort:
# grid
<- seq(min(yearly.site.covs$effort), max(yearly.site.covs$effort), length = 100)
effort_grid # predict on logit scale
<- coef(fm0)[4] + coef(fm0)[5] * effort_grid
logit_det # backtransform
<- plogis(logit_det)
det # plot
plot(x = effort_grid,
y = det,
type = 'l',
xlab = "effort",
ylab = "estimated detection probability",
lwd= 3)
Here we fit the model with all covariates as considered by Louvrier and colleagues in their paper:
<- colext(
fm psiformula = ~ 1, # initial occupancy
gammaformula = ~ forest + agr + halt + alt + SDAC + LDAC, # colonization
epsilonformula = ~ 1, # extinction
pformula = ~ effort + acc + OCC, # detection
data = umf, # data
control = list(trace = 1), # display number of iterations by blocks of 10
se = FALSE) # do not compute SE and confidence intervals to reduce computational burden
## initial value 37465.552381
## iter 10 value 12721.165262
## iter 20 value 11671.497667
## iter 30 value 10755.918886
## iter 40 value 9850.393837
## iter 50 value 9803.647828
## final value 9799.071153
## converged
Inspect results (NA’s are produced because we did not compute SE’s when we fitted the model):
fm
##
## Call:
## colext(psiformula = ~1, gammaformula = ~forest + agr + halt +
## alt + SDAC + LDAC, epsilonformula = ~1, pformula = ~effort +
## acc + OCC, data = umf, se = FALSE, control = list(trace = 1))
##
## Initial:
## Estimate SE z P(>|z|)
## -6.28 NA NA NA
##
## Colonization:
## Estimate SE z P(>|z|)
## (Intercept) -5.166 NA NA NA
## forest 0.929 NA NA NA
## agr 0.768 NA NA NA
## halt -0.144 NA NA NA
## alt 0.834 NA NA NA
## SDAC 0.576 NA NA NA
## LDAC 0.384 NA NA NA
##
## Extinction:
## Estimate SE z P(>|z|)
## -1.11 NA NA NA
##
## Detection:
## Estimate SE z P(>|z|)
## (Intercept) -2.497 NA NA NA
## effort 0.393 NA NA NA
## acc -0.842 NA NA NA
## OCC2 0.495 NA NA NA
## OCC3 0.436 NA NA NA
## OCC4 0.419 NA NA NA
##
## AIC: 19628.14
Model coefficients are:
coef(fm)
## psi(Int) col(Int) col(forest) col(agr) col(halt) col(alt)
## -6.2803098 -5.1664074 0.9291481 0.7678356 -0.1435305 0.8342459
## col(SDAC) col(LDAC) ext(Int) p(Int) p(effort) p(acc)
## 0.5761154 0.3843558 -1.1123998 -2.4965811 0.3927426 -0.8415244
## p(OCC2) p(OCC3) p(OCC4)
## 0.4949260 0.4359181 0.4188788
Parameter estimates are given on the logit scale. You may compare these values to those obtained by Louvrier and colleagues using a Bayesian approach (see supplementary material).
There are two ways to map annual occupancy. We may compute the
occupancy probability \(\psi_{i,t}\),
or we rely directly on realized occupancy, that is the latent state
\(z_{i,t}\) which tells us whether cell
\(i\) is occupied in year \(t\) (\(z_{i,t} =
1\)) or not (\(z_{i,t} = 0\)).
We go for realized occupancy. To do so, we need the \(z\) estimates that we can get in
unmarked
via empirical Bayes methods: The posterior
distribution of \(z\) is estimated with
data and the parameter estimates we obtained from fitting our model
above. The mode of the posterior distribution is the “empirical best
unbiased predictor (EBUP)”.
<- ranef(fm) # estimate posterior distribution of z
re <- bup(re, stat = "mode") # get mode of posterior distribution z_mode
The z_mode
object contains the estimated \(z\) with cells in rows and years in
columns. In passing, if you wish to estimate the proportion of area
occupied by the species (corrected by imperfect detection), you just
need to sum the \(z\) for each year,
and divide by the number of sites.
Back to mapping occupancy, we’ll need a map of France:
<- st_read("shp/pays/Country.shp") all_countries
## Reading layer `Country' from data source
## `/Users/oliviergimenez/Dropbox/OG/GITHUB/occupancy-workshop/tutorials/shp/pays/Country.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 54 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -2177943 ymin: 5251488 xmax: 4523874 ymax: 11388590
## Projected CRS: RGF93 v1 / Lambert-93
<- all_countries %>% filter(NAME == "France") france
Then our 10x10km grid:
<- st_read("shp/grille10par10/grille_France.shp") %>%
grid_rect st_transform(crs = st_crs(france))
## Reading layer `grille_France' from data source
## `/Users/oliviergimenez/Dropbox/OG/GITHUB/occupancy-workshop/tutorials/shp/grille10par10/grille_France.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 5160 features and 1 field
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 510000 ymin: 6110000 xmax: 1110000 ymax: 6970000
## Projected CRS: RGF93 v1 / Lambert-93
Get the coordinates of the cells:
<- readRDS("data/coordcells_wolf.rds")
coord <- coord %>%
grid st_as_sf(coords = c('X','Y'), crs = st_crs(grid_rect))
The grid
object is spatial sf
object of
type POINT
. We need POLYGONS
instead:
<- st_join(grid_rect, grid, join = st_covers)
grid_poly <- grid_poly[!is.na(grid_poly$id),] grid_poly
We’re ready to map occupancy! Let’s focus on the last year for illustration, that is 2017:
%>%
grid_rect ggplot() +
geom_sf(alpha = 0,
lwd = 0.01) +
geom_sf(data = grid_poly,
aes(fill = as_factor(z_mode[,23])),
lwd = 0.01) +
geom_sf(data = france, alpha = 0) +
scale_fill_manual(name = "",
values = c("gray90",
"steelblue1"),
labels = c("non-occupied cell",
"occupied cell")) +
labs(title = "Map of wolf occupancy in 2017")