1 Objectives

2 Pre-requisites

Load some packages:

library(tidyverse) # manipulate and visualize data
theme_set(theme_light(base_size = 16)) # set theme for visualisation and font size
library(unmarked) # occupancy modelling
library(sf) # spatial dataviz

3 Data

We use data from Mapping and explaining wolf recolonization in France using dynamic occupancy models and opportunistic data by Louvrier et al. in Ecography available at https://onlinelibrary.wiley.com/doi/abs/10.1111/ecog.02874. Detections and non-detections data were collected between 1994 and 2017 (23 years or primary occasions). Occasions are months December, January, February and March (4 visits or secondary occasions). A 100km\(^2\) grid was overlaid over France, and cells were considered as sites. See paper for more details.

3.1 Read in data

First detection/non-detection data:

y <- readRDS("data/wolf_louvrier.rds")
dim(y)
## [1] 3450   92

There are > 3000 cells in rows. In columns we have 23 years times 4 visits, arranged in visits within years: Visit 1 to visit 4 for year 1994 in the first four columns, then visit 1 to visit 4 for year 1995, and so on.

Now we read in some covariates. First the survey effort which is (more or less) the number of observers per cell:

effort <- readRDS("data/effort_louvrier.rds")

The effort covariate is a yearly site-level covariate, with values that do not vary between visits within a year, but that do vary between years:

dim(effort)
## [1] 3450   23

We have some environmental covariates as well, all site-level covariates:

envcov <- readRDS("data/envcov_louvrier.rds")

The 7 covariates are specifically in columns: farmland cover, altitude, distance to closest barrier, road density, forest cover, high altitude, rock cover:

colnames(envcov)
## [1] "p_agri"   "p_alti"   "p_dbarr"  "p_road"   "p_forest" "p_halt"   "p_rock"

Last, we have two additional yearly site-level covariates which capture dispersal abilities of wolves, namely the number of occupied neighboring cells at short (10km) and long (150km) distances:

# short distance
SDAC <- readRDS("data/shortd_spatialautocorr.rds")
# long distance 
LDAC <- readRDS("data/longd_spatialautocorr.rds")

There are other yearly site-level covariates we’d like to consider. First a year effect:

year <- matrix(rep(c('01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19','20','21','22','23'),nrow(y)), 
               nrow = nrow(y), 
               ncol = 23, 
               byrow = TRUE)
head(year)
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
## [1,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10"  "11"  "12"  "13"  "14" 
## [2,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10"  "11"  "12"  "13"  "14" 
## [3,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10"  "11"  "12"  "13"  "14" 
## [4,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10"  "11"  "12"  "13"  "14" 
## [5,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10"  "11"  "12"  "13"  "14" 
## [6,] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10"  "11"  "12"  "13"  "14" 
##      [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23]
## [1,] "15"  "16"  "17"  "18"  "19"  "20"  "21"  "22"  "23" 
## [2,] "15"  "16"  "17"  "18"  "19"  "20"  "21"  "22"  "23" 
## [3,] "15"  "16"  "17"  "18"  "19"  "20"  "21"  "22"  "23" 
## [4,] "15"  "16"  "17"  "18"  "19"  "20"  "21"  "22"  "23" 
## [5,] "15"  "16"  "17"  "18"  "19"  "20"  "21"  "22"  "23" 
## [6,] "15"  "16"  "17"  "18"  "19"  "20"  "21"  "22"  "23"

And a linear trend over time:

trendyear <- matrix(rep(1:23,nrow(y)), 
                    nrow=nrow(y),
                    ncol=23, 
                    byrow=TRUE)
head(trendyear)
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
## [1,]    1    2    3    4    5    6    7    8    9    10    11    12    13    14
## [2,]    1    2    3    4    5    6    7    8    9    10    11    12    13    14
## [3,]    1    2    3    4    5    6    7    8    9    10    11    12    13    14
## [4,]    1    2    3    4    5    6    7    8    9    10    11    12    13    14
## [5,]    1    2    3    4    5    6    7    8    9    10    11    12    13    14
## [6,]    1    2    3    4    5    6    7    8    9    10    11    12    13    14
##      [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23]
## [1,]    15    16    17    18    19    20    21    22    23
## [2,]    15    16    17    18    19    20    21    22    23
## [3,]    15    16    17    18    19    20    21    22    23
## [4,]    15    16    17    18    19    20    21    22    23
## [5,]    15    16    17    18    19    20    21    22    23
## [6,]    15    16    17    18    19    20    21    22    23

These two covariates differ in the way we treat the year effect, either as a factor or a numeric variables.

Last, we’ll need a covariate to assess a visit (or a month) effect, that varies by sites, visits and years, usually referred to as ‘survey-covariate’ or ‘observation covariate’:

occ <- array(0,c(3450,4,23))
for (i in 1:3450){
    for (j in 1:23){
        occ[i,1:4,j] <- c('1','2','3','4') # dec, jan, feb, mar
        if (effort[i,j] == 0) occ[i,1:4,j] <- NA
                }
    }
occbis <- occ[,,1]
for (i in 2:23){
    occbis <- cbind(occbis,occ[,,i])
}
tail(occbis) # 3450 x 92 ou 92 est 4 occ x 23 ans
##         [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [3445,] NA   NA   NA   NA   NA   NA   NA   NA   NA   NA    NA    NA    NA   
## [3446,] NA   NA   NA   NA   NA   NA   NA   NA   NA   NA    NA    NA    NA   
## [3447,] NA   NA   NA   NA   NA   NA   NA   NA   NA   NA    NA    NA    NA   
## [3448,] NA   NA   NA   NA   NA   NA   NA   NA   NA   NA    NA    NA    NA   
## [3449,] NA   NA   NA   NA   NA   NA   NA   NA   NA   NA    NA    NA    NA   
## [3450,] NA   NA   NA   NA   NA   NA   NA   NA   NA   NA    NA    NA    NA   
##         [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
## [3445,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## [3446,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## [3447,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## [3448,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## [3449,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## [3450,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
##         [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37]
## [3445,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## [3446,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## [3447,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## [3448,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## [3449,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## [3450,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
##         [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49]
## [3445,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## [3446,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## [3447,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## [3448,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## [3449,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## [3450,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
##         [,50] [,51] [,52] [,53] [,54] [,55] [,56] [,57] [,58] [,59] [,60] [,61]
## [3445,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    "1"  
## [3446,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## [3447,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## [3448,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## [3449,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
## [3450,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA   
##         [,62] [,63] [,64] [,65] [,66] [,67] [,68] [,69] [,70] [,71] [,72] [,73]
## [3445,] "2"   "3"   "4"   "1"   "2"   "3"   "4"   "1"   "2"   "3"   "4"   "1"  
## [3446,] NA    NA    NA    "1"   "2"   "3"   "4"   NA    NA    NA    NA    "1"  
## [3447,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    "1"  
## [3448,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    "1"  
## [3449,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    "1"  
## [3450,] NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    "1"  
##         [,74] [,75] [,76] [,77] [,78] [,79] [,80] [,81] [,82] [,83] [,84] [,85]
## [3445,] "2"   "3"   "4"   NA    NA    NA    NA    NA    NA    NA    NA    "1"  
## [3446,] "2"   "3"   "4"   NA    NA    NA    NA    "1"   "2"   "3"   "4"   "1"  
## [3447,] "2"   "3"   "4"   "1"   "2"   "3"   "4"   "1"   "2"   "3"   "4"   NA   
## [3448,] "2"   "3"   "4"   "1"   "2"   "3"   "4"   "1"   "2"   "3"   "4"   NA   
## [3449,] "2"   "3"   "4"   NA    NA    NA    NA    "1"   "2"   "3"   "4"   NA   
## [3450,] "2"   "3"   "4"   NA    NA    NA    NA    NA    NA    NA    NA    NA   
##         [,86] [,87] [,88] [,89] [,90] [,91] [,92]
## [3445,] "2"   "3"   "4"   "1"   "2"   "3"   "4"  
## [3446,] "2"   "3"   "4"   "1"   "2"   "3"   "4"  
## [3447,] NA    NA    NA    "1"   "2"   "3"   "4"  
## [3448,] NA    NA    NA    "1"   "2"   "3"   "4"  
## [3449,] NA    NA    NA    "1"   "2"   "3"   "4"  
## [3450,] NA    NA    NA    "1"   "2"   "3"   "4"

3.2 Format data

Site-level covariates first:

sites.covs <- data.frame(
  forest = envcov[,"p_forest"], # forest cover
  agr = envcov[,"p_agri"], # farmland cover
  rock = envcov[,"p_rock"], # rock cover
  halt = envcov[,"p_halt"], # high altitude
  alt = envcov[,"p_alti"], # altitude
  dbarr = envcov[,"p_dbarr"], # distance to closest barrier
  acc = envcov[,"p_road"]) # road density

Yearly site-level covariates:

yearly.site.covs <- list(
  effort = effort, # effort
  year = year, # year effect
  trendyear = trendyear, # linear trend over time
  SDAC = SDAC, # short distance
  LDAC = LDAC) # long distance

Observation covariates:

obs.covs <- list(OCC = occbis)

Organise detection/non-detection data and covariates in a data.frame that can be used by unmarked:

umf <- unmarkedMultFrame(y = y, 
                         siteCovs = sites.covs,
                         yearlySiteCovs = yearly.site.covs, 
                         obsCovs = obs.covs,
                         numPrimary = 23)

Summarize data:

summary(umf)
## unmarkedFrame Object
## 
## 3450 sites
## Maximum number of observations per site: 92 
## Mean number of observations per site: 57.91 
## Number of primary survey periods: 23 
## Number of secondary survey periods: 4 
## Sites with at least one detection: 547 
## 
## Tabulation of y observations:
##      0      1      2   <NA> 
## 196086   3215    503 117596 
## 
## Site-level covariates:
##      forest               agr                rock                halt          
##  Min.   :-1.218838   Min.   :-1.50457   Min.   :-0.253118   Min.   :-0.172297  
##  1st Qu.:-0.939161   1st Qu.:-0.83057   1st Qu.:-0.253118   1st Qu.:-0.172297  
##  Median :-0.102772   Median : 0.04001   Median :-0.253118   Median :-0.172297  
##  Mean   : 0.003797   Mean   :-0.01065   Mean   : 0.006638   Mean   : 0.004287  
##  3rd Qu.: 0.754320   3rd Qu.: 0.82406   3rd Qu.:-0.253118   3rd Qu.:-0.172297  
##  Max.   : 3.087256   Max.   : 1.89463   Max.   : 8.074589   Max.   :11.368946  
##       alt               dbarr                acc           
##  Min.   :-0.99359   Min.   :-1.153377   Min.   :-1.732584  
##  1st Qu.:-0.61255   1st Qu.:-0.825046   1st Qu.:-0.682261  
##  Median :-0.33687   Median :-0.248552   Median : 0.329160  
##  Mean   : 0.01199   Mean   : 0.002765   Mean   :-0.005365  
##  3rd Qu.: 0.34066   3rd Qu.: 0.573661   3rd Qu.: 0.757069  
##  Max.   : 4.41822   Max.   : 3.898308   Max.   : 1.846292  
## 
## Observation-level covariates:
##    OCC        
##  1   : 49951  
##  2   : 49951  
##  3   : 49951  
##  4   : 49951  
##  NA's:117596  
## 
## Yearly-site-level covariates:
##      effort            year         trendyear       SDAC      
##  Min.   :0.0000   01     : 3450   Min.   : 1   Min.   :0.000  
##  1st Qu.:0.0000   02     : 3450   1st Qu.: 6   1st Qu.:0.000  
##  Median :0.1278   03     : 3450   Median :12   Median :0.000  
##  Mean   :0.6243   04     : 3450   Mean   :12   Mean   :0.209  
##  3rd Qu.:0.7983   05     : 3450   3rd Qu.:18   3rd Qu.:0.000  
##  Max.   :8.0075   06     : 3450   Max.   :23   Max.   :8.000  
##                   (Other):58650                               
##       LDAC        
##  Min.   :-1.0912  
##  1st Qu.:-0.6831  
##  Median :-0.4912  
##  Mean   : 0.0171  
##  3rd Qu.: 0.5300  
##  Max.   : 2.5113  
## 

4 Model with constant parameters, except detection which is a function of effort

Fit a model with all parameters constant,but detection which is a function of effort:

fm0 <- colext(
  psiformula = ~ 1,     # initial occupancy
  gammaformula =  ~ 1,  # colonization
  epsilonformula = ~ 1, # extinction
  pformula = ~ effort,  # detection
  data = umf, # data
  control = list(trace = 1)) # display number of iterations by blocks of 10
## initial  value 37465.552381 
## iter  10 value 12706.122901
## iter  20 value 11195.999294
## iter  30 value 10963.405378
## final  value 10963.302306 
## converged

Inspect results:

fm0
## 
## Call:
## colext(psiformula = ~1, gammaformula = ~1, epsilonformula = ~1, 
##     pformula = ~effort, data = umf, control = list(trace = 1))
## 
## Initial:
##  Estimate    SE     z  P(>|z|)
##     -6.17 0.673 -9.16 5.25e-20
## 
## Colonization:
##  Estimate     SE     z P(>|z|)
##     -4.01 0.0509 -78.6       0
## 
## Extinction:
##  Estimate     SE     z  P(>|z|)
##      -1.3 0.0653 -19.9 5.89e-88
## 
## Detection:
##             Estimate     SE     z   P(>|z|)
## (Intercept)   -2.236 0.0819 -27.3 3.70e-164
## effort         0.543 0.0276  19.7  1.37e-86
## 
## AIC: 21936.6

Model coefficients (or parameters):

coef(fm0)
##   psi(Int)   col(Int)   ext(Int)     p(Int)  p(effort) 
## -6.1658594 -4.0053675 -1.2988148 -2.2358314  0.5434858

Get parameter estimates on natural scale. Start with colonization:

backTransform(fm0, type = "col")
## Backtransformed linear combination(s) of Colonization estimate(s)
## 
##  Estimate       SE LinComb (Intercept)
##    0.0179 0.000895   -4.01           1
## 
## Transformation: logistic

Then extinction:

backTransform(fm0, type = "ext")
## Backtransformed linear combination(s) of Extinction estimate(s)
## 
##  Estimate    SE LinComb (Intercept)
##     0.214 0.011    -1.3           1
## 
## Transformation: logistic

And initial occupancy:

backTransform(fm0, type = "psi")
## Backtransformed linear combination(s) of Initial estimate(s)
## 
##  Estimate      SE LinComb (Intercept)
##    0.0021 0.00141   -6.17           1
## 
## Transformation: logistic

Finally, we get detection as a function of effort:

# grid
effort_grid <- seq(min(yearly.site.covs$effort), max(yearly.site.covs$effort), length = 100)
# predict on logit scale
logit_det <- coef(fm0)[4] + coef(fm0)[5] * effort_grid
# backtransform
det <- plogis(logit_det)
# plot
plot(x = effort_grid, 
     y = det, 
     type = 'l', 
     xlab = "effort", 
     ylab = "estimated detection probability", 
     lwd= 3)

5 Model with all covariates as in the Louvrier et al paper

Here we fit the model with all covariates as considered by Louvrier and colleagues in their paper:

fm <- colext(
  psiformula = ~ 1,                                            # initial occupancy
  gammaformula =  ~ forest + agr + halt + alt + SDAC + LDAC,  # colonization
  epsilonformula = ~ 1,                                       # extinction
  pformula = ~ effort + acc + OCC,                                # detection
  data = umf, # data
  control = list(trace = 1), # display number of iterations by blocks of 10
  se = FALSE) # do not compute SE and confidence intervals to reduce computational burden
## initial  value 37465.552381 
## iter  10 value 12721.165262
## iter  20 value 11671.497667
## iter  30 value 10755.918886
## iter  40 value 9850.393837
## iter  50 value 9803.647828
## final  value 9799.071153 
## converged

Inspect results (NA’s are produced because we did not compute SE’s when we fitted the model):

fm
## 
## Call:
## colext(psiformula = ~1, gammaformula = ~forest + agr + halt + 
##     alt + SDAC + LDAC, epsilonformula = ~1, pformula = ~effort + 
##     acc + OCC, data = umf, se = FALSE, control = list(trace = 1))
## 
## Initial:
##  Estimate SE  z P(>|z|)
##     -6.28 NA NA      NA
## 
## Colonization:
##             Estimate SE  z P(>|z|)
## (Intercept)   -5.166 NA NA      NA
## forest         0.929 NA NA      NA
## agr            0.768 NA NA      NA
## halt          -0.144 NA NA      NA
## alt            0.834 NA NA      NA
## SDAC           0.576 NA NA      NA
## LDAC           0.384 NA NA      NA
## 
## Extinction:
##  Estimate SE  z P(>|z|)
##     -1.11 NA NA      NA
## 
## Detection:
##             Estimate SE  z P(>|z|)
## (Intercept)   -2.497 NA NA      NA
## effort         0.393 NA NA      NA
## acc           -0.842 NA NA      NA
## OCC2           0.495 NA NA      NA
## OCC3           0.436 NA NA      NA
## OCC4           0.419 NA NA      NA
## 
## AIC: 19628.14

Model coefficients are:

coef(fm)
##    psi(Int)    col(Int) col(forest)    col(agr)   col(halt)    col(alt) 
##  -6.2803098  -5.1664074   0.9291481   0.7678356  -0.1435305   0.8342459 
##   col(SDAC)   col(LDAC)    ext(Int)      p(Int)   p(effort)      p(acc) 
##   0.5761154   0.3843558  -1.1123998  -2.4965811   0.3927426  -0.8415244 
##     p(OCC2)     p(OCC3)     p(OCC4) 
##   0.4949260   0.4359181   0.4188788

Parameter estimates are given on the logit scale. You may compare these values to those obtained by Louvrier and colleagues using a Bayesian approach (see supplementary material).

6 Map occupancy

There are two ways to map annual occupancy. We may compute the occupancy probability \(\psi_{i,t}\), or we rely directly on realized occupancy, that is the latent state \(z_{i,t}\) which tells us whether cell \(i\) is occupied in year \(t\) (\(z_{i,t} = 1\)) or not (\(z_{i,t} = 0\)). We go for realized occupancy. To do so, we need the \(z\) estimates that we can get in unmarked via empirical Bayes methods: The posterior distribution of \(z\) is estimated with data and the parameter estimates we obtained from fitting our model above. The mode of the posterior distribution is the “empirical best unbiased predictor (EBUP)”.

re <- ranef(fm) # estimate posterior distribution of z
z_mode <- bup(re, stat = "mode") # get mode of posterior distribution

The z_mode object contains the estimated \(z\) with cells in rows and years in columns. In passing, if you wish to estimate the proportion of area occupied by the species (corrected by imperfect detection), you just need to sum the \(z\) for each year, and divide by the number of sites.

Back to mapping occupancy, we’ll need a map of France:

all_countries <-  st_read("shp/pays/Country.shp")
## Reading layer `Country' from data source 
##   `/Users/oliviergimenez/Dropbox/OG/GITHUB/occupancy-workshop/tutorials/shp/pays/Country.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 54 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -2177943 ymin: 5251488 xmax: 4523874 ymax: 11388590
## Projected CRS: RGF93 v1 / Lambert-93
france <- all_countries %>% filter(NAME == "France")

Then our 10x10km grid:

grid_rect <- st_read("shp/grille10par10/grille_France.shp") %>% 
  st_transform(crs = st_crs(france))
## Reading layer `grille_France' from data source 
##   `/Users/oliviergimenez/Dropbox/OG/GITHUB/occupancy-workshop/tutorials/shp/grille10par10/grille_France.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 5160 features and 1 field
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 510000 ymin: 6110000 xmax: 1110000 ymax: 6970000
## Projected CRS: RGF93 v1 / Lambert-93

Get the coordinates of the cells:

coord <- readRDS("data/coordcells_wolf.rds")
grid <- coord %>% 
  st_as_sf(coords = c('X','Y'), crs = st_crs(grid_rect))

The grid object is spatial sf object of type POINT. We need POLYGONS instead:

grid_poly <- st_join(grid_rect, grid, join = st_covers)
grid_poly <- grid_poly[!is.na(grid_poly$id),]

We’re ready to map occupancy! Let’s focus on the last year for illustration, that is 2017:

grid_rect %>% 
  ggplot() + 
  geom_sf(alpha = 0, 
          lwd = 0.01) + 
  geom_sf(data = grid_poly, 
          aes(fill = as_factor(z_mode[,23])),
          lwd = 0.01) + 
  geom_sf(data = france, alpha = 0) +
  scale_fill_manual(name = "",
                    values = c("gray90",
                               "steelblue1"),
                    labels = c("non-occupied cell", 
                               "occupied cell")) +
  labs(title = "Map of wolf occupancy in 2017")