1 Introduction

This is an analysis of the RINT presence-only data gathered by the MOm NGO through a citizen-science program. The objective is to map the distribution of monk seals by fitting occupancy models to these data.

Load all the packages we will need.

library(tidyverse)
library(lubridate)
theme_set(theme_light())
library(sf)
library(viridis)
library(gridExtra)
library(transformr)
library(here)

2 Exploratory data analyses

Let’s load and explore the data to get a feeling of the info we have (and we do not have).

load(here::here("data","monseal.RData"))

First, the number of sightings over time.

monkseal %>% 
  ggplot() +
  aes(x = year) + 
  geom_bar()

The sightings are done along the year (all years are pooled together). August gets maximum sightings.

monkseal %>% 
  ggplot() +
  aes(x = factor(month)) + 
  geom_bar() + 
  xlab('month')

Regarding observers, sightings are mostly done by local people, then to a lesser extent tourists, spear gun fishers, professional fishermen and sailmen, port police and a few others.

monkseal %>% 
  filter(!is.na(observer)) %>%
  count(observer) %>%
  ggplot() +
  aes(x = n, y = fct_reorder(observer, n)) %>%
  geom_col() +
  labs(x = NULL, y = NULL)

Last, regarding where the seals were when they were spotted, we see that the sightings are mostly at sea (h), from human settlement (i) and on beach (f).

monkseal %>% 
  filter(!is.na(where)) %>%
  count(where) %>%
  ggplot() +
  aes(x = n, y = fct_reorder(where, n)) %>%
  geom_col() +
  labs(x = NULL, y = NULL) 

3 Data mapping

Before diving deep into the statistical analyses, let’s put the sightings on a map.

First, let’s get a map of Greece.

greece <- st_read(here::here("shapefiles","greece.shp"))
## Reading layer `greece' from data source 
##   `/Users/oliviergimenez/Dropbox/OG/GITHUB/monkseal-occupancy/shapefiles/greece.shp' 
##   using driver `ESRI Shapefile'
## replacing null geometries with empty geometries
## Simple feature collection with 10072 features and 3 fields (with 9735 geometries empty)
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: 104110.2 ymin: 3850806 xmax: 1007943 ymax: 4623933
## Projected CRS: GGRS87 / Greek Grid
coastlines <- st_read(here::here("shapefiles","coastlines.shp"))
## Reading layer `coastlines' from data source 
##   `/Users/oliviergimenez/Dropbox/OG/GITHUB/monkseal-occupancy/shapefiles/coastlines.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 3350 features and 1 field
## Geometry type: LINESTRING
## Dimension:     XY
## Bounding box:  xmin: 104130.9 ymin: 3850438 xmax: 1007943 ymax: 4539200
## Projected CRS: GGRS87 / Greek Grid
ggplot() + 
  geom_sf(data = greece) + 
  geom_sf(data = coastlines, color = 'red') + 
  labs(title = 'Map of Greece and coastlines', x = NULL, y = NULL)

And our grid (including a 20-km buffer around coastlines).

grid <- st_read(here::here("shapefiles","grid.shp"))
## Reading layer `grid' from data source 
##   `/Users/oliviergimenez/Dropbox/OG/GITHUB/monkseal-occupancy/shapefiles/grid.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 3016 features and 10 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 99323.68 ymin: 3841064 xmax: 1010706 ymax: 4559199
## Projected CRS: GGRS87 / Greek Grid
ggplot() + 
  geom_sf(data = greece) + 
  geom_sf(data = grid, lwd = 0.1) + 
  labs(title = 'Gridded map of Greece', x = NULL, y = NULL)

Note that a few sites are small, more precisely 0 percent of the 3016 (i.e. 166 sites) are less than \(10km^2\).

Get all sightings and map them:

counts <- monkseal %>%
  group_by(utm) %>%
  count()
names(counts) <- c('UTM_NAME','val')
grid_wcounts <- right_join(grid, counts)
classes <- classInt::classIntervals(grid_wcounts$val, n = 5, style = "jenks")
bwr <- grDevices::colorRampPalette(rev(c('red', 'orange', 'yellow', 'cyan', 'blue')))
grid_wcounts <- grid_wcounts %>%
  mutate(valdiscrete = cut(val, classes$brks, include.lowest = T))
ggplot() + 
  geom_sf(data = greece, colour = "grey50", fill = "white") + 
  geom_sf(data = grid, colour = "grey50", fill = "white", lwd = 0.1) + 
  geom_sf(data = grid_wcounts, lwd = 0.1, aes(fill = valdiscrete)) + 
  geom_sf(data = greece, colour = "grey50", fill = "white",lwd = 0.2) + 
  scale_fill_manual(
    name = 'counts',
    values = bwr(5),
    guide = guide_legend(reverse = TRUE)) + 
  labs(title = '', x = NULL, y = NULL)

ggsave(here::here("figures","map1.png"), dpi = 600)

4 Occupancy analysis

4.1 Data formating

Prepare the data:

grid <- grid %>% 
  filter(as.numeric(areakm2)>10) # filter out small sites
mm <- c('Mar','Apr','May','Jun','Jul','Aug','Sep','Oct') # extract month with data
indyear <- c(2000:2007, 2013:2020) # define two periods
ids <- unique(grid$UTM_NAME)
mom_detections <- list()
for (k in 1:length(indyear)){
  mom_detections[[k]] <- matrix(0,nrow=length(ids),ncol=length(mm))
  data <- filter(monkseal,year==indyear[k])
  ind <- 1
  for (i in mm){
    temp <- filter(data, month==i)
    utm_temp <- unique(temp$utm)
    for (j in utm_temp){
      # 1 if a sighting was made in square j in month i
      mom_detections[[k]][ids==j,ind] <- 1
    }
    ind <- ind + 1
  }
}
mom_det <- do.call(cbind, mom_detections) # bind data from all year in columns
# convert the occupancy dataset in a 3D array:
y <- list()
ind <- 0
for (i in 1:length(indyear)){
    mask <- (ind + i):(ind + i + length(mm) - 1)
    y[[i]] <- mom_det[,mask]
    ind <- ind + length(mm) - 1
}

# convert list into array (https://stackoverflow.com/questions/37433509/convert-list-to-a-matrix-or-array)
y <- array(unlist(y), dim = c(nrow(y[[1]]), ncol(y[[1]]), length(y)))
dim(y)
## [1] 2850    8   16

Now we reformat the data as follows: first, for each year, we determine whether the species was ever detected or not; then for each period, we count the number of times the species was detected:

new_y <- NULL
for (i in 1:dim(y)[3]){ # loop over years
  new_y <- cbind(new_y,apply(y[,1:8,i],1,sum))
}
new_y <- (new_y > 0) 
y1 <- apply(new_y[,1:8],1,sum)
y2 <- apply(new_y[,9:16],1,sum)
y <- cbind(y1,y2)
dim(y)
## [1] 2850    2
range(y)
## [1] 0 8

4.2 Inference

Load nimble package:

library(nimble)

We specify our model:

model <- nimbleCode({

  # occupancy
  mupsi ~ dnorm(-5,sd = 1)
  for(i in 1:nsite){
    cloglog(psi[i]) <- mupsi + phi[i] + offset[i]
  }

  # ICAR
  phi[1:nsite] ~ dcar_normal(adj[1:L],
                             weights[1:L], 
                             num[1:nsite], 
                             tau.phi, 
                             zero_mean = 1) 
  
  tau.phi ~ dgamma(1, 0.01) # precision of the ICAR component
  sigma2.phi <- 1/tau.phi # variance of the ICAR component
  
  # detection
  sdp ~ dunif(0,10)
  for(i in 1:nyear){
    muptemp[i] ~ dunif(0,1)
    logit(mup[i]) <- muptemp[i]
  }
  for(i in 1:nsite){
    lp[i] ~ dnorm(0, sd = sdp)
    for(t in 1:nyear){
      logit(p[i,t]) <- mup[t] + lp[i]
    }
  }
  
  # priors for mean extinction/colonization
  # preliminary analyses show no heterogeneity in these parameters
  mueps ~ dnorm(-5, sd = 1)
  mugam ~ dnorm(-5, sd = 1)
  
  # process model
  for(i in 1:nsite){
    z[i,1] ~ dbern(psi[i])
    cloglog(gamma[i]) <- mugam + offset[i]
    cloglog(eps[i]) <- mueps + offset[i]
    z[i,2] ~ dbern(z[i,1] * (1 - eps[i]) + (1 - z[i,1]) * gamma[i])
  }
  
  # observation model
  for (i in 1:nsite){
    for (k in 1:nyear){
      y[i,k] ~ dbin(z[i,k] * p[i,k],8)
    }
  }
})

Specify data, initial values, parameters to be monitored and various MCMC details:

# data
offset <- log(as.numeric(grid$areakm2))
win.data <- list(y = y)
neighbours <- spdep::poly2nb(pl = grid)
weigths <- spdep::nb2WB(nb = neighbours)
win.constants <- list(nsite = dim(y)[1], 
                      nyear = dim(y)[2], 
                      offset = offset,
                      L = length(weigths$weights),        
                      adj = weigths$adj,
                      num = weigths$num,
                      weights = weigths$weights)

# initial values
zst <- cbind(as.numeric(y1 > 0), as.numeric(y2 > 0)) # observed occurrence as inits for z 
inits <- function() {list(z = zst, 
                          mueps = -4, 
                          mugam = -4, 
                          mupsi = -4,
                          sdp = runif(1,1,9), 
                          muptemp = rep(0,dim(y)[1]),
                          eps = rep(0.5,dim(y)[1]),
                          gamma = rep(0.5,dim(y)[1]),
                          psi = rep(0.5,dim(y)[1]),
                          phi = rep(0,dim(y)[1]),
                          lp = rep(0,dim(y)[1]),
                          mup = rep(0,dim(y)[2]),
                          tau.phi = .1)}

# MCMC settings
ni <- 50000
nb <- 10000
nc <- 2

Create model as a R object:

survival <- nimbleModel(code = model,
                        data = win.data,
                        constants = win.constants,
                        inits = inits(),
                        calculate = FALSE)
#survival$initializeInfo()
#survival$calculate()

Go through nimble workflow:

# compile model
Csurvival <- compileNimble(survival)

# create a MCMC configuration
survivalConf <- configureMCMC(survival, 
                              useConjugacy = FALSE)

# replace RW samplers by slice sampler for standard deviation of random effects on detection
survivalConf$removeSamplers(c('sdp'))
survivalConf$addSampler(target = c('sdp'),
                        type = 'slice')

survivalConf$removeSamplers(c('tau.phi'))
survivalConf$addSampler(target = c('tau.phi'),
                        type = 'slice')

# add some parameters to monitor
survivalConf$addMonitors(c("lp","z","mupsi","mup"))

# create MCMC function and compile it
survivalMCMC <- buildMCMC(survivalConf)
CsurvivalMCMC <- compileNimble(survivalMCMC, project = survival)

Run nimble:

ptm <- proc.time()
out <- runMCMC(mcmc = CsurvivalMCMC, 
               niter = ni,
               nburnin = nb,
               nchains = nc,
               thin = 5)
x <- proc.time() -  ptm
save(out, x, file = here::here("models","monksealscloglog-nimble.RData"))

The code above takes some time to run. I ran it, saved the results and use them from here:

load(here::here("models","monksealscloglog-nimble.RData"))
out_backup <- out

Print results:

out <- rbind(out$chain1, out$chain2)
out %>%
  as_tibble() %>%
  pivot_longer(cols = everything(),  values_to = "value", names_to = "parameter") %>%
  filter(str_detect(parameter, "mu") | str_detect(parameter, "sd")) %>%
  filter(str_detect(parameter, "muptemp", negate = TRUE)) %>%
  group_by(parameter) %>%
  summarize(median = median(value),
            lci = quantile(value, probs = 2.5/100),
            uci = quantile(value, probs = 97.5/100))

4.3 Post-process results

Check out the number of occupied UTM squares by the species over time. First work out the naive occupancy, that is the number of sites that were observed occupied over the two periods:

naiveocc <- rep(NA,2) 
for (i in 1:2){
  naiveocc[i] <- sum(y[,i]>0)
}
naiveocc
## [1] 271 343

Now the estimated occupancy:

zzz <- out[,grepl("z", colnames(out))]
zz <- array(NA, dim = c(nrow(zzz), ncol(zzz)/2, 2))
zz[,,1] <- zzz[1:nrow(zzz),1:(ncol(zzz)/2)]
zz[,,2] <- zzz[1:nrow(zzz),(ncol(zzz)/2+1):ncol(zzz)]
dim(zz) # 2000 x 2850 x 2 (nbMCMC x nSquares x nyears)
## [1] 16000  2850     2
nbpixocc <- apply(zz,c(1,3),sum) # 2000 x 2
meannbpixocc <- apply(nbpixocc,2,median)
sdnbpixocc <- apply(nbpixocc,2,sd)
qulnbpixocc <- apply(nbpixocc,2,quantile,probs=2.5/100)
quunbpixocc <- apply(nbpixocc,2,quantile,probs=97.5/100)

Put everything together:

occupancy <- data.frame(period = c(1,2),
  naiveocc = naiveocc, 
  medianocc = meannbpixocc,
  qulnbpixocc = qulnbpixocc,
  quunbpixocc = quunbpixocc)
occupancy2 <- data.frame(period = rep(c(1,2),2),
  occ = c(occupancy$naiveocc, occupancy$medianocc), 
  quantlower = c(rep(NA,2),occupancy$qulnbpixocc),
  quantupper = c(rep(NA,2), occupancy$quunbpixocc),
  Occupancy = c(rep('naive',2),rep('estimated',2)))
occupancy2

Now plot:

occupancy2 %>% 
  ggplot() + 
  aes(x = period, y = occ, group = Occupancy, linetype = Occupancy) + 
  geom_ribbon(data = occupancy2,
              aes(ymin = quantlower, ymax = quantupper),
              alpha = 0.1, 
              show.legend = FALSE) + 
  geom_line() +
  scale_linetype_manual(values = c("solid", "dashed")) +  
  geom_point() + 
  labs(x = 'Year', y = 'Number of occupied sites')

Now display local extinction, colonization and species detection probabilities estimates with credible intervals:

epsmean <- icloglog(mean(out[,'mueps']) + mean(offset))
epsql <- icloglog(mean(out[,'mueps']) + mean(offset) - 2*sd(out[,'mueps']))
epsqu <- icloglog(mean(out[,'mueps']) + mean(offset) + 2*sd(out[,'mueps']))

gammean <- icloglog(mean(out[,'mugam']) + mean(offset))
gamql <- icloglog(mean(out[,'mugam']) + mean(offset) - 2*sd(out[,'mugam']))
gamqu <- icloglog(mean(out[,'mugam']) + mean(offset) + 2*sd(out[,'mugam']))

pmean1 <- plogis(mean(out[,'mup[1]']))
pmean2 <- plogis(mean(out[,'mup[2]']))
pqu1 <- plogis(mean(out[,'mup[1]']) + 2*sd(out[,'mup[1]']))
pql1 <- plogis(mean(out[,'mup[1]']) - 2*sd(out[,'mup[1]']))
pqu2 <- plogis(mean(out[,'mup[2]']) + 2*sd(out[,'mup[2]']))
pql2 <- plogis(mean(out[,'mup[2]']) - 2*sd(out[,'mup[2]']))

eps <- data.frame(
  param = epsmean, 
  qlo = epsql,
  qup = epsqu)

gam <- data.frame(
  param = gammean, 
  qlo = gamql,
  qup = gamqu)

det <- data.frame(period = c(1,2),
  param = c(pmean1,pmean2),
  qlo = c(pql1,pql2),
  qup = c(pqu1,pqu2))

eps
gam
det

Let’s map the estimated distribution of monk seals. First, compute realized occupancy per site and per period, and put altogether in a big table:

zzz <- out[,grepl("z", colnames(out))]
zz <- array(NA, dim = c(nrow(zzz), ncol(zzz)/2, 2))
zz[,,1] <- zzz[1:nrow(zzz),1:(ncol(zzz)/2)]
zz[,,2] <- zzz[1:nrow(zzz),(ncol(zzz)/2+1):ncol(zzz)]
meanz <- apply(zz,c(2,3),mean)
dim(meanz)
## [1] 2850    2
toplot <- data.frame(
  meanz = c(meanz[,c(1,2)]), 
  UTM_NAME = rep(unique(grid$UTM_NAME),2), 
  yearr = c(rep('2000-2007',nrow(meanz)), rep('2013-2020',nrow(meanz)))) 
ring_occ <- left_join(grid,toplot)
nrow(meanz)
## [1] 2850
sum(meanz[,2] == meanz[,1])
## [1] 161
sum(meanz[,2] > meanz[,1])
## [1] 2437
sum(meanz[,2] < meanz[,1])
## [1] 252

Then, plot two maps, one for each period:

# ggplot() + 
#   geom_sf(data = greece, colour = "grey50", fill = "white") + 
#   geom_sf(data = grid, colour = "grey50", fill = "white", lwd = 0.1) + 
#   geom_sf(data = ring_occ, lwd = 0.1, aes(fill = as.numeric(meanz))) + 
#   geom_sf(data = greece, colour = "grey50", fill = "white",lwd = 0.2) + 
#   scale_fill_viridis(
#     name = 'Pr(occupancy)', 
#     direction = 1,
#     alpha = 0.7) + 
#   labs(title = '') +
#   xlab("") + ylab("") + 
#   facet_wrap(~ yearr, ncol = 2) + 
#   theme(legend.position = 'bottom', 
#         legend.title = element_text(size=8), 
#         legend.text = element_text(size=8),
#         legend.key.size = unit(0.5, "cm"))
ggplot() + 
  geom_sf(data = greece, colour = "grey50", fill = "white") + 
  geom_sf(data = grid, colour = "grey50", fill = "white", lwd = 0.1) + 
  geom_sf(data = ring_occ, lwd = 0.1, aes(fill = as.numeric(meanz))) + 
  geom_sf(data = greece, colour = "grey50", fill = "white",lwd = 0.2) + 
  scale_fill_distiller(
    name = 'Pr(occupancy)', 
    direction = 1,
    palette = "Purples") + 
  labs(title = '') +
  xlab("") + ylab("") + 
  facet_wrap(~ yearr, ncol = 2) + 
  theme(legend.position = 'bottom', 
        legend.title = element_text(size=8), 
        legend.text = element_text(size=8),
        legend.key.size = unit(0.5, "cm"))

ggsave(here::here("figures","map2.png"), dpi = 600)

We’d also like to have two maps of the estimated occupied sites for the two periods. To do so, we say that all squares with prob of occupancy greater than \(25\%\) are occupied. This gives the following maps:

zzz <- out[,grepl("z", colnames(out))]
zz <- array(NA, dim = c(nrow(zzz), ncol(zzz)/2, 2))
zz[,,1] <- zzz[1:nrow(zzz),1:(ncol(zzz)/2)]
zz[,,2] <- zzz[1:nrow(zzz),(ncol(zzz)/2+1):ncol(zzz)]
sumz <- apply(zz,c(2,3),sum)
meanz <- apply(zz,c(2,3),sum)/dim(zz)[1]*100
z_occupied <- (meanz > 25) # 
toplot <- data.frame(
  z_occupied = c(z_occupied[,c(1,2)]), 
  UTM_NAME = rep(unique(grid$UTM_NAME),2), 
  yearr = c(rep('2000-2007',nrow(z_occupied)), rep('2013-2020',nrow(z_occupied)))) 
ring_occ <- left_join(grid,toplot) %>%
  mutate(z_occupied = if_else(z_occupied == TRUE, 'used', 'unused by the species'))
ggplot() + 
  geom_sf(data = greece, colour = "grey50", fill = "white") + 
  geom_sf(data = ring_occ, lwd = 0.1, aes(fill = z_occupied)) + 
    geom_sf(data = greece, colour = "grey50", fill = "white",lwd = 0.2) + 
    scale_fill_manual(values = c('white','blue'),
                    name = "Site is") + 
  xlab("") + ylab("") +
  facet_wrap(~ yearr, ncol = 2) + 
  theme(legend.position = "none")

ggsave(here::here("figures","map3.png"), dpi = 600)

We’d also like to map the difference in occupancy probabilities.

delta <- as.matrix(meanz[,2]/100 - meanz[,1]/100)
ring_occ2 <- ring_occ %>%
  filter(yearr == '2013-2020')
ggplot() + 
  geom_sf(data = greece, colour = "grey50", fill = "white") + 
  geom_sf(data = grid, colour = "grey50", fill = "white", lwd = 0.1) + 
  geom_sf(data = ring_occ2, lwd = 0.1, aes(fill = delta)) + 
  geom_sf(data = greece, colour = "grey50", fill = "white",lwd = 0.2) + 
  scale_fill_gradientn(colours = RColorBrewer::brewer.pal(11,"RdYlGn"),
                       name = '') + 
  labs(title = '') +
  xlab("") + ylab("") + 
  theme(legend.position = 'bottom', 
        legend.title = element_text(size=8), 
        legend.text = element_text(size=8),
        legend.key.size = unit(0.5, "cm"))

ggsave(here::here("figures","map4.png"), dpi = 600)

5 Intersect with protected areas

pa <- st_read(here::here("shapefiles","N2000_v31_marine_WGS84.shp"))
## Reading layer `N2000_v31_marine_WGS84' from data source 
##   `/Users/oliviergimenez/Dropbox/OG/GITHUB/monkseal-occupancy/shapefiles/N2000_v31_marine_WGS84.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 174 features and 5 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 19.35198 ymin: 34.79367 xmax: 29.64895 ymax: 41.25424
## Geodetic CRS:  WGS 84
pa <- st_transform(pa,2100)
sum(rapply(st_geometry(pa), nrow))
## [1] 954842
pa <- st_simplify(pa,dTolerance=50)
sum(rapply(st_geometry(pa), nrow))
## [1] 43195
pa %>% 
  mutate(area = st_area(.),
         areakm2 = units::set_units(area, km^2)) %>%
  summarize(total = sum(areakm2))

Let’s have a look to the map of marine protected areas:

ggplot() + 
  geom_sf(data = greece, colour = "grey50", fill = "grey50") + 
  geom_sf(data = pa, colour = "red", fill = "transparent", lwd = 0.1) 

ggsave(here::here("figures","mpa.png"), dpi = 600)

Look at where the high probability of presence on monk seals pups coincides with marine protected areas, for the period 2013-2020.

ggplot() + 
  geom_sf(data = greece, colour = "grey50", fill = "white") + 
  geom_sf(data = ring_occ2, lwd = 0.1, aes(fill = z_occupied)) +
  geom_sf(data = greece, colour = "grey50", fill = "white",lwd = 0.2) + 
    scale_fill_manual(values = c('white','blue'),
                    name = "Site is") + 
  geom_sf(data = pa, colour = "red", fill = "transparent", lwd = 0.1) +
  xlab("") + ylab("") +
  theme(legend.position = "none")

ggsave(here::here("figures","mpamonkseals.png"), dpi = 600)

Out of the total number of cells with high probability of occurrence of monk seals, this number intersects with marine protected areas.

mask <- ring_occ %>%
  filter(z_occupied == 'used') %>%
  filter(yearr == '2013-2020') %>%
  st_intersects(pa) %>%
  lengths > 0
sum(mask)
## [1] 299
ring_occ2$map <- ring_occ2$z_occupied
mask2 <- (ring_occ2$map == 'used')
ring_occ2$map[mask2][mask] <- 'used, overlap w/ mpa'
ring_occ2$map[mask2][!mask] <- 'used, no overlap w/ mpa'

On a map, this gives.

ggplot() + 
  geom_sf(data = greece, colour = "grey50", fill = "white") + 
  geom_sf(data = ring_occ2, lwd = 0.1, aes(fill = map)) +
  geom_sf(data = greece, colour = "grey50", fill = "white",lwd = 0.2) + 
    scale_fill_manual(values = c('white','blue','red'),
                    name = "Site is") + 
  geom_sf(data = pa, colour = "red", fill = "transparent", lwd = 0.1) +
  xlab("") + ylab("") +
  theme(legend.position = "bottom")

ggsave(here::here("figures","mpamonksealsdetails.png"), dpi = 600)

6 Focus on pups

We do the same analyses with pups.

monkseal <- monkseal %>%
  filter(stage=='PUP0' | stage=='PUP1' | stage=='PUP2' | stage=='PUP3' | stage=='PUP4')

The number of sightings.

monkseal %>% 
  ggplot() +
  aes(x = year) + 
  geom_bar()

The sightings are done along the year (all years are pooled together). October gets maximum sightings.

monkseal %>% 
  ggplot() +
  aes(x = factor(month)) + 
  geom_bar() + 
  labs(x = NULL)

Regarding the observers, we see that the sightings are mostly done by local people, then to a lesser extent tourists, and a few others.

monkseal %>% 
  filter(!is.na(observer)) %>%
  count(observer) %>%
  ggplot() +
  aes(x = n, y = fct_reorder(observer, n)) %>%
  geom_col() +
  labs(y = NULL, x = NULL)

Last, regarding where the seals were when they were spotted, we see that the sightings are mostly on beach (f).

monkseal %>% 
  filter(!is.na(where)) %>%
  count(where) %>%
  ggplot() +
  aes(x = n, y = fct_reorder(where, n)) %>%
  geom_col() +
  labs(y = NULL, x = NULL)

Map counts.

counts <- monkseal %>%
  group_by(utm) %>%
  count()
names(counts) <- c('UTM_NAME','val')
grid_wcounts <- right_join(grid, counts)
classes <- classInt::classIntervals(grid_wcounts$val, n = 3, style = "jenks")
bwr <- grDevices::colorRampPalette(rev(c('red', 'orange', 'yellow', 'cyan', 'blue')))
grid_wcounts <- grid_wcounts %>%
  mutate(valdiscrete = cut(val, classes$brks, include.lowest = T))
ggplot() + 
  geom_sf(data = greece, colour = "grey50", fill = "white") + 
  geom_sf(data = grid, colour = "grey50", fill = "white", lwd = 0.1) + 
  geom_sf(data = grid_wcounts, lwd = 0.1, aes(fill = valdiscrete)) + 
  geom_sf(data = greece, colour = "grey50", fill = "white",lwd = 0.2) + 
  scale_fill_manual(
    name = 'counts',
    values = bwr(5),
    guide = guide_legend(reverse = TRUE)) + 
  labs(title = '', x = NULL, y = NULL)

ggsave(here::here("figures","map1pups.png"),dpi=600)

Format data for occupancy analyses.

mm <- c('Sep','Oct','Nov','Dec','Jan') # extract data
indyear <- c(2000:2007,2013:2020)
ids <- unique(grid$UTM_NAME)
mom_detections <- list()
for (k in 1:length(indyear)){
  mom_detections[[k]] <- matrix(0,nrow=length(ids),ncol=length(mm))
  data <- filter(monkseal,year==indyear[k])
  ind <- 1
  for (i in mm){
    temp <- filter(data, month==i)
    utm_temp <- unique(temp$utm)
    for (j in utm_temp){
      # 1 if a sighting was made in square j in month i
      mom_detections[[k]][ids==j,ind] <- 1
    }
    ind <- ind + 1
  }
}
mom_det <- do.call(cbind, mom_detections) # bind data from all year in columns
# convert the occupancy dataset in a 3D array:
y <- list()
ind <- 0
for (i in 1:length(indyear)){
    mask <- (ind + i):(ind + i + length(mm) - 1)
    y[[i]] <- mom_det[,mask]
    ind <- ind + length(mm) - 1
}

# convert list into array (https://stackoverflow.com/questions/37433509/convert-list-to-a-matrix-or-array)
y <- array(unlist(y), dim = c(nrow(y[[1]]), ncol(y[[1]]), length(y)))
dim(y)
## [1] 2850    5   16
new_y <- NULL
for (i in 1:dim(y)[3]){ # loop over years
  new_y <- cbind(new_y,apply(y[,1:5,i],1,sum))
}
new_y <- (new_y > 0)
y1 <- apply(new_y[,1:8],1,sum)
y2 <- apply(new_y[,9:16],1,sum)
y <- cbind(y1,y2)
dim(y)
## [1] 2850    2
range(y)
## [1] 0 5

Load nimble package:

library(nimble)

We specify our model:

model <- nimbleCode({

  # occupancy
  mupsi ~ dnorm(-5,sd = 1)
  for(i in 1:nsite){
    cloglog(psi[i]) <- mupsi + phi[i] + offset[i]
  }

  # ICAR
  phi[1:nsite] ~ dcar_normal(adj[1:L],
                             weights[1:L], 
                             num[1:nsite], 
                             tau.phi, 
                             zero_mean = 1) 
  
  tau.phi ~ dgamma(1, 0.01) # precision of the ICAR component
  sigma2.phi <- 1/tau.phi # variance of the ICAR component
  
  # detection
  sdp ~ dunif(0,10)
  for(i in 1:nyear){
    muptemp[i] ~ dunif(0,1)
    logit(mup[i]) <- muptemp[i]
  }
  for(i in 1:nsite){
    lp[i] ~ dnorm(0, sd = sdp)
    for(t in 1:nyear){
      logit(p[i,t]) <- mup[t] + lp[i]
    }
  }
  
  # priors for mean extinction/colonization
  # preliminary analyses show no heterogeneity in these parameters
  mueps ~ dnorm(-5, sd = 1)
  mugam ~ dnorm(-5, sd = 1)
  
  # process model
  for(i in 1:nsite){
    z[i,1] ~ dbern(psi[i])
    cloglog(gamma[i]) <- mugam + offset[i]
    cloglog(eps[i]) <- mueps + offset[i]
    z[i,2] ~ dbern(z[i,1] * (1 - eps[i]) + (1 - z[i,1]) * gamma[i])
  }
  
  # observation model
  for (i in 1:nsite){
    for (k in 1:nyear){
      y[i,k] ~ dbin(z[i,k] * p[i,k],8)
    }
  }
})

Specify data, initial values, parameters to be monitored and various MCMC details:

# data
offset <- log(as.numeric(grid$areakm2))
win.data <- list(y = y)
neighbours <- spdep::poly2nb(pl = grid)
weigths <- spdep::nb2WB(nb = neighbours)
win.constants <- list(nsite = dim(y)[1], 
                      nyear = dim(y)[2], 
                      offset = offset,
                      L = length(weigths$weights),        
                      adj = weigths$adj,
                      num = weigths$num,
                      weights = weigths$weights)

# initial values
zst <- cbind(as.numeric(y1 > 0), as.numeric(y2 > 0)) # observed occurrence as inits for z 
inits <- function() {list(z = zst, 
                          mueps = -4, 
                          mugam = -4, 
                          mupsi = -4,
                          sdp = runif(1,1,9), 
                          muptemp = rep(0,dim(y)[1]),
                          eps = rep(0.5,dim(y)[1]),
                          gamma = rep(0.5,dim(y)[1]),
                          psi = rep(0.5,dim(y)[1]),
                          phi = rep(0,dim(y)[1]),
                          lp = rep(0,dim(y)[1]),
                          mup = rep(0,dim(y)[2]),
                          tau.phi = .1)}

# MCMC settings
ni <- 50000
nb <- 10000
nc <- 2

Create model as a R object:

survival <- nimbleModel(code = model,
                        data = win.data,
                        constants = win.constants,
                        inits = inits(),
                        calculate = FALSE)
#survival$initializeInfo()
#survival$calculate()

Go through nimble workflow:

# compile model
Csurvival <- compileNimble(survival)

# create a MCMC configuration
survivalConf <- configureMCMC(survival, 
                              useConjugacy = FALSE)

# replace RW samplers by slice sampler for standard deviation of random effects on detection
survivalConf$removeSamplers(c('sdp'))
survivalConf$addSampler(target = c('sdp'),
                        type = 'slice')

survivalConf$removeSamplers(c('tau.phi'))
survivalConf$addSampler(target = c('tau.phi'),
                        type = 'slice')

# add some parameters to monitor
survivalConf$addMonitors(c("lp","z","mupsi","mup"))

# create MCMC function and compile it
survivalMCMC <- buildMCMC(survivalConf)
CsurvivalMCMC <- compileNimble(survivalMCMC, project = survival)

Run nimble:

ptm <- proc.time()
out <- runMCMC(mcmc = CsurvivalMCMC,
               niter = ni,
               nburnin = nb,
               nchains = nc,
               thin = 5)
x <- proc.time() -  ptm
save(out, x, file = here::here("models","monksealscloglogpups-nimble.RData"))

The code above takes some time to run. I ran it, saved the results and use them from here:

load(here::here("models","monksealscloglogpups-nimble.RData"))
out_backup <- out

Print results:

out <- rbind(out$chain1, out$chain2)
out %>%
  as_tibble() %>%
  pivot_longer(cols = everything(),  values_to = "value", names_to = "parameter") %>%
  filter(str_detect(parameter, "mu") | str_detect(parameter, "sd")) %>%
  filter(str_detect(parameter, "muptemp", negate = TRUE)) %>%
  group_by(parameter) %>%
  summarize(median = median(value),
            lci = quantile(value, probs = 2.5/100),
            uci = quantile(value, probs = 97.5/100))

Check out the number of occupied UTM squares by the species over time. First work out the naive occupancy, that is the number of sites that were observed occupied over the two periods:

naiveocc <- rep(NA,2) 
for (i in 1:2){
  naiveocc[i] <- sum(y[,i]>0)
}
naiveocc
## [1] 30 68

Now the estimated occupancy:

zzz <- out[,grepl("z", colnames(out))]
zz <- array(NA, dim = c(nrow(zzz), ncol(zzz)/2, 2))
zz[,,1] <- zzz[1:nrow(zzz),1:(ncol(zzz)/2)]
zz[,,2] <- zzz[1:nrow(zzz),(ncol(zzz)/2+1):ncol(zzz)]
dim(zz) # 2000 x 2850 x 2 (nbMCMC x nSquares x nyears)
## [1] 16000  2850     2
nbpixocc <- apply(zz,c(1,3),sum) # 2000 x 2
meannbpixocc <- apply(nbpixocc,2,median)
sdnbpixocc <- apply(nbpixocc,2,sd)
qulnbpixocc <- apply(nbpixocc,2,quantile,probs=2.5/100)
quunbpixocc <- apply(nbpixocc,2,quantile,probs=97.5/100)

Put everything together:

occupancy <- data.frame(period = c(1,2),
  naiveocc = naiveocc, 
  medianocc = meannbpixocc,
  qulnbpixocc = qulnbpixocc,
  quunbpixocc = quunbpixocc)
occupancy2 <- data.frame(period = rep(c(1,2),2),
  occ = c(occupancy$naiveocc, occupancy$medianocc), 
  quantlower = c(rep(NA,2),occupancy$qulnbpixocc),
  quantupper = c(rep(NA,2), occupancy$quunbpixocc),
  Occupancy = c(rep('naive',2),rep('estimated',2)))
occupancy2

Now plot:

occupancy2 %>% 
  ggplot() + 
  aes(x = period, y =  occ, group = Occupancy, linetype = Occupancy) + 
    geom_ribbon(data=occupancy2,aes(ymin=quantlower,ymax=quantupper),alpha=0.1, show.legend = FALSE) + 
 geom_line() +
  scale_linetype_manual(values=c("solid", "dashed")) +  
  geom_point() + 
  xlab('Year') + 
  ylab('Number of occupied sites')

Now display local extinction, colonization and species detection probabilities estimates with credible intervals:

epsmean <- icloglog(mean(out[,'mueps']) + mean(offset))
epsql <- icloglog(mean(out[,'mueps']) + mean(offset) - 2*sd(out[,'mueps']))
epsqu <- icloglog(mean(out[,'mueps']) + mean(offset) + 2*sd(out[,'mueps']))

gammean <- icloglog(mean(out[,'mugam']) + mean(offset))
gamql <- icloglog(mean(out[,'mugam']) + mean(offset) - 2*sd(out[,'mugam']))
gamqu <- icloglog(mean(out[,'mugam']) + mean(offset) + 2*sd(out[,'mugam']))

pmean1 <- plogis(mean(out[,'mup[1]']))
pmean2 <- plogis(mean(out[,'mup[2]']))
pqu1 <- plogis(mean(out[,'mup[1]']) + 2*sd(out[,'mup[1]']))
pql1 <- plogis(mean(out[,'mup[1]']) - 2*sd(out[,'mup[1]']))
pqu2 <- plogis(mean(out[,'mup[2]']) + 2*sd(out[,'mup[2]']))
pql2 <- plogis(mean(out[,'mup[2]']) - 2*sd(out[,'mup[2]']))

eps <- data.frame(
  param = epsmean, 
  qlo = epsql,
  qup = epsqu)

gam <- data.frame(
  param = gammean, 
  qlo = gamql,
  qup = gamqu)

det <- data.frame(period = c(1,2),
  param = c(pmean1,pmean2),
  qlo = c(pql1,pql2),
  qup = c(pqu1,pqu2))

eps
gam
det

Let’s map the estimated distribution of monk seals. First, compute realized occupancy per site and per period, and put altogether in a big table:

zzz <- out[,grepl("z", colnames(out))]
zz <- array(NA, dim = c(nrow(zzz), ncol(zzz)/2, 2))
zz[,,1] <- zzz[1:nrow(zzz),1:(ncol(zzz)/2)]
zz[,,2] <- zzz[1:nrow(zzz),(ncol(zzz)/2+1):ncol(zzz)]
meanz <- apply(zz,c(2,3),mean)
dim(meanz)
## [1] 2850    2
toplot <- data.frame(
  meanz = c(meanz[,c(1,2)]), 
  UTM_NAME = rep(unique(grid$UTM_NAME),2), 
  yearr = c(rep('2000-2007',nrow(meanz)), rep('2013-2020',nrow(meanz)))) 
ring_occ <- left_join(grid,toplot)
nrow(meanz)
## [1] 2850
sum(meanz[,2] == meanz[,1])
## [1] 13
sum(meanz[,2] > meanz[,1])
## [1] 2819
sum(meanz[,2] < meanz[,1])
## [1] 18

Then, plot two maps, one for each multi-year period:

ggplot() + 
  geom_sf(data = greece, colour = "grey50", fill = "white") + 
  geom_sf(data = grid, colour = "grey50", fill = "white", lwd = 0.1) + 
  geom_sf(data = ring_occ, lwd = 0.1, aes(fill = as.numeric(meanz))) + 
  geom_sf(data = greece, colour = "grey50", fill = "white",lwd = 0.2) + 
  scale_fill_distiller(
    name = 'Pr(occupancy)', 
    direction = 1,
    palette = "Purples") + 
  labs(title = '', x = NULL, y = NULL) +
  facet_wrap(~ yearr, ncol = 2) + 
  theme(legend.position = 'bottom', 
        legend.title = element_text(size=8), 
        legend.text = element_text(size=8),
        legend.key.size = unit(0.5, "cm"))

ggsave(here::here("figures","map2pups.png"),dpi=600)

We’d also like to have two maps of the estimated occupied sites for the two periods. To do so, we say that all squares with prob of occupancy greater than \(25\%\) are occupied. This gives the following maps:

zzz <- out[,grepl("z", colnames(out))]
zz <- array(NA, dim = c(nrow(zzz), ncol(zzz)/2, 2))
zz[,,1] <- zzz[1:nrow(zzz),1:(ncol(zzz)/2)]
zz[,,2] <- zzz[1:nrow(zzz),(ncol(zzz)/2+1):ncol(zzz)]
sumz <- apply(zz,c(2,3),sum)
meanz <- apply(zz,c(2,3),sum)/dim(zz)[1]*100
z_occupied <- (meanz > 25) # 
toplot <- data.frame(
  z_occupied = c(z_occupied[,c(1,2)]), 
  UTM_NAME = rep(unique(grid$UTM_NAME),2), 
  yearr = c(rep('2000-2007',nrow(z_occupied)), rep('2013-2020',nrow(z_occupied)))) 
ring_occ <- left_join(grid,toplot) %>%
  mutate(z_occupied = if_else(z_occupied == TRUE, 'used', 'unused by the species'))

ggplot() + 
  geom_sf(data = greece, colour = "grey50", fill = "white") + 
  geom_sf(data = ring_occ, lwd = 0.1, aes(fill = z_occupied)) + 
    geom_sf(data = greece, colour = "grey50", fill = "white",lwd = 0.2) + 
    scale_fill_manual(values = c('white','blue'),
                    name = "Site is") + 
  labs(x = NULL, y = NULL) +
  facet_wrap(~ yearr, ncol = 2) + 
  theme(legend.position = "none")

ggsave(here::here("figures","map3pups.png"),dpi=600)

We’d like to consider the difference in occupancy probabilities.

delta <- as.matrix(meanz[,2]/100 - meanz[,1]/100)
ring_occ2 <- ring_occ %>%
  filter(yearr == '2013-2020')
ggplot() + 
  geom_sf(data = greece, colour = "grey50", fill = "white") + 
  geom_sf(data = grid, colour = "grey50", fill = "white", lwd = 0.1) + 
  geom_sf(data = ring_occ2, lwd = 0.1, aes(fill = delta)) + 
  geom_sf(data = greece, colour = "grey50", fill = "white",lwd = 0.2) + 
  scale_fill_gradientn(colours = RColorBrewer::brewer.pal(15,"RdYlGn"),
                       name = '') + 
  labs(title = '', x = NULL, y = NULL) + 
  theme(legend.position = 'bottom', 
        legend.title = element_text(size=8), 
        legend.text = element_text(size=8),
        legend.key.size = unit(0.5, "cm"))

ggsave(here::here("figures","map4pups.png"),dpi=600)

Look at where the high probability of presence on monk seals pups coincides with marine protected areas, for the period 2013-2020.

ggplot() + 
  geom_sf(data = greece, colour = "grey50", fill = "white") + 
  geom_sf(data = ring_occ2, lwd = 0.1, aes(fill = z_occupied)) +
  geom_sf(data = greece, colour = "grey50", fill = "white",lwd = 0.2) + 
    scale_fill_manual(values = c('white','blue'),
                    name = "Site is") + 
  geom_sf(data = pa, colour = "red", fill = "transparent", lwd = 0.1) +
  xlab("") + ylab("") +
  theme(legend.position = "none")

ggsave(here::here("figures","mpamonksealspups.png"),dpi=600)

Out of the total number of cells with high probability of occurrence of monk seals pups, this number intersects with marine protected areas.

mask <- ring_occ %>%
  filter(z_occupied == 'used') %>%
  filter(yearr == '2013-2020') %>%
  st_intersects(pa) %>%
  lengths > 0
sum(mask)
## [1] 62
ring_occ2$map <- ring_occ2$z_occupied
mask2 <- (ring_occ2$map == 'used')
ring_occ2$map[mask2][mask] <- 'used, overlap w/ mpa'
ring_occ2$map[mask2][!mask] <- 'used, no overlap w/ mpa'

On a map, this gives.

ggplot() + 
  geom_sf(data = greece, colour = "grey50", fill = "white") + 
  geom_sf(data = ring_occ2, lwd = 0.1, aes(fill = map)) +
  geom_sf(data = greece, colour = "grey50", fill = "white",lwd = 0.2) + 
    scale_fill_manual(values = c('white','blue','red'),
                    name = "Site is") + 
  geom_sf(data = pa, colour = "red", fill = "transparent", lwd = 0.1) +
  xlab("") + ylab("") +
  theme(legend.position = "bottom")

ggsave(here::here("figures","mpamonksealsdetailspups.png"),dpi=600)