Eira barbara Model Run

Author

Florencia Grattarola

Published

July 4, 2023

Model run for Eira barbara with the data prepared in the previous step (including species and covariates data).

library(sf)
library(R2jags) # interface to JAGS
library(tidyverse)

Data

# Presence-absence data
ebarbara_expert_blob_time1 <- readRDS('data/ebarbara_expert_blob_time1.rds')
ebarbara_expert_blob_time2 <- readRDS('data/ebarbara_expert_blob_time2.rds')

PA_time1 <- readRDS('data/data_ebarbara_PA_time1.rds') %>%
  cbind(expert=ebarbara_expert_blob_time1) %>%
  filter(!is.na(env.bio_10) &!is.na(env.bio_17) & !is.na(env.npp) & !is.na(env.nontree) & !is.na(expert)) # remove NA's
PA_time2 <- readRDS('data/data_ebarbara_PA_time2.rds') %>%
  cbind(expert=ebarbara_expert_blob_time2) %>% 
  filter(!is.na(env.bio_10) &!is.na(env.bio_17) & !is.na(env.npp) & !is.na(env.nontree) & !is.na(expert)) # remove NA's

# Presence-only data
ebarbara_expert_gridcell <- readRDS('data/ebarbara_expert_gridcell.rds')

PO_time1 <- readRDS('data/data_ebarbara_PO_time1.rds') %>%
  cbind(expert=ebarbara_expert_gridcell$dist_exprt) %>% 
  filter(!is.na(env.bio_10) &!is.na(env.bio_17) & !is.na(env.npp) & !is.na(env.nontree) & !is.na(acce) & !is.na(count) & !is.na(expert)) # remove NA's
PO_time2  <- readRDS('data/data_ebarbara_PO_time2.rds')  %>%
  cbind(expert=ebarbara_expert_gridcell$dist_exprt) %>% 
  filter(!is.na(env.bio_10) &!is.na(env.bio_17) & !is.na(env.npp) & !is.na(env.nontree) & !is.na(acce) & !is.na(count) & !is.na(expert)) # remove NA's

PA_time1_time2 <- rbind(PA_time1 %>% mutate(time=1), PA_time2 %>% mutate(time=2)) 
PO_time1_time2 <- rbind(PO_time1 %>% mutate(time=1), PO_time2 %>% mutate(time=2)) 

Splines

  • Set the k parameter: the number of basis functions.
  • Set the model formula.
  • Fit the GAM model.
Code
k = 9

gam.formula <- formula(count ~ env.bio_10 + 
                               env.bio_17 + 
                               env.npp + 
                               env.nontree +
                               expert +
                               offset(log(area)) +
                               s(X, Y, by=as.factor(time), k=k))

model.gam <- mgcv::gam(gam.formula, 
                       data = PO_time1_time2,
                       family = poisson)

summary(model.gam)

Family: poisson 
Link function: log 

Formula:
count ~ env.bio_10 + env.bio_17 + env.npp + env.nontree + expert + 
    offset(log(area)) + s(X, Y, by = as.factor(time), k = k)

Parametric coefficients:
             Estimate Std. Error  z value Pr(>|z|)    
(Intercept) -25.19472    0.11437 -220.297  < 2e-16 ***
env.bio_10    0.25347    0.02550    9.940  < 2e-16 ***
env.bio_17   -0.34785    0.13781   -2.524   0.0116 *  
env.npp       0.78088    0.04351   17.948  < 2e-16 ***
env.nontree   0.24501    0.04958    4.942 7.75e-07 ***
expert      -23.08424    3.87910   -5.951 2.67e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Approximate significance of smooth terms:
                          edf Ref.df Chi.sq p-value    
s(X,Y):as.factor(time)1 7.576  7.951  127.8  <2e-16 ***
s(X,Y):as.factor(time)2 7.958  7.999  811.3  <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

R-sq.(adj) =  0.101   Deviance explained = 42.4%
UBRE = 0.72304  Scale est. = 1         n = 4360
  • Get the jagam splines values: using mgcv::jagam.
Code
jagam.out <- mgcv::jagam(gam.formula, 
                         data = PO_time1_time2,
                         family = poisson,
                         file='') # we will not need this file
model {
  eta <- X %*% b + offset ## linear predictor
  for (i in 1:n) { mu[i] <-  exp(eta[i]) } ## expected response
  for (i in 1:n) { y[i] ~ dpois(mu[i]) } ## response 
  ## Parametric effect priors CHECK tau=1/30^2 is appropriate!
  for (i in 1:6) { b[i] ~ dnorm(0,0.0011) }
  ## prior for s(X,Y):as.factor(time)1... 
  K1 <- S1[1:8,1:8] * lambda[1]  + S1[1:8,9:16] * lambda[2]
  b[7:14] ~ dmnorm(zero[7:14],K1) 
  ## prior for s(X,Y):as.factor(time)2... 
  K2 <- S2[1:8,1:8] * lambda[3]  + S2[1:8,9:16] * lambda[4]
  b[15:22] ~ dmnorm(zero[15:22],K2) 
  ## smoothing parameter priors CHECK...
  for (i in 1:4) {
    lambda[i] ~ dgamma(.05,.005)
    rho[i] <- log(lambda[i])
  }
}
Code
smooth1 <- jagam.out$pregam$smooth[[1]]
smooth2 <- jagam.out$pregam$smooth[[2]]

# presence-only data
jagam.PO.time1 <- mgcv::PredictMat(object = smooth1, data = PO_time1_time2)
jagam.PO.time2 <- mgcv::PredictMat(object = smooth2, data = PO_time1_time2)

# presence-absence data
jagam.PA.time1 <- mgcv::PredictMat(object = smooth1, data = PA_time1_time2)
jagam.PA.time2 <- mgcv::PredictMat(object = smooth2, data = PA_time1_time2)

Prepare data for JAGS

Code
PA.X <- cbind('(Intercept)'= 1, 
              'env.bio_10' = PA_time1_time2$env.bio_10,
              'env.bio_17' = PA_time1_time2$env.bio_17,
              'env.npp' = PA_time1_time2$env.npp,
              'env.nontree' = PA_time1_time2$env.nontree,
              'expert' = PA_time1_time2$expert,
              jagam.PA.time1, 
              jagam.PA.time2)

PO.X <- cbind('(Intercept)'= 1, 
              'env.bio_10' = PO_time1_time2$env.bio_10,
              'env.bio_17' = PO_time1_time2$env.bio_17,
              'env.npp' = PO_time1_time2$env.npp,
              'env.nontree' = PO_time1_time2$env.nontree,
              'expert' = PO_time1_time2$expert,
              jagam.PO.time1, 
              jagam.PO.time2)

# number of all columns in X
n.X = ncol(PA.X)

# number of columns in X of spline basis functions
n.spl = ncol(jagam.PA.time1)

# number of columns in X of env. predictors + intercept
n.par = n.X - ncol(jagam.PA.time1)*2

# number of factors of time in X 
n.fac = length(unique(as.factor(PO_time1_time2$time)))*2

# country as an indexed variable
PO.country  <- as.numeric(as.factor(PO_time1_time2$country))

#number of countries
n.country <- length(unique(PO.country))

#global effort time1/time2 (see Range_map_offset.qmd)
global.effort <- rep(0.2715625, nrow(PO_time1_time2))

# for AUC
thr <- seq(0, 1, 0.05)

jags.data <- list(n.PA = nrow(PA_time1_time2),
                  y.PA = PA_time1_time2$presabs, 
                  X.PA = PA.X,
                  area.PA = PA_time1_time2$area,
                  effort = PA_time1_time2$effort,
                  n.PO = nrow(PO_time1_time2),
                  n.PO.half = nrow(PO_time1_time2)/2,
                  y.PO = PO_time1_time2$count, 
                  X.PO = PO.X,
                  area.PO = PO_time1_time2$area, 
                  acce = PO_time1_time2$acce,
                  country = PO.country,
                  global.effort = global.effort,
                  n.X = n.X,
                  n.cntr = n.country,
                  n.par = n.par,
                  n.fac = n.fac,
                  n.spl = n.spl,
                  Z = rep(0, length(jagam.out$jags.data$zero)),
                  S.time1 = jagam.out$jags.data$S1,
                  S.time2 = jagam.out$jags.data$S2, 
                  thr = thr)  

#jags.data

Specify the model in BUGS language

Code
cat('model
  { 
    # PRIORS --------------------------------------------------
    
    ## Thinning at locations with complete accessibility in PO data
      
      # intercept of the decay function for each country of origin. 
      # It needs a flat prior between 0 and 1
      for (c in 1:n.cntr) 
      {
        alpha0[c] ~ dbeta(1, 1)  
      }

      # steepness of the decaying distance-P.ret relationship in PO data
      alpha1 ~ dgamma(0.5, 0.05)   
      
    ## Effect of sampling effort in PA data
      beta ~ dnorm(0, 0.01)
  
    ## Parametric effects of environment driving the point process intensity
     # (it also includes an intercept)
     
      for (r in 1:n.par)
      { 
        b[r] ~ dnorm(0,0.01) 
      }
      
    ## Splines (imported and adjusted form output of mgcv::jagam)
    
      ## prior for s(X,Y):as.factor(time)1 
      sigma.time1 <- S.time1[1:n.spl, 1:n.spl] * gamma[1]  + 
                     S.time1[1:n.spl, (n.spl + 1):(n.spl * 2)] * gamma[2]
      b[(n.par+1):(n.spl + n.par)] ~ dmnorm(Z[(n.par+1):(n.spl + n.par)], sigma.time1) 
     
      ## prior for s(X,Y):as.factor(time)2
      sigma.time2 <- S.time2[1:n.spl, 1:n.spl] * gamma[3]  + 
                     S.time2[1:n.spl, (n.spl + 1):(n.spl * 2)] * gamma[4]
      b[(n.X - n.spl + 1):(n.X)] ~ dmnorm(Z[(n.X - n.spl + 1):(n.X)], sigma.time2) 
     
      ## Priors for smoothing parameter 
      for (f in 1:n.fac) 
      {
        gamma[f] ~ dgamma(.5,.5)
        rho[f] <- log(gamma[f])
      }

    # LIKELIHOOD --------------------------------------------------
    
      ## --- Presence-Absence (PA) data ---
      
       eta.PA <- X.PA %*% b ## linear predictor
         
        
       for (i in 1:n.PA) 
       { 
         # the probability of presence
         cloglog(psi[i]) <- eta.PA[i] + log(area.PA[i]) + beta*log(effort[i])
        
         # presences and absences come from a Bernoulli distribution
         y.PA[i] ~ dbern(psi[i]*0.9999) 
       }
  
      ## --- Presence-Only (PO) data --- 
  
      eta.PO <- X.PO %*% b  ## linear predictor

      for (j in 1:n.PO)
      {
        # cell-specific probability of retainin (observing) a point is a function of accessibility
        P.ret[j] <- alpha0[country[j]] * exp( (-alpha1) * acce[j]) 
        
        # true mean number (nu) of points per cell i is the true intensity multiplied by cell area
        log(nu[j]) <- eta.PO[j] + log(area.PO[j])

        # thinning: the true lambda
        lambda[j] <- ifelse(j > n.PO.half,
                                nu[j] * P.ret[j],                    #time1
                                nu[j] * P.ret[j] * global.effort[j]) #time2

        # counts of observed points come from a Poisson distribution
        y.PO[j] ~ dpois(lambda[j]) 
      }
  
    # PREDICTIONS -------------------------------------------------

    eta.pred <- X.PO %*% b

    for (j in 1:n.PO)
    {
      # predicted probability of occurrence in grid cell j
      cloglog(P.pred[j]) <- eta.pred[j] + log(area.PO[j])
    }

    # POSTERIOR PREDICTIVE CHECK  --------------------------------
    
    # for PA
    for (i in 1:n.PA)
    {
      # Fit assessments: Tjur R-Squared (fit statistic for logistic regression)
      pres[i] <- ifelse(y.PA[i] > 0, psi[i], 0)
      absc[i] <- ifelse(y.PA[i] == 0, psi[i], 0)
    }
    
    # Discrepancy measures for entire PA data set
    pres.n <- sum(y.PA[] > 0)
    absc.n <- sum(y.PA[] == 0)
    r2_tjur <- abs(sum(pres[])/pres.n - sum(absc[])/absc.n)

    # for PO
    for (j in 1:n.PO)
    {
      # Fit assessments: Posterior predictive check and data for DHARMA
      y.PO.new[j] ~ dpois(lambda[j]) 

    }
    
    # AUC
    for (t in 1:length(thr)) {
        sens[t] <- sum((psi > thr[t]) && (y.PA==1))/pres.n   #tpr (sensitivity)
        spec[t] <- sum((psi < thr[t]) && (y.PA==0))/absc.n   
        fpr[t] <- 1 - spec[t]                                #fpr (1-specificity)
    }
    auc <- sum((sens[2:length(sens)]+sens[1:(length(sens)-1)])/2 * 
    -(fpr[2:length(fpr)] - fpr[1:(length(fpr)-1)]))

    # DERIVED QUANTITIES ------------------------------------------
    
    # area in each time period, and temporal change of area
    A.time1 <- sum(P.pred[1:n.PO.half])
    A.time2 <- sum(P.pred[(n.PO.half+1):n.PO])
    delta.A <- A.time2 - A.time1
    
    # uncertainty for the temporal change
    for (j in 1:n.PO.half)
    {
      delta.Grid[j] <- P.pred[n.PO.half+j] - P.pred[j]
    }
  }
', file = 'models/ebarbara_model.txt')

Inits function

Code
jags.inits <- function(model = model.gam)
{
  return(list(b=rnorm(n.X, mean=model$coefficients, sd=1)))
}

Fit the model

Code
start.time <- Sys.time()

ebarbara_model <- R2jags::jags(data=jags.data,
                                model.file='models/ebarbara_model.txt',
                                parameters.to.save=c('b', 'P.pred', 
                                            'A.time1', 'A.time2', 'delta.A',
                                            'alpha0', 'alpha1', 'beta',
                                            'lambda', 'P.ret', 'psi',
                                            'y.PO.new', 'r2_tjur', 
                                            'auc', 'sens', 'fpr',
                                            'delta.Grid'),
                                inits = jags.inits,
                                n.chains=3,
                                n.iter=100000,
                                n.thin=10,
                                n.burnin=10000,
                                DIC = FALSE)
Compiling model graph
   Resolving undeclared variables
   Allocating nodes
Graph information:
   Observed stochastic nodes: 5402
   Unobserved stochastic nodes: 4400
   Total graph size: 213779

Initializing model
Code
end.time <- Sys.time()
time.taken <- end.time - start.time
time.taken
Time difference of 7.836536 hours
Code
saveRDS(ebarbara_model, 'D:/Flo/JAGS_models/ebarbara_model.rds')