Nasua nasua Model Run

Model run for Nasua nasua with the data prepared in the previous step (including species and covariates data).

- R Libraries

Code
library(sf)
library(R2jags) # interface to JAGS
library(tidyverse)
Data
# Presence-absence data
nnasua_expert_blob_time1 <- readRDS('data/nnasua_expert_blob_time1.rds')
nnasua_expert_blob_time2 <- readRDS('data/nnasua_expert_blob_time2.rds')
PA_time1 <- readRDS('data/data_nnasua_PA_time1.rds') %>%
  cbind(expert=nnasua_expert_blob_time1) %>%
  filter(!is.na(env.bio_10) & !is.na(env.bio_13) & !is.na(env.npp) & !is.na(env.nontree) & !is.na(expert)) # remove NAs
PA_time2 <- readRDS('data/data_nnasua_PA_time2.rds') %>%
  cbind(expert=nnasua_expert_blob_time2) %>% 
  filter(!is.na(env.bio_10) & !is.na(env.bio_13) & !is.na(env.npp) & !is.na(env.nontree) & !is.na(expert)) # remove NAs
# Presence-only data
nnasua_expert_gridcell <- readRDS('data/nnasua_expert_gridcell.rds')
PO_time1 <- readRDS('data/data_nnasua_PO_time1.rds') %>%
  cbind(expert=nnasua_expert_gridcell$dist_exprt) %>% 
  filter(!is.na(env.bio_10) & !is.na(env.bio_13) & !is.na(env.npp) & !is.na(env.nontree) & !is.na(acce) & !is.na(count) & !is.na(expert)) # remove NAs
PO_time2  <- readRDS('data/data_nnasua_PO_time2.rds')  %>%
  cbind(expert=nnasua_expert_gridcell$dist_exprt) %>% 
  filter(!is.na(env.bio_10) & !is.na(env.bio_13) & !is.na(env.npp) & !is.na(env.nontree) & !is.na(acce) & !is.na(count) & !is.na(expert)) # remove NAs
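# Optional sanity check (not in the original script): the JAGS model below
# splits the stacked PO table in half by time (n.PO.half), so both periods
# must keep the same number of grid cells after the NA filtering above.
stopifnot(nrow(PO_time1) == nrow(PO_time2))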
PA_time1_time2 <- rbind(PA_time1 %>% mutate(time=1), PA_time2 %>% mutate(time=2)) 
PO_time1_time2 <- rbind(PO_time1 %>% mutate(time=1), PO_time2 %>% mutate(time=2))

Splines
- Set the k parameter: the number of basis functions.
- Set the model formula.
- Fit the GAM model.
Code
k = 6
gam.formula <- formula(count ~ env.bio_10 + 
                               env.bio_13 + 
                               env.npp + 
                               env.nontree +
                               expert +
                               offset(log(area)) +
                               s(X, Y, by=as.factor(time), k=k))
model.gam <- mgcv::gam(gam.formula, 
                       data = PO_time1_time2,
                       family = poisson)
summary(model.gam)
Family: poisson 
Link function: log 
Formula:
count ~ env.bio_10 + env.bio_13 + env.npp + env.nontree + expert + 
    offset(log(area)) + s(X, Y, by = as.factor(time), k = k)
Parametric coefficients:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept) -20.84803    0.33203 -62.790  < 2e-16 ***
env.bio_10    0.19990    0.03730   5.359 8.38e-08 ***
env.bio_13   -0.49783    0.09523  -5.228 1.72e-07 ***
env.npp       0.60349    0.04525  13.337  < 2e-16 ***
env.nontree   0.33929    0.05453   6.222 4.92e-10 ***
expert      -46.80762    2.19666 -21.309  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Approximate significance of smooth terms:
                          edf Ref.df Chi.sq p-value    
s(X,Y):as.factor(time)1 4.966  4.999  234.1  <2e-16 ***
s(X,Y):as.factor(time)2 4.978  5.000 1523.5  <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
R-sq.(adj) =  0.0442   Deviance explained = 33.7%
UBRE =  1.523  Scale est. = 1         n = 4360
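Before exporting the spline structure to JAGS, the fitted smooths and the choice of k = 6 can be checked with standard mgcv diagnostics; a minimal, optional sketch:

# Plot the two time-specific spatial smooths and run basis-dimension checks.
plot(model.gam, pages = 1, scheme = 2)
mgcv::gam.check(model.gam)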
- Get the jagam spline values using mgcv::jagam.
Code
jagam.out <- mgcv::jagam(gam.formula, 
                         data = PO_time1_time2,
                         family = poisson,
                         file = '') # we will not need this file

model {
  eta <- X %*% b + offset ## linear predictor
  for (i in 1:n) { mu[i] <-  exp(eta[i]) } ## expected response
  for (i in 1:n) { y[i] ~ dpois(mu[i]) } ## response 
  ## Parametric effect priors CHECK tau=1/95^2 is appropriate!
  for (i in 1:6) { b[i] ~ dnorm(0,0.00011) }
  ## prior for s(X,Y):as.factor(time)1... 
  K1 <- S1[1:5,1:5] * lambda[1]  + S1[1:5,6:10] * lambda[2]
  b[7:11] ~ dmnorm(zero[7:11],K1) 
  ## prior for s(X,Y):as.factor(time)2... 
  K2 <- S2[1:5,1:5] * lambda[3]  + S2[1:5,6:10] * lambda[4]
  b[12:16] ~ dmnorm(zero[12:16],K2) 
  ## smoothing parameter priors CHECK...
  for (i in 1:4) {
    lambda[i] ~ dgamma(.05,.005)
    rho[i] <- log(lambda[i])
  }
}
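Before adapting this template, it can help to inspect the objects jagam prepares for JAGS; a quick, optional sketch:

# Design matrix X, penalty matrices S1/S2 for the two time-specific smooths,
# the zero vector used in the dmnorm priors, plus the response and offset.
str(jagam.out$jags.data, max.level = 1)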
Code
smooth1 <- jagam.out$pregam$smooth[[1]]
smooth2 <- jagam.out$pregam$smooth[[2]]
# presence-only data
jagam.PO.time1 <- mgcv::PredictMat(object = smooth1, data = PO_time1_time2)
jagam.PO.time2 <- mgcv::PredictMat(object = smooth2, data = PO_time1_time2)
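# Optional consistency check (not in the original script): evaluated on the PO
# data, these prediction matrices should reproduce the spline columns of the
# design matrix that jagam built, i.e. its last
# ncol(jagam.PO.time1) + ncol(jagam.PO.time2) columns.
X.jagam <- jagam.out$jags.data$X
spl.cols <- (ncol(X.jagam) - ncol(jagam.PO.time1) - ncol(jagam.PO.time2) + 1):ncol(X.jagam)
all.equal(unname(X.jagam[, spl.cols]),
          unname(cbind(jagam.PO.time1, jagam.PO.time2)))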
# presence-absence data
jagam.PA.time1 <- mgcv::PredictMat(object = smooth1, data = PA_time1_time2)
jagam.PA.time2 <- mgcv::PredictMat(object = smooth2, data = PA_time1_time2)

Prepare data for JAGS
Code
PA.X <- cbind('(Intercept)'= 1, 
              'env.bio_10' = PA_time1_time2$env.bio_10,
              'env.bio_13' = PA_time1_time2$env.bio_13,
              'env.npp' = PA_time1_time2$env.npp,
              'env.nontree' = PA_time1_time2$env.nontree,
              'expert' = PA_time1_time2$expert,
              jagam.PA.time1, 
              jagam.PA.time2)
PO.X <- cbind('(Intercept)'= 1, 
              'env.bio_10' = PO_time1_time2$env.bio_10,
              'env.bio_13' = PO_time1_time2$env.bio_13,
              'env.npp' = PO_time1_time2$env.npp,
              'env.nontree' = PO_time1_time2$env.nontree,
              'expert' = PO_time1_time2$expert,
              jagam.PO.time1, 
              jagam.PO.time2)
# number of all columns in X
n.X = ncol(PA.X)
# number of columns in X of spline basis functions
n.spl = ncol(jagam.PA.time1)
# number of columns in X of env. predictors + intercept
n.par = n.X - ncol(jagam.PA.time1)*2
# number of smoothing parameters in the spline priors (two per time level)
n.fac = length(unique(as.factor(PO_time1_time2$time)))*2
# country as an indexed variable
PO.country  <- as.numeric(as.factor(PO_time1_time2$country))
#number of countries
n.country <- length(unique(PO.country))
#global effort time1/time2 (see Range_map_offset.qmd)
global.effort <- rep(0.2715625, nrow(PO_time1_time2))
# for AUC
thr <- seq(0, 1, 0.05)
jags.data <- list(n.PA = nrow(PA_time1_time2),
                  y.PA = PA_time1_time2$presabs, 
                  X.PA = PA.X,
                  area.PA = PA_time1_time2$area,
                  effort = PA_time1_time2$effort,
                  n.PO = nrow(PO_time1_time2),
                  n.PO.half = nrow(PO_time1_time2)/2,
                  y.PO = PO_time1_time2$count, 
                  X.PO = PO.X,
                  area.PO = PO_time1_time2$area, 
                  acce = PO_time1_time2$acce,
                  country = PO.country,
                  global.effort = global.effort,
                  n.X = n.X,
                  n.cntr = n.country,
                  n.par = n.par,
                  n.fac = n.fac,
                  n.spl = n.spl,
                  Z = rep(0, length(jagam.out$jags.data$zero)),
                  S.time1 = jagam.out$jags.data$S1,
                  S.time2 = jagam.out$jags.data$S2, 
                  thr = thr)  
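# Optional checks (not in the original script): the JAGS model indexes PO rows
# by n.PO.half and the coefficient vector by n.par and n.spl, so the
# bookkeeping above should satisfy these identities.
stopifnot(ncol(PA.X) == ncol(PO.X),
          nrow(PO_time1_time2) %% 2 == 0,
          n.X == n.par + 2 * n.spl)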
#jags.data

Specify the model in BUGS language
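The PO likelihood in the model below thins the true intensity by a country-specific accessibility decay, P.ret = alpha0 * exp(-alpha1 * acce). A small illustration of that curve (the alpha values are hypothetical, chosen only to show the shape):

# Illustration only: alpha0 is the retention probability at full accessibility
# (acce = 0) and alpha1 controls how quickly retention decays with acce.
acce.grid <- seq(0, 10, length.out = 200)
plot(acce.grid, 0.8 * exp(-0.5 * acce.grid), type = 'l',
     xlab = 'accessibility (acce)', ylab = 'P.ret')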
Code
cat('model
  { 
    # PRIORS --------------------------------------------------
    
    ## Thinning at locations with complete accessibility in PO data
      
      # intercept of the decay function for each country of origin. 
      # It needs a flat prior between 0 and 1
      for (c in 1:n.cntr) 
      {
        alpha0[c] ~ dbeta(1, 1)  
      }
      # steepness of the decaying distance-P.ret relationship in PO data
      alpha1 ~ dgamma(0.5, 0.05)   
      
    ## Effect of sampling effort in PA data
      beta ~ dnorm(0, 0.01)
  
    ## Parametric effects of environment driving the point process intensity
     # (it also includes an intercept)
     
      for (r in 1:n.par)
      { 
        b[r] ~ dnorm(0,0.01) 
      }
      
    ## Splines (imported and adjusted from the output of mgcv::jagam)
    
      ## prior for s(X,Y):as.factor(time)1 
      sigma.time1 <- S.time1[1:n.spl, 1:n.spl] * gamma[1]  + 
                     S.time1[1:n.spl, (n.spl + 1):(n.spl * 2)] * gamma[2]
      b[(n.par+1):(n.spl + n.par)] ~ dmnorm(Z[(n.par+1):(n.spl + n.par)], sigma.time1) 
     
      ## prior for s(X,Y):as.factor(time)2
      sigma.time2 <- S.time2[1:n.spl, 1:n.spl] * gamma[3]  + 
                     S.time2[1:n.spl, (n.spl + 1):(n.spl * 2)] * gamma[4]
      b[(n.X - n.spl + 1):(n.X)] ~ dmnorm(Z[(n.X - n.spl + 1):(n.X)], sigma.time2) 
     
      ## Priors for smoothing parameter 
      for (f in 1:n.fac) 
      {
        gamma[f] ~ dgamma(.5,.5)
        rho[f] <- log(gamma[f])
      }
    # LIKELIHOOD --------------------------------------------------
    
      ## --- Presence-Absence (PA) data ---
      
       eta.PA <- X.PA %*% b ## linear predictor
         
        
       for (i in 1:n.PA) 
       { 
         # the probability of presence
         cloglog(psi[i]) <- eta.PA[i] + log(area.PA[i]) + beta*log(effort[i])
        
         # presences and absences come from a Bernoulli distribution
         y.PA[i] ~ dbern(psi[i]*0.9999) 
       }
  
      ## --- Presence-Only (PO) data --- 
  
      eta.PO <- X.PO %*% b  ## linear predictor
      for (j in 1:n.PO)
      {
        # cell-specific probability of retaining (observing) a point is a function of accessibility
        P.ret[j] <- alpha0[country[j]] * exp( (-alpha1) * acce[j]) 
        
        # true mean number (nu) of points per cell i is the true intensity multiplied by cell area
        log(nu[j]) <- eta.PO[j] + log(area.PO[j])
        # thinning: the true lambda
        lambda[j] <- ifelse(j > n.PO.half,
                                nu[j] * P.ret[j],                    # time2
                                nu[j] * P.ret[j] * global.effort[j]) # time1
        # counts of observed points come from a Poisson distribution
        y.PO[j] ~ dpois(lambda[j]) 
      }
  
    # PREDICTIONS -------------------------------------------------
    eta.pred <- X.PO %*% b
    for (j in 1:n.PO)
    {
      # predicted probability of occurrence in grid cell j
      cloglog(P.pred[j]) <- eta.pred[j] + log(area.PO[j])
    }
    # POSTERIOR PREDICTIVE CHECK  --------------------------------
    
    # for PA
    for (i in 1:n.PA)
    {
      # Fit assessments: Tjur R-Squared (fit statistic for logistic regression)
      pres[i] <- ifelse(y.PA[i] > 0, psi[i], 0)
      absc[i] <- ifelse(y.PA[i] == 0, psi[i], 0)
    }
    
    # Discrepancy measures for entire PA data set
    pres.n <- sum(y.PA[] > 0)
    absc.n <- sum(y.PA[] == 0)
    r2_tjur <- abs(sum(pres[])/pres.n - sum(absc[])/absc.n)
    # for PO
    for (j in 1:n.PO)
    {
      # Fit assessments: Posterior predictive check and data for DHARMA
      y.PO.new[j] ~ dpois(lambda[j]) 
    }
    
    # AUC
    for (t in 1:length(thr)) {
        sens[t] <- sum((psi > thr[t]) && (y.PA==1))/pres.n   #tpr (sensitivity)
        spec[t] <- sum((psi < thr[t]) && (y.PA==0))/absc.n   
        fpr[t] <- 1 - spec[t]                                #fpr (1-specificity)
    }
    auc <- sum((sens[2:length(sens)]+sens[1:(length(sens)-1)])/2 * 
    -(fpr[2:length(fpr)] - fpr[1:(length(fpr)-1)]))
    # DERIVED QUANTITIES ------------------------------------------
    
    # area in each time period, and temporal change of area
    A.time1 <- sum(P.pred[1:n.PO.half])
    A.time2 <- sum(P.pred[(n.PO.half+1):n.PO])
    delta.A <- A.time2 - A.time1
    
    # uncertainty for the temporal change
    for (j in 1:n.PO.half)
    {
      delta.Grid[j] <- P.pred[n.PO.half+j] - P.pred[j]
    }
  }
', file = 'models/nnasua_model.txt')

Inits function
Code
jags.inits <- function(model = model.gam)
{
  # start the coefficient vector b near the GAM estimates
  return(list(b=rnorm(n.X, mean=model$coefficients, sd=1)))
}

Fit the model
Code
start.time <- Sys.time()
nnasua_model <- R2jags::jags(data=jags.data,
                                model.file='models/nnasua_model.txt',
                                parameters.to.save=c('b', 'P.pred', 
                                            'A.time1', 'A.time2', 'delta.A',
                                            'alpha0', 'alpha1', 'beta',
                                            'lambda', 'P.ret', 'psi',
                                            'y.PO.new', 'r2_tjur', 
                                            'auc', 'sens', 'fpr',
                                            'delta.Grid'),
                                inits = jags.inits,
                                n.chains=3,
                                n.iter=220000,
                                n.thin=10,
                                n.burnin=22000,
                                DIC = FALSE)

Compiling model graph
   Resolving undeclared variables
   Allocating nodes
Graph information:
   Observed stochastic nodes: 5358
   Unobserved stochastic nodes: 4400
   Total graph size: 180003
Initializing model
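With these settings each chain retains (n.iter - n.burnin) / n.thin = 19,800 samples, i.e. 59,400 posterior draws per saved parameter across the three chains:

# Posterior draws kept per saved parameter: 3 chains * (220000 - 22000) / 10
3 * (220000 - 22000) / 10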
Code
end.time <- Sys.time()
time.taken <- end.time - start.time
time.taken

Time difference of 17.17962 hours
Code
saveRDS(nnasua_model, 'D:/Flo/JAGS_models/nnasua_model.rds')
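A sensible follow-up before using the saved object is a quick convergence check on a few key parameters; a sketch (not part of the original workflow):

# R2jags stores posterior summaries (including Rhat and effective sample size)
# in BUGSoutput$summary, one row per saved parameter.
nnasua_model$BUGSoutput$summary[c('alpha1', 'beta', 'A.time1', 'A.time2', 'delta.A'),
                                c('mean', 'sd', 'Rhat', 'n.eff')]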