library(sf)
library(R2jags) # interface to JAGS
library(tidyverse)
Eira barbara Model Run
Model run for Eira barbara with the data prepared in the previous step (including species and covariates data).
- R Libraries
Data
# Presence-absence data
<- readRDS('data/ebarbara_expert_blob_time1.rds')
ebarbara_expert_blob_time1 <- readRDS('data/ebarbara_expert_blob_time2.rds')
ebarbara_expert_blob_time2
<- readRDS('data/data_ebarbara_PA_time1.rds') %>%
PA_time1 cbind(expert=ebarbara_expert_blob_time1) %>%
filter(!is.na(env.bio_10) &!is.na(env.bio_17) & !is.na(env.npp) & !is.na(env.nontree) & !is.na(expert)) # remove NA's
<- readRDS('data/data_ebarbara_PA_time2.rds') %>%
PA_time2 cbind(expert=ebarbara_expert_blob_time2) %>%
filter(!is.na(env.bio_10) &!is.na(env.bio_17) & !is.na(env.npp) & !is.na(env.nontree) & !is.na(expert)) # remove NA's
# Presence-only data
<- readRDS('data/ebarbara_expert_gridcell.rds')
ebarbara_expert_gridcell
<- readRDS('data/data_ebarbara_PO_time1.rds') %>%
PO_time1 cbind(expert=ebarbara_expert_gridcell$dist_exprt) %>%
filter(!is.na(env.bio_10) &!is.na(env.bio_17) & !is.na(env.npp) & !is.na(env.nontree) & !is.na(acce) & !is.na(count) & !is.na(expert)) # remove NA's
<- readRDS('data/data_ebarbara_PO_time2.rds') %>%
PO_time2 cbind(expert=ebarbara_expert_gridcell$dist_exprt) %>%
filter(!is.na(env.bio_10) &!is.na(env.bio_17) & !is.na(env.npp) & !is.na(env.nontree) & !is.na(acce) & !is.na(count) & !is.na(expert)) # remove NA's
<- rbind(PA_time1 %>% mutate(time=1), PA_time2 %>% mutate(time=2))
PA_time1_time2 <- rbind(PO_time1 %>% mutate(time=1), PO_time2 %>% mutate(time=2)) PO_time1_time2
Splines
- Set the
k
parameter: the number of basis functions.
- Set the model formula.
- Fit the GAM model.
Code
= 9
k
<- formula(count ~ env.bio_10 +
gam.formula +
env.bio_17 +
env.npp +
env.nontree +
expert offset(log(area)) +
s(X, Y, by=as.factor(time), k=k))
<- mgcv::gam(gam.formula,
model.gam data = PO_time1_time2,
family = poisson)
summary(model.gam)
Family: poisson
Link function: log
Formula:
count ~ env.bio_10 + env.bio_17 + env.npp + env.nontree + expert +
offset(log(area)) + s(X, Y, by = as.factor(time), k = k)
Parametric coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -25.19472 0.11437 -220.297 < 2e-16 ***
env.bio_10 0.25347 0.02550 9.940 < 2e-16 ***
env.bio_17 -0.34785 0.13781 -2.524 0.0116 *
env.npp 0.78088 0.04351 17.948 < 2e-16 ***
env.nontree 0.24501 0.04958 4.942 7.75e-07 ***
expert -23.08424 3.87910 -5.951 2.67e-09 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Approximate significance of smooth terms:
edf Ref.df Chi.sq p-value
s(X,Y):as.factor(time)1 7.576 7.951 127.8 <2e-16 ***
s(X,Y):as.factor(time)2 7.958 7.999 811.3 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
R-sq.(adj) = 0.101 Deviance explained = 42.4%
UBRE = 0.72304 Scale est. = 1 n = 4360
- Get the jagam splines values: using
mgcv::jagam
.
Code
<- mgcv::jagam(gam.formula,
jagam.out data = PO_time1_time2,
family = poisson,
file='') # we will not need this file
model {
eta <- X %*% b + offset ## linear predictor
for (i in 1:n) { mu[i] <- exp(eta[i]) } ## expected response
for (i in 1:n) { y[i] ~ dpois(mu[i]) } ## response
## Parametric effect priors CHECK tau=1/30^2 is appropriate!
for (i in 1:6) { b[i] ~ dnorm(0,0.0011) }
## prior for s(X,Y):as.factor(time)1...
K1 <- S1[1:8,1:8] * lambda[1] + S1[1:8,9:16] * lambda[2]
b[7:14] ~ dmnorm(zero[7:14],K1)
## prior for s(X,Y):as.factor(time)2...
K2 <- S2[1:8,1:8] * lambda[3] + S2[1:8,9:16] * lambda[4]
b[15:22] ~ dmnorm(zero[15:22],K2)
## smoothing parameter priors CHECK...
for (i in 1:4) {
lambda[i] ~ dgamma(.05,.005)
rho[i] <- log(lambda[i])
}
}
Code
<- jagam.out$pregam$smooth[[1]]
smooth1 <- jagam.out$pregam$smooth[[2]]
smooth2
# presence-only data
<- mgcv::PredictMat(object = smooth1, data = PO_time1_time2)
jagam.PO.time1 <- mgcv::PredictMat(object = smooth2, data = PO_time1_time2)
jagam.PO.time2
# presence-absence data
<- mgcv::PredictMat(object = smooth1, data = PA_time1_time2)
jagam.PA.time1 <- mgcv::PredictMat(object = smooth2, data = PA_time1_time2) jagam.PA.time2
Prepare data for JAGS
Code
<- cbind('(Intercept)'= 1,
PA.X 'env.bio_10' = PA_time1_time2$env.bio_10,
'env.bio_17' = PA_time1_time2$env.bio_17,
'env.npp' = PA_time1_time2$env.npp,
'env.nontree' = PA_time1_time2$env.nontree,
'expert' = PA_time1_time2$expert,
jagam.PA.time1,
jagam.PA.time2)
<- cbind('(Intercept)'= 1,
PO.X 'env.bio_10' = PO_time1_time2$env.bio_10,
'env.bio_17' = PO_time1_time2$env.bio_17,
'env.npp' = PO_time1_time2$env.npp,
'env.nontree' = PO_time1_time2$env.nontree,
'expert' = PO_time1_time2$expert,
jagam.PO.time1,
jagam.PO.time2)
# number of all columns in X
= ncol(PA.X)
n.X
# number of columns in X of spline basis functions
= ncol(jagam.PA.time1)
n.spl
# number of columns in X of env. predictors + intercept
= n.X - ncol(jagam.PA.time1)*2
n.par
# number of factors of time in X
= length(unique(as.factor(PO_time1_time2$time)))*2
n.fac
# country as an indexed variable
<- as.numeric(as.factor(PO_time1_time2$country))
PO.country
#number of countries
<- length(unique(PO.country))
n.country
#global effort time1/time2 (see Range_map_offset.qmd)
<- rep(0.2715625, nrow(PO_time1_time2))
global.effort
# for AUC
<- seq(0, 1, 0.05)
thr
<- list(n.PA = nrow(PA_time1_time2),
jags.data y.PA = PA_time1_time2$presabs,
X.PA = PA.X,
area.PA = PA_time1_time2$area,
effort = PA_time1_time2$effort,
n.PO = nrow(PO_time1_time2),
n.PO.half = nrow(PO_time1_time2)/2,
y.PO = PO_time1_time2$count,
X.PO = PO.X,
area.PO = PO_time1_time2$area,
acce = PO_time1_time2$acce,
country = PO.country,
global.effort = global.effort,
n.X = n.X,
n.cntr = n.country,
n.par = n.par,
n.fac = n.fac,
n.spl = n.spl,
Z = rep(0, length(jagam.out$jags.data$zero)),
S.time1 = jagam.out$jags.data$S1,
S.time2 = jagam.out$jags.data$S2,
thr = thr)
#jags.data
Specify the model in BUGS language
Code
cat('model
{
# PRIORS --------------------------------------------------
## Thinning at locations with complete accessibility in PO data
# intercept of the decay function for each country of origin.
# It needs a flat prior between 0 and 1
for (c in 1:n.cntr)
{
alpha0[c] ~ dbeta(1, 1)
}
# steepness of the decaying distance-P.ret relationship in PO data
alpha1 ~ dgamma(0.5, 0.05)
## Effect of sampling effort in PA data
beta ~ dnorm(0, 0.01)
## Parametric effects of environment driving the point process intensity
# (it also includes an intercept)
for (r in 1:n.par)
{
b[r] ~ dnorm(0,0.01)
}
## Splines (imported and adjusted form output of mgcv::jagam)
## prior for s(X,Y):as.factor(time)1
sigma.time1 <- S.time1[1:n.spl, 1:n.spl] * gamma[1] +
S.time1[1:n.spl, (n.spl + 1):(n.spl * 2)] * gamma[2]
b[(n.par+1):(n.spl + n.par)] ~ dmnorm(Z[(n.par+1):(n.spl + n.par)], sigma.time1)
## prior for s(X,Y):as.factor(time)2
sigma.time2 <- S.time2[1:n.spl, 1:n.spl] * gamma[3] +
S.time2[1:n.spl, (n.spl + 1):(n.spl * 2)] * gamma[4]
b[(n.X - n.spl + 1):(n.X)] ~ dmnorm(Z[(n.X - n.spl + 1):(n.X)], sigma.time2)
## Priors for smoothing parameter
for (f in 1:n.fac)
{
gamma[f] ~ dgamma(.5,.5)
rho[f] <- log(gamma[f])
}
# LIKELIHOOD --------------------------------------------------
## --- Presence-Absence (PA) data ---
eta.PA <- X.PA %*% b ## linear predictor
for (i in 1:n.PA)
{
# the probability of presence
cloglog(psi[i]) <- eta.PA[i] + log(area.PA[i]) + beta*log(effort[i])
# presences and absences come from a Bernoulli distribution
y.PA[i] ~ dbern(psi[i]*0.9999)
}
## --- Presence-Only (PO) data ---
eta.PO <- X.PO %*% b ## linear predictor
for (j in 1:n.PO)
{
# cell-specific probability of retainin (observing) a point is a function of accessibility
P.ret[j] <- alpha0[country[j]] * exp( (-alpha1) * acce[j])
# true mean number (nu) of points per cell i is the true intensity multiplied by cell area
log(nu[j]) <- eta.PO[j] + log(area.PO[j])
# thinning: the true lambda
lambda[j] <- ifelse(j > n.PO.half,
nu[j] * P.ret[j], #time1
nu[j] * P.ret[j] * global.effort[j]) #time2
# counts of observed points come from a Poisson distribution
y.PO[j] ~ dpois(lambda[j])
}
# PREDICTIONS -------------------------------------------------
eta.pred <- X.PO %*% b
for (j in 1:n.PO)
{
# predicted probability of occurrence in grid cell j
cloglog(P.pred[j]) <- eta.pred[j] + log(area.PO[j])
}
# POSTERIOR PREDICTIVE CHECK --------------------------------
# for PA
for (i in 1:n.PA)
{
# Fit assessments: Tjur R-Squared (fit statistic for logistic regression)
pres[i] <- ifelse(y.PA[i] > 0, psi[i], 0)
absc[i] <- ifelse(y.PA[i] == 0, psi[i], 0)
}
# Discrepancy measures for entire PA data set
pres.n <- sum(y.PA[] > 0)
absc.n <- sum(y.PA[] == 0)
r2_tjur <- abs(sum(pres[])/pres.n - sum(absc[])/absc.n)
# for PO
for (j in 1:n.PO)
{
# Fit assessments: Posterior predictive check and data for DHARMA
y.PO.new[j] ~ dpois(lambda[j])
}
# AUC
for (t in 1:length(thr)) {
sens[t] <- sum((psi > thr[t]) && (y.PA==1))/pres.n #tpr (sensitivity)
spec[t] <- sum((psi < thr[t]) && (y.PA==0))/absc.n
fpr[t] <- 1 - spec[t] #fpr (1-specificity)
}
auc <- sum((sens[2:length(sens)]+sens[1:(length(sens)-1)])/2 *
-(fpr[2:length(fpr)] - fpr[1:(length(fpr)-1)]))
# DERIVED QUANTITIES ------------------------------------------
# area in each time period, and temporal change of area
A.time1 <- sum(P.pred[1:n.PO.half])
A.time2 <- sum(P.pred[(n.PO.half+1):n.PO])
delta.A <- A.time2 - A.time1
# uncertainty for the temporal change
for (j in 1:n.PO.half)
{
delta.Grid[j] <- P.pred[n.PO.half+j] - P.pred[j]
}
}
', file = 'models/ebarbara_model.txt')
Inits function
Code
<- function(model = model.gam)
jags.inits
{return(list(b=rnorm(n.X, mean=model$coefficients, sd=1)))
}
Fit the model
Code
<- Sys.time()
start.time
<- R2jags::jags(data=jags.data,
ebarbara_model model.file='models/ebarbara_model.txt',
parameters.to.save=c('b', 'P.pred',
'A.time1', 'A.time2', 'delta.A',
'alpha0', 'alpha1', 'beta',
'lambda', 'P.ret', 'psi',
'y.PO.new', 'r2_tjur',
'auc', 'sens', 'fpr',
'delta.Grid'),
inits = jags.inits,
n.chains=3,
n.iter=100000,
n.thin=10,
n.burnin=10000,
DIC = FALSE)
Compiling model graph
Resolving undeclared variables
Allocating nodes
Graph information:
Observed stochastic nodes: 5402
Unobserved stochastic nodes: 4400
Total graph size: 213779
Initializing model
Code
<- Sys.time()
end.time <- end.time - start.time
time.taken time.taken
Time difference of 7.836536 hours
Code
saveRDS(ebarbara_model, 'D:/Flo/JAGS_models/ebarbara_model.rds')