Eira barbara Data Preparation

Author

Florencia Grattarola

Published

October 1, 2024

Data preparation for modelling for Eira barbara, with the covariates bio10, bio17, npp, and nontree (selected in the previous step).

library(knitr)
library(lubridate)
library(tmap)
library(leaflet)
library(patchwork)
library(terra)
library(sf)
sf::sf_use_s2(FALSE)
library(tidyverse)
equalareaCRS <-  '+proj=laea +lon_0=-73.125 +lat_0=0 +datum=WGS84 +units=m +no_defs'
latam <- st_read('data/latam.gpkg', layer = 'latam', quiet = T)
countries <- st_read('data/latam.gpkg', layer = 'countries', quiet = T)
latam_land <- st_read('data/latam.gpkg', layer = 'latam_land', quiet = T)
latam_raster <- rast('data/latam_raster.tif', lyrs='latam')
latam_countries <- rast('data/latam_raster.tif', lyrs='countries')

# IUCN range distribution
ebarbara_IUCN <- sf::st_read('big_data/ebarbara_IUCN.shp', quiet = T) %>% sf::st_transform(crs=equalareaCRS)

MAP.IUCN <- tm_graticules(alpha = 0.3) +
    tm_shape(countries) +
    tm_fill(col='grey90') +
    tm_borders(col='grey60', alpha = 0.4) +
    tm_shape(ebarbara_IUCN) +
    tm_fill(col='grey20', alpha = 0.4) +
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

MAP.IUCN

Data

Species data (PA & PO)

# presence-only rasters
PO_ebarbara_time1_raster <- terra::rast('data/species_POPA_data/PO_ebarbara_time1_raster.tif')
PO_ebarbara_time2_raster <- terra::rast('data/species_POPA_data/PO_ebarbara_time2_raster.tif')

# blobs
PA_ebarbara_time1_blobs <- readRDS('data/species_POPA_data/PA_ebarbara_time1_blobs.rds')
PA_ebarbara_time2_blobs <- readRDS('data/species_POPA_data/PA_ebarbara_time2_blobs.rds')

Covariates

The following covariates were chosen using the ‘variableSelection’ script:

  • bio10: Temperature Annual Range
  • bio17: Mean Temperature of Warmest Quarter
  • npp: Net Primary Production (npp)
  • nontree: Percentage of No Tree Cover
Code
bio <- rast(here::here('big_data/bio_high.tif'))
npp <- rast(here::here('big_data/npp_high.tif'))
npp <- resample(npp, bio, method='bilinear') 

|---------|---------|---------|---------|
=========================================
                                          
Code
names(npp) <- 'npp'
nontree <- rast(here::here('big_data/nontree_high.tif'))
nontree <- resample(nontree, bio, method='bilinear') 

|---------|---------|---------|---------|
=========================================
                                          
Code
names(nontree) <- 'nontree'

# environmental layers for PA
env <- c(bio[[c('bio_10','bio_17')]], nontree, npp) %>% 
  classify(cbind(NaN, NA))

|---------|---------|---------|---------|
=========================================
                                          
Code
# resample and scaled values for PO
env_100k_unscaled <- terra::resample(env, latam_raster, method='bilinear') %>% 
  classify(cbind(NaN, NA))

env_100k_resampled <- env_100k_unscaled %>% scale() # scaled values to 0 mean and sd of 1

Plots

nontree.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["nontree"]]) +
    tm_raster(palette = 'YlGn', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

npp.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["npp"]]) +
    tm_raster(palette = 'BuGn', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

bio_10.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["bio_10"]]) +
    tm_raster(palette = 'YlOrRd', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

bio_17.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["bio_17"]]) +
    tm_raster(palette = 'PuBu', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

nontree.plot 

npp.plot

bio_10.plot

bio_17.plot

Thinning parametres

  • acce: Accessibility
  • country: Country of origin
Code
acce_100k_resampled <- terra::rast('big_data/acce.tif') %>% 
  classify(cbind(NaN, NA)) # values are scaled

Plot

acce.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(acce_100k_resampled[["acce"]]) +
    tm_raster(palette = 'RdYlBu', midpoint = NA, style= "cont", n=10)  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

acce.plot

Data for the model

Presence-only data

Code
# calculate area in each raster cell
PO_ebarbara.area <- terra::cellSize(PO_ebarbara_time1_raster[[1]], unit='m', transform=F) %>% 
  classify(cbind(NaN, NA)) %>% terra::values()

# calculate coordinates for each raster cell
PO_ebarbara.coords <- terra::xyFromCell(PO_ebarbara_time1_raster, cell=1:ncell(PO_ebarbara_time1_raster)) %>% as.data.frame()
names(PO_ebarbara.coords) <- c('X', 'Y')

# number of cells in the rasters (both the same)
PO_ebarbara.cell <- 1:ncell(PO_ebarbara_time1_raster)

# with the environmental variables values for each cell
PO_ebarbara.env <- env_100k_resampled[] # scaled values

# with the mean accessibility value for each cell
PO_ebarbara.acce <- acce_100k_resampled # scaled values

# calculate point count in each raster cell
PO_ebarbara_time1.count <- PO_ebarbara_time1_raster %>%
    classify(cbind(NaN, NA)) %>% terra::values() %>% as_tibble %>% 
  dplyr::select(count)

PO_ebarbara_time2.count <- PO_ebarbara_time2_raster %>%
    classify(cbind(NaN, NA)) %>% terra::values() %>% as_tibble %>% 
  dplyr::select(count)

PO_ebarbara.countries <- latam_countries %>% classify(cbind(NaN, NA))

# save time1 and time2 datasets 
PO_ebarbara_time1.model.data <- data.frame(PO_ebarbara.coords,
                                            pixel = PO_ebarbara.cell[],
                                            area = PO_ebarbara.area,
                                            pt.count = PO_ebarbara_time1.count,
                                            env = PO_ebarbara.env,
                                            acce = PO_ebarbara.acce[], 
                                            country = PO_ebarbara.countries[])

PO_ebarbara_time2.model.data <- data.frame(PO_ebarbara.coords,
                                            pixel = PO_ebarbara.cell[],
                                            area = PO_ebarbara.area,
                                            pt.count = PO_ebarbara_time2.count,
                                            env = PO_ebarbara.env,
                                            acce = PO_ebarbara.acce[], 
                                            country = PO_ebarbara.countries[])
  • Check data
Code
head(PO_ebarbara_time1.model.data) %>% kable()
X Y pixel area count env.bio_10 env.bio_17 env.nontree env.npp acce countries
-4357686 3867905 1 1e+10 NA NA NA NA NA NA NA
-4257686 3867905 2 1e+10 NA NA NA NA NA NA NA
-4157686 3867905 3 1e+10 0 -1.690030 1.194141 0.4573118 -1.194929 0.0115694 47
-4057686 3867905 4 1e+10 0 -1.674205 2.200438 -0.6126802 -1.597718 0.0209173 47
-3957686 3867905 5 1e+10 NA -1.680337 2.434686 -0.9573888 -1.786366 0.0270081 NA
-3857686 3867905 6 1e+10 NA -1.600721 2.471932 -1.6611980 -1.858230 0.0456958 NA
Code
head(PO_ebarbara_time2.model.data) %>% kable()
X Y pixel area count env.bio_10 env.bio_17 env.nontree env.npp acce countries
-4357686 3867905 1 1e+10 NA NA NA NA NA NA NA
-4257686 3867905 2 1e+10 NA NA NA NA NA NA NA
-4157686 3867905 3 1e+10 0 -1.690030 1.194141 0.4573118 -1.194929 0.0115694 47
-4057686 3867905 4 1e+10 0 -1.674205 2.200438 -0.6126802 -1.597718 0.0209173 47
-3957686 3867905 5 1e+10 NA -1.680337 2.434686 -0.9573888 -1.786366 0.0270081 NA
-3857686 3867905 6 1e+10 NA -1.600721 2.471932 -1.6611980 -1.858230 0.0456958 NA

Presence-absence data

Code
# calculate area, coordinates, and point count in each raster cell
PA_ebarbara.area.time1 <- as.numeric(PA_ebarbara_time1_blobs$blobArea) 
PA_ebarbara.area.time2 <- as.numeric(PA_ebarbara_time2_blobs$blobArea) 

PA_ebarbara.coords.time1 <- st_coordinates(st_centroid(PA_ebarbara_time1_blobs)) %>% as_tibble()
PA_ebarbara.coords.time2 <- st_coordinates(st_centroid(PA_ebarbara_time2_blobs)) %>% as_tibble()

PA_ebarbara.pixel.time1 <- terra::cells(PO_ebarbara_time1_raster, vect(st_centroid(PA_ebarbara_time1_blobs))) 
PA_ebarbara.pixel.time2 <- terra::cells(PO_ebarbara_time2_raster, vect(st_centroid(PA_ebarbara_time2_blobs))) 

# mode to calculate the most frequent value for the blob - this needs to be changed to getting the %are for each landcover
mode_raster  <- function(x, na.rm = FALSE) {
  if(na.rm){ x = x[!is.na(x)]}
  ux <- unique(x)
  return(ux[which.max(tabulate(match(x, ux)))])
}

PA_ebarbara.env.time1 <- terra::extract(x = env, 
                                        y = vect(PA_ebarbara_time1_blobs), 
                                        fun = mean, na.rm=TRUE) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) %>% scale()

PA_ebarbara.env.time2 <- terra::extract(x = env, 
                                        y = vect(PA_ebarbara_time2_blobs), 
                                        fun = mean, na.rm=TRUE) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) %>% scale()

PA_ebarbara_time1.model.data <- data.frame(pixel = PA_ebarbara.pixel.time1, 
                                            PA_ebarbara.coords.time1,
                                            area = PA_ebarbara.area.time1,
                                            presabs = PA_ebarbara_time1_blobs$presence,
                                            effort = PA_ebarbara_time1_blobs$effort,
                                            env = PA_ebarbara.env.time1[,2:5])

PA_ebarbara_time2.model.data <- data.frame(pixel = PA_ebarbara.pixel.time2, 
                                            PA_ebarbara.coords.time2,
                                            area = PA_ebarbara.area.time2,
                                            presabs = PA_ebarbara_time2_blobs$presence,
                                            effort = PA_ebarbara_time2_blobs$effort,
                                            env = PA_ebarbara.env.time2[,2:5])
  • Check data
Code
head(PA_ebarbara_time1.model.data) %>% kable()
pixel.ID pixel.cell X Y area presabs effort env.bio_10 env.bio_17 env.nontree env.npp
1 1661 -1768526 1918184 241873943 0 1043 -0.4793405 -0.1603222 -0.9811847 0.8189866
2 1660 -1844290 1927648 537081878 1 2404 -0.0673708 -0.0641428 -0.7325427 0.3621302
3 1661 -1731415 1940145 2241827859 1 11078 0.3368660 -0.1110816 -0.8964063 0.8676071
4 1660 -1820735 1948513 162791738 0 1291 -0.4023207 0.0491335 -0.8337618 0.6329681
5 1661 -1782702 1959695 756798182 0 1779 -0.0803951 0.1679013 -0.9473701 0.7204713
6 1567 -2552096 2041811 19990863 0 3300 -1.5983222 2.1952419 0.3563696 -0.0070323
Code
head(PA_ebarbara_time2.model.data) %>% kable()
pixel.ID pixel.cell X Y area presabs effort env.bio_10 env.bio_17 env.nontree env.npp
1 3734 -817257.8 -439796.5 2.229446e+09 1 11095 -1.0349106 -1.072807 0.3606534 -0.7710085
2 2527 -1139813.8 954656.5 1.542295e+03 1 12276 0.5494966 -1.148757 -1.1677654 -0.2779545
3 2527 -1168048.8 958569.1 1.049520e-01 1 1585 0.8570783 -1.142916 -1.4965330 -0.5703401
4 2527 -1116769.5 965197.0 7.494574e+01 1 1440 0.5462587 -1.196240 -1.4304748 -0.0384092
5 2527 -1108298.2 967725.2 2.546836e+01 1 400 1.4481558 -1.343220 -1.2623775 0.0808761
6 2528 -1086336.4 1003862.9 2.398904e-01 1 7164 2.2519956 -1.614579 -0.9842253 0.5782478

Save data

saveRDS(PO_ebarbara_time1.model.data, 'data/species_POPA_data/data_ebarbara_PO_time1.rds')
saveRDS(PO_ebarbara_time2.model.data, 'data/species_POPA_data/data_ebarbara_PO_time2.rds')
saveRDS(PA_ebarbara_time1.model.data, 'data/species_POPA_data/data_ebarbara_PA_time1.rds')
saveRDS(PA_ebarbara_time2.model.data, 'data/species_POPA_data/data_ebarbara_PA_time2.rds')

Extra: Datasets in the covariate space

The range (and interquartile range) of values for each covariate that is sampled by each dataset

Code
getRangeIQR <- function(env_PX){
  env_PX.df <- tibble(env= character(),
                          range = numeric(),
                     iqr = numeric(),
                     stringsAsFactors=FALSE)
  
  for (i in 1:ncol(env_PX)){
    df <- tibble(env = names(env_PX[i]),
             range = range(env_PX[[i]], na.rm=T)[2],
             iqr=  IQR(env_PA[[i]], na.rm = T))
    env_PX.df <- rbind(env_PX.df, df)
  }
  return(env_PX.df)
}

# covariates for PA
env_PA <- terra::extract(x = env, 
                         y = rbind(vect(PA_ebarbara_time1_blobs), 
                                   vect(PA_ebarbara_time2_blobs)), 
                                     fun = mean, na.rm=TRUE) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) 

summary(env_PA[-1])
     bio_10             bio_17          nontree            npp       
 Min.   :   5.066   Min.   : 8.486   Min.   :  2.45   Min.   :    0  
 1st Qu.: 372.000   1st Qu.:15.680   1st Qu.: 34.94   1st Qu.: 6963  
 Median : 476.471   Median :17.499   Median : 57.67   Median : 9498  
 Mean   : 477.946   Mean   :16.916   Mean   : 55.11   Mean   : 9781  
 3rd Qu.: 591.082   3rd Qu.:18.477   3rd Qu.: 69.74   3rd Qu.:12746  
 Max.   :1039.348   Max.   :32.448   Max.   :200.00   Max.   :21085  
Code
env_PA.RangeIQR <- getRangeIQR(env_PA[-1]) %>% mutate(data='PA')

for (i in 2:ncol(env_PA)){
    range <- range(env_PA[[i]], na.rm=T)[2]
    iqr <- IQR(env_PA[[i]], na.rm = T)
    print(str_glue('PA {names(env_PA[i])} -> range: {range} - IQR: {iqr}'))
}
PA bio_10 -> range: 1039.34758506756 - IQR: 219.082695007324
PA bio_17 -> range: 32.4479134503533 - IQR: 2.79695015047445
PA nontree -> range: 200 - IQR: 34.8002891540527
PA npp -> range: 21085.2897600446 - IQR: 5783.17029000419
Code
# covariates layers for PO
env_PO <- terra::resample(env, latam_raster, method='bilinear') %>% 
  classify(cbind(NaN, NA)) %>% as.data.frame()

summary(env_PO)
     bio_10              bio_17          nontree             npp           
 Min.   :   0.1098   Min.   : 8.357   Min.   :  6.068   Min.   :    1.935  
 1st Qu.: 205.2004   1st Qu.:12.711   1st Qu.: 32.278   1st Qu.: 4567.937  
 Median : 332.9163   Median :17.435   Median : 52.772   Median : 8232.417  
 Mean   : 351.7265   Mean   :18.803   Mean   : 49.836   Mean   : 7782.353  
 3rd Qu.: 479.6892   3rd Qu.:23.763   3rd Qu.: 66.518   3rd Qu.:10876.451  
 Max.   :1463.8673   Max.   :39.095   Max.   :124.838   Max.   :17393.445  
Code
env_PO.RangeIQR <- getRangeIQR(env_PO) %>% mutate(data='PO')

for (i in 1:ncol(env_PO)){
    range <- range(env_PO[[i]], na.rm=T)[2]
    iqr <- IQR(env_PO[[i]], na.rm = T)
    print(str_glue('PO {names(env_PO[i])} -> range: {range} - IQR: {iqr}'))
}
PO bio_10 -> range: 1463.86730957031 - IQR: 274.488872528076
PO bio_17 -> range: 39.0950622558594 - IQR: 11.0515887737274
PO nontree -> range: 124.83846282959 - IQR: 34.2391386032104
PO npp -> range: 17393.4453125 - IQR: 6308.51477050781