Nasua nasua Data Preparation

Author

Florencia Grattarola

Published

October 1, 2024

Data preparation for modelling for Nasua nasua, with the covariates bio_10, bio_13, npp, and nontree (selected in the previous step).

library(knitr)
library(lubridate)
library(tmap)
library(leaflet)
library(patchwork)
library(terra)
library(sf)
sf::sf_use_s2(FALSE)
library(tidyverse)
equalareaCRS <-  '+proj=laea +lon_0=-73.125 +lat_0=0 +datum=WGS84 +units=m +no_defs'
latam <- st_read('data/latam.gpkg', layer = 'latam', quiet = T)
countries <- st_read('data/latam.gpkg', layer = 'countries', quiet = T)
latam_land <- st_read('data/latam.gpkg', layer = 'latam_land', quiet = T)
latam_raster <- rast('data/latam_raster.tif', lyrs='latam')
latam_countries <- rast('data/latam_raster.tif', lyrs='countries')

# IUCN range distribution
nnasua_IUCN <- sf::st_read('big_data/nnasua_IUCN.shp', quiet = T) %>% sf::st_transform(crs=equalareaCRS)

MAP.IUCN <- tm_graticules(alpha = 0.3) +
    tm_shape(countries) +
    tm_fill(col='grey90') +
    tm_borders(col='grey60', alpha = 0.4) +
    tm_shape(nnasua_IUCN) +
    tm_fill(col='grey20', alpha = 0.4) +
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

MAP.IUCN

Data

Species data (PA & PO)

# presence-only rasters
PO_nnasua_time1_raster <- terra::rast('data/species_POPA_data/PO_nnasua_time1_raster.tif')
PO_nnasua_time2_raster <- terra::rast('data/species_POPA_data/PO_nnasua_time2_raster.tif')

# blobs
PA_nnasua_time1_blobs <- readRDS('data/species_POPA_data/PA_nnasua_time1_blobs.rds')
PA_nnasua_time2_blobs <- readRDS('data/species_POPA_data/PA_nnasua_time2_blobs.rds')

Covariates

The following covariates were chosen using the ‘variableSelection’ script:

  • bio_10: Mean Temperature of Warmest Quarter
  • bio_13: Precipitation of Wettest Month
  • npp: Net Primary Production (npp)
  • nontree: Percentage of No Tree Cover
Code
bio <- rast('big_data/bio_high.tif')
npp <- rast('big_data/npp_high.tif')
npp <- resample(npp, bio, method='bilinear') 
names(npp) <- 'npp'
nontree <- rast('big_data/nontree_high.tif')
nontree <- resample(nontree, bio, method='bilinear') 
names(nontree) <- 'nontree'

# environmental layers for PA
env <- c(bio[[c('bio_10','bio_13')]], nontree, npp) %>% 
  classify(cbind(NaN, NA))

# resample and scaled values for PO
env_100k_unscaled <- terra::resample(env, latam_raster, method='bilinear') %>% 
  classify(cbind(NaN, NA))

env_100k_resampled <- env_100k_unscaled %>% scale() # scaled values to 0 mean and sd of 1

Plots

nontree.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["nontree"]]) +
    tm_raster(palette = 'YlGn', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

npp.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["npp"]]) +
    tm_raster(palette = 'Greens', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

bio_10.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["bio_10"]]) +
    tm_raster(palette = 'YlOrRd', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

bio_13.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["bio_13"]]) +
    tm_raster(palette = 'RdBu', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

nontree.plot 

npp.plot

bio_10.plot

bio_13.plot

Thinning parametres

  • acce: Accessibility
  • country: Country of origin
Code
acce_100k_resampled <- terra::rast('big_data/acce.tif') %>% 
  classify(cbind(NaN, NA)) # values are scaled

Plot

acce.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(acce_100k_resampled[["acce"]]) +
    tm_raster(palette = 'RdYlBu', midpoint = NA, style= "cont", n=10)  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

acce.plot

Data for the model

Presence-only data

Code
# calculate area in each raster cell
PO_nnasua.area <- terra::cellSize(PO_nnasua_time1_raster[[1]], unit='m', transform=F) %>% 
  classify(cbind(NaN, NA)) %>% terra::values()

# calculate coordinates for each raster cell
PO_nnasua.coords <- terra::xyFromCell(PO_nnasua_time1_raster, cell=1:ncell(PO_nnasua_time1_raster)) %>% as.data.frame()
names(PO_nnasua.coords) <- c('X', 'Y')

# number of cells in the rasters (both the same)
PO_nnasua.cell <- 1:ncell(PO_nnasua_time1_raster)

# with the environmental variables values for each cell
PO_nnasua.env <- env_100k_resampled[] # scaled values

# with the mean accessibility value for each cell
PO_nnasua.acce <- acce_100k_resampled # scaled values

# calculate point count in each raster cell
PO_nnasua_time1.count <- PO_nnasua_time1_raster %>%
    classify(cbind(NaN, NA)) %>% terra::values() %>% as_tibble %>% 
  dplyr::select(count)

PO_nnasua_time2.count <- PO_nnasua_time2_raster %>%
    classify(cbind(NaN, NA)) %>% terra::values() %>% as_tibble %>% 
  dplyr::select(count)

PO_nnasua.countries <- latam_countries %>% classify(cbind(NaN, NA))

# save time1 and time2 datasets 
PO_nnasua_time1.model.data <- data.frame(PO_nnasua.coords,
                                            pixel = PO_nnasua.cell[],
                                            area = PO_nnasua.area,
                                            pt.count = PO_nnasua_time1.count,
                                            env = PO_nnasua.env,
                                            acce = PO_nnasua.acce[], 
                                            country = PO_nnasua.countries[])

PO_nnasua_time2.model.data <- data.frame(PO_nnasua.coords,
                                            pixel = PO_nnasua.cell[],
                                            area = PO_nnasua.area,
                                            pt.count = PO_nnasua_time2.count,
                                            env = PO_nnasua.env,
                                            acce = PO_nnasua.acce[], 
                                            country = PO_nnasua.countries[])
  • Check data
Code
head(PO_nnasua_time1.model.data) %>% kable()
X Y pixel area count env.bio_10 env.bio_13 env.nontree env.npp acce countries
-4357686 3867905 1 1e+10 NA NA NA NA NA NA NA
-4257686 3867905 2 1e+10 NA NA NA NA NA NA NA
-4157686 3867905 3 1e+10 0 -1.690030 -1.010717 0.4573118 -1.194929 0.0115694 47
-4057686 3867905 4 1e+10 0 -1.674205 -1.393871 -0.6126802 -1.597718 0.0209173 47
-3957686 3867905 5 1e+10 NA -1.680337 -1.661423 -0.9573888 -1.786366 0.0270081 NA
-3857686 3867905 6 1e+10 NA -1.600721 -1.702548 -1.6611980 -1.858230 0.0456958 NA
Code
head(PO_nnasua_time2.model.data) %>% kable()
X Y pixel area count env.bio_10 env.bio_13 env.nontree env.npp acce countries
-4357686 3867905 1 1e+10 NA NA NA NA NA NA NA
-4257686 3867905 2 1e+10 NA NA NA NA NA NA NA
-4157686 3867905 3 1e+10 0 -1.690030 -1.010717 0.4573118 -1.194929 0.0115694 47
-4057686 3867905 4 1e+10 0 -1.674205 -1.393871 -0.6126802 -1.597718 0.0209173 47
-3957686 3867905 5 1e+10 NA -1.680337 -1.661423 -0.9573888 -1.786366 0.0270081 NA
-3857686 3867905 6 1e+10 NA -1.600721 -1.702548 -1.6611980 -1.858230 0.0456958 NA

Presence-absence data

Code
# calculate area, coordinates, and point count in each raster cell
PA_nnasua.area.time1 <- as.numeric(PA_nnasua_time1_blobs$blobArea) 
PA_nnasua.area.time2 <- as.numeric(PA_nnasua_time2_blobs$blobArea) 

PA_nnasua.coords.time1 <- st_coordinates(st_centroid(PA_nnasua_time1_blobs)) %>% as_tibble()
PA_nnasua.coords.time2 <- st_coordinates(st_centroid(PA_nnasua_time2_blobs)) %>% as_tibble()

PA_nnasua.pixel.time1 <- terra::cells(PO_nnasua_time1_raster, vect(st_centroid(PA_nnasua_time1_blobs))) 
PA_nnasua.pixel.time2 <- terra::cells(PO_nnasua_time2_raster, vect(st_centroid(PA_nnasua_time2_blobs))) 

# mode to calculate the most frequent value for the blob - this needs to be changed to getting the %are for each landcover
mode_raster  <- function(x, na.rm = FALSE) {
  if(na.rm){ x = x[!is.na(x)]}
  ux <- unique(x)
  return(ux[which.max(tabulate(match(x, ux)))])
}

PA_nnasua.env.time1 <- terra::extract(x = env, 
                                      y = vect(PA_nnasua_time1_blobs), 
                                      fun = mean, na.rm=TRUE) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) %>% scale()

PA_nnasua.env.time2 <- terra::extract(x = env, 
                                      y = vect(PA_nnasua_time2_blobs), 
                                      fun = mean, na.rm=TRUE) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) %>% scale()

PA_nnasua_time1.model.data <- data.frame(pixel = PA_nnasua.pixel.time1, 
                                            PA_nnasua.coords.time1,
                                            area = PA_nnasua.area.time1,
                                            presabs = PA_nnasua_time1_blobs$presence,
                                            effort = PA_nnasua_time1_blobs$effort,
                                            env = PA_nnasua.env.time1[,2:5])

PA_nnasua_time2.model.data <- data.frame(pixel = PA_nnasua.pixel.time2, 
                                            PA_nnasua.coords.time2,
                                            area = PA_nnasua.area.time2,
                                            presabs = PA_nnasua_time2_blobs$presence,
                                            effort = PA_nnasua_time2_blobs$effort,
                                            env = PA_nnasua.env.time2[,2:5])
  • Check data
Code
head(PA_nnasua_time1.model.data) %>% kable()
pixel.ID pixel.cell X Y area presabs effort env.bio_10 env.bio_13 env.nontree env.npp
1 3735 -793965.8 -422603.44 2761279588 1 93771 -0.6582802 1.3973456 -0.0684109 0.3577977
2 3566 -415832.6 -207245.89 160668368 0 381 1.7712313 2.0850619 -1.2356842 1.0090800
3 3562 -831880.6 -202256.31 13742467 0 507 -0.0060631 0.7107286 -1.2251596 2.3411473
4 3562 -823912.8 -190615.86 3581363 0 676 0.4569488 1.1460762 0.0952659 0.9543768
5 3480 -444796.3 -112426.68 435868381 1 51168 2.6194916 2.1789813 -1.2337677 0.9974119
6 3305 -735006.6 39435.97 65695043 0 360 1.8257933 1.4444697 0.2464471 1.8368899
Code
head(PA_nnasua_time2.model.data) %>% kable()
pixel.ID pixel.cell X Y area presabs effort env.bio_10 env.bio_13 env.nontree env.npp
1 4429 -184981.4 -1276055 3998173 0 95 -1.0125026 1.335277 0.5655012 -0.4783321
2 4429 -190626.9 -1270354 3998173 1 92 -0.9909542 1.382950 0.3490164 0.0430310
3 4428 -230709.4 -1269344 10776792 0 269 -1.2143200 1.346246 -1.3522233 1.6159046
4 4429 -195064.8 -1269216 3998173 0 95 -1.0839638 1.382707 0.8009927 -1.2875682
5 4428 -208882.4 -1268383 147932384 1 3231 -1.1349351 1.505334 0.2990076 -1.4463497
6 4429 -169996.1 -1265947 6948021 1 177 -0.5177786 1.585803 0.6291421 1.7688228

Save data

saveRDS(PO_nnasua_time1.model.data, 'data/species_POPA_data/data_nnasua_PO_time1.rds')
saveRDS(PO_nnasua_time2.model.data, 'data/species_POPA_data/data_nnasua_PO_time2.rds')
saveRDS(PA_nnasua_time1.model.data, 'data/species_POPA_data/data_nnasua_PA_time1.rds')
saveRDS(PA_nnasua_time2.model.data, 'data/species_POPA_data/data_nnasua_PA_time2.rds')

Extra: Datasets in the covariate space

The range (and interquartile range) of values for each covariate that is sampled by each dataset

Code
getRangeIQR <- function(env_PX){
  env_PX.df <- tibble(env= character(),
                          range = numeric(),
                     iqr = numeric(),
                     stringsAsFactors=FALSE)
  
  for (i in 1:ncol(env_PX)){
    df <- tibble(env = names(env_PX[i]),
             range = range(env_PX[[i]], na.rm=T)[2],
             iqr=  IQR(env_PA[[i]], na.rm = T))
    env_PX.df <- rbind(env_PX.df, df)
  }
  return(env_PX.df)
}

# covariates for PA
env_PA <- terra::extract(x = env, 
                         y = rbind(vect(PA_nnasua_time1_blobs), 
                                   vect(PA_nnasua_time2_blobs)), 
                                     fun = mean, na.rm=TRUE) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) 

summary(env_PA[-1])
     bio_10             bio_13         nontree            npp       
 Min.   :   5.066   Min.   :41.34   Min.   :  2.45   Min.   :    0  
 1st Qu.: 371.995   1st Qu.:61.88   1st Qu.: 35.29   1st Qu.: 6832  
 Median : 477.439   Median :66.53   Median : 58.29   Median : 9497  
 Mean   : 478.618   Mean   :66.52   Mean   : 55.47   Mean   : 9769  
 3rd Qu.: 590.625   3rd Qu.:70.62   3rd Qu.: 69.94   3rd Qu.:12812  
 Max.   :1039.348   Max.   :91.51   Max.   :200.00   Max.   :21085  
Code
env_PA.RangeIQR <- getRangeIQR(env_PA[-1]) %>% mutate(data='PA')

for (i in 2:ncol(env_PA)){
    range <- range(env_PA[[i]], na.rm=T)[2]
    iqr <- IQR(env_PA[[i]], na.rm = T)
    print(str_glue('PA {names(env_PA[i])} -> range: {range} - IQR: {iqr}'))
}
PA bio_10 -> range: 1039.34758506756 - IQR: 218.630131896113
PA bio_13 -> range: 91.5144882202148 - IQR: 8.74405288696289
PA nontree -> range: 200 - IQR: 34.6486096251881
PA npp -> range: 21085.2897600446 - IQR: 5980.34717087563
Code
# covariates layers for PO
env_PO <- terra::resample(env, latam_raster, method='bilinear') %>% 
  classify(cbind(NaN, NA)) %>% as.data.frame()

summary(env_PO)
     bio_10              bio_13         nontree             npp           
 Min.   :   0.1098   Min.   :35.58   Min.   :  6.068   Min.   :    1.935  
 1st Qu.: 205.2004   1st Qu.:54.05   1st Qu.: 32.278   1st Qu.: 4567.937  
 Median : 332.9163   Median :68.77   Median : 52.772   Median : 8232.417  
 Mean   : 351.7265   Mean   :66.56   Mean   : 49.836   Mean   : 7782.353  
 3rd Qu.: 479.6892   3rd Qu.:77.87   3rd Qu.: 66.518   3rd Qu.:10876.451  
 Max.   :1463.8673   Max.   :91.20   Max.   :124.838   Max.   :17393.445  
Code
env_PO.RangeIQR <- getRangeIQR(env_PO) %>% mutate(data='PO')

for (i in 1:ncol(env_PO)){
    range <- range(env_PO[[i]], na.rm=T)[2]
    iqr <- IQR(env_PO[[i]], na.rm = T)
    print(str_glue('PO {names(env_PO[i])} -> range: {range} - IQR: {iqr}'))
}
PO bio_10 -> range: 1463.86730957031 - IQR: 274.488872528076
PO bio_13 -> range: 91.2010192871094 - IQR: 23.8219661712646
PO nontree -> range: 124.83846282959 - IQR: 34.2391386032104
PO npp -> range: 17393.4453125 - IQR: 6308.51477050781