Pteronura brasiliensis Data Preparation

Author

Florencia Grattarola

Published

October 1, 2024

Data preparation for modelling for Pteronura brasiliensis, with the covariates bio_3, bio_5, woodysavanna, and wetland (selected in the previous step).

library(knitr)
library(lubridate)
library(tmap)
library(leaflet)
library(patchwork)
library(terra)
library(sf)
sf::sf_use_s2(FALSE)
library(tidyverse)
equalareaCRS <-  '+proj=laea +lon_0=-73.125 +lat_0=0 +datum=WGS84 +units=m +no_defs'
latam <- st_read('data/latam.gpkg', layer = 'latam', quiet = T)
countries <- st_read('data/latam.gpkg', layer = 'countries', quiet = T)
latam_land <- st_read('data/latam.gpkg', layer = 'latam_land', quiet = T)
latam_raster <- rast('data/latam_raster.tif', lyrs='latam')
latam_countries <- rast('data/latam_raster.tif', lyrs='countries')

# IUCN range distribution
pbrasiliensis_IUCN <- sf::st_read('big_data/pbrasiliensis_IUCN.shp', quiet = T) %>% sf::st_transform(crs=equalareaCRS)

MAP.IUCN <- tm_graticules(alpha = 0.3) +
    tm_shape(countries) +
    tm_fill(col='grey90') +
    tm_borders(col='grey60', alpha = 0.4) +
    tm_shape(pbrasiliensis_IUCN) +
    tm_fill(col='grey20', alpha = 0.4) +
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

MAP.IUCN

Data

Species data (PA & PO)

# presence-only rasters
PO_pbrasiliensis_time1_raster <- terra::rast('data/species_POPA_data/PO_pbrasiliensis_time1_raster.tif')
PO_pbrasiliensis_time2_raster <- terra::rast('data/species_POPA_data/PO_pbrasiliensis_time2_raster.tif')

# blobs
PA_pbrasiliensis_time1_blobs <- readRDS('data/species_POPA_data/PA_pbrasiliensis_time1_blobs.rds')
PA_pbrasiliensis_time2_blobs <- readRDS('data/species_POPA_data/PA_pbrasiliensis_time2_blobs.rds')

Covariates

The following covariates were chosen using the ‘variableSelection’ script:

  • bio_3: Isothermality
  • bio_5: Max Temperature of the Warmest Month
  • woodysavanna: Woody savannas
  • wetland: Permanent Wetlands
Code
bio <- rast(here::here('big_data/bio_high.tif'))
woodysavanna <- rast(here::here('big_data/woodysavanna_high.tif'))
woodysavanna <- resample(woodysavanna, bio, method='bilinear') 
names(woodysavanna) <- 'woodysavanna'
wetland <- rast(here::here('big_data/wetland_high.tif'))
wetland <- resample(wetland, bio, method='bilinear') 
names(wetland) <- 'wetland'

# environmental layers for PA
env <- c(bio[[c('bio_3','bio_5')]], wetland, woodysavanna) %>% 
  classify(cbind(NaN, NA))

# resample and scaled values for PO
env_100k_unscaled <- terra::resample(env, latam_raster, method='bilinear') %>% 
  classify(cbind(NaN, NA))

env_100k_resampled <- env_100k_unscaled %>% scale() # scaled values to 0 mean and sd of 1

Plots

wetland.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["wetland"]]) +
    tm_raster(palette = 'PuBuGn', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

woodysavanna.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["woodysavanna"]]) +
    tm_raster(palette = 'Greens', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

bio_3.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["bio_3"]]) +
    tm_raster(palette = 'RdYlBu', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

bio_5.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["bio_5"]]) +
    tm_raster(palette = 'OrRd', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

wetland.plot 

woodysavanna.plot

bio_3.plot

bio_5.plot

Thinning parametres

  • acce: Accessibility
  • country: Country of origin
Code
acce_100k_resampled <- terra::rast('big_data/acce.tif') %>% 
  classify(cbind(NaN, NA)) # values are scaled

Plot

acce.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(acce_100k_resampled[["acce"]]) +
    tm_raster(palette = 'RdYlBu', midpoint = NA, style= "cont", n=10)  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

acce.plot

Data for the model

Presence-only data

Code
# calculate area in each raster cell
PO_pbrasiliensis.area <- terra::cellSize(PO_pbrasiliensis_time1_raster[[1]], unit='m', transform=F) %>% 
  classify(cbind(NaN, NA)) %>% terra::values()

# calculate coordinates for each raster cell
PO_pbrasiliensis.coords <- terra::xyFromCell(PO_pbrasiliensis_time1_raster, cell=1:ncell(PO_pbrasiliensis_time1_raster)) %>% as.data.frame()
names(PO_pbrasiliensis.coords) <- c('X', 'Y')

# number of cells in the rasters (both the same)
PO_pbrasiliensis.cell <- 1:ncell(PO_pbrasiliensis_time1_raster)

# with the environmental variables values for each cell
PO_pbrasiliensis.env <- env_100k_resampled[] # scaled values

# with the mean accessibility value for each cell
PO_pbrasiliensis.acce <- acce_100k_resampled # scaled values

# calculate point count in each raster cell
PO_pbrasiliensis_time1.count <- PO_pbrasiliensis_time1_raster %>%
    classify(cbind(NaN, NA)) %>% terra::values() %>% as_tibble %>% 
  dplyr::select(count)

PO_pbrasiliensis_time2.count <- PO_pbrasiliensis_time2_raster %>%
    classify(cbind(NaN, NA)) %>% terra::values() %>% as_tibble %>% 
  dplyr::select(count)

PO_pbrasiliensis.countries <- latam_countries %>% classify(cbind(NaN, NA))

# save time1 and time2 datasets 
PO_pbrasiliensis_time1.model.data <- data.frame(PO_pbrasiliensis.coords,
                                            pixel = PO_pbrasiliensis.cell[],
                                            area = PO_pbrasiliensis.area,
                                            pt.count = PO_pbrasiliensis_time1.count,
                                            env = PO_pbrasiliensis.env,
                                            acce = PO_pbrasiliensis.acce[], 
                                            country = PO_pbrasiliensis.countries[])

PO_pbrasiliensis_time2.model.data <- data.frame(PO_pbrasiliensis.coords,
                                            pixel = PO_pbrasiliensis.cell[],
                                            area = PO_pbrasiliensis.area,
                                            pt.count = PO_pbrasiliensis_time2.count,
                                            env = PO_pbrasiliensis.env,
                                            acce = PO_pbrasiliensis.acce[], 
                                            country = PO_pbrasiliensis.countries[])
  • Check data
Code
head(PO_pbrasiliensis_time1.model.data) %>% kable()
X Y pixel area count env.bio_3 env.bio_5 env.wetland env.woodysavanna acce countries
-4357686 3867905 1 1e+10 NA NA NA NA NA NA NA
-4257686 3867905 2 1e+10 NA NA NA NA NA NA NA
-4157686 3867905 3 1e+10 0 -0.9266831 -1.326677 -0.2565469 -0.5336031 0.0115694 47
-4057686 3867905 4 1e+10 0 -0.8504784 -1.614458 -0.3109350 -0.5346973 0.0209173 47
-3957686 3867905 5 1e+10 NA -0.6461347 -1.811086 -0.3109350 -0.5358385 0.0270081 NA
-3857686 3867905 6 1e+10 NA -0.7281267 -1.748408 -0.3109350 -0.5358385 0.0456958 NA
Code
head(PO_pbrasiliensis_time2.model.data) %>% kable()
X Y pixel area count env.bio_3 env.bio_5 env.wetland env.woodysavanna acce countries
-4357686 3867905 1 1e+10 NA NA NA NA NA NA NA
-4257686 3867905 2 1e+10 NA NA NA NA NA NA NA
-4157686 3867905 3 1e+10 0 -0.9266831 -1.326677 -0.2565469 -0.5336031 0.0115694 47
-4057686 3867905 4 1e+10 0 -0.8504784 -1.614458 -0.3109350 -0.5346973 0.0209173 47
-3957686 3867905 5 1e+10 NA -0.6461347 -1.811086 -0.3109350 -0.5358385 0.0270081 NA
-3857686 3867905 6 1e+10 NA -0.7281267 -1.748408 -0.3109350 -0.5358385 0.0456958 NA

Presence-absence data

Code
# calculate area, coordinates, and point count in each raster cell
PA_pbrasiliensis.area.time1 <- as.numeric(PA_pbrasiliensis_time1_blobs$blobArea) 
PA_pbrasiliensis.area.time2 <- as.numeric(PA_pbrasiliensis_time2_blobs$blobArea) 

PA_pbrasiliensis.coords.time1 <- st_coordinates(st_centroid(PA_pbrasiliensis_time1_blobs)) %>% as_tibble()
PA_pbrasiliensis.coords.time2 <- st_coordinates(st_centroid(PA_pbrasiliensis_time2_blobs)) %>% as_tibble()

PA_pbrasiliensis.pixel.time1 <- terra::cells(PO_pbrasiliensis_time1_raster, vect(st_centroid(PA_pbrasiliensis_time1_blobs))) 
PA_pbrasiliensis.pixel.time2 <- terra::cells(PO_pbrasiliensis_time2_raster, vect(st_centroid(PA_pbrasiliensis_time2_blobs))) 

# mode to calculate the most frequent value for the blob - this needs to be changed to getting the %are for each landcover
mode_raster  <- function(x, na.rm = FALSE) {
  if(na.rm){ x = x[!is.na(x)]}
  ux <- unique(x)
  return(ux[which.max(tabulate(match(x, ux)))])
}

PA_pbrasiliensis.env.time1 <- terra::extract(x = env, 
                                             y = vect(PA_pbrasiliensis_time1_blobs), 
                                             fun = mean, na.rm=TRUE) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) %>% scale()

PA_pbrasiliensis.env.time2 <- terra::extract(x = env, 
                                             y = vect(PA_pbrasiliensis_time2_blobs), 
                                             fun = mean, na.rm=TRUE) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) %>% scale()

PA_pbrasiliensis_time1.model.data <- data.frame(pixel = PA_pbrasiliensis.pixel.time1, 
                                            PA_pbrasiliensis.coords.time1,
                                            area = PA_pbrasiliensis.area.time1,
                                            presabs = PA_pbrasiliensis_time1_blobs$presence,
                                            effort = PA_pbrasiliensis_time1_blobs$effort,
                                            env = PA_pbrasiliensis.env.time1[,2:5])

PA_pbrasiliensis_time2.model.data <- data.frame(pixel = PA_pbrasiliensis.pixel.time2, 
                                            PA_pbrasiliensis.coords.time2,
                                            area = PA_pbrasiliensis.area.time2,
                                            presabs = PA_pbrasiliensis_time2_blobs$presence,
                                            effort = PA_pbrasiliensis_time2_blobs$effort,
                                            env = PA_pbrasiliensis.env.time2[,2:5])
  • Check data
Code
head(PA_pbrasiliensis_time1.model.data) %>% kable()
pixel.ID pixel.cell X Y area presabs effort env.bio_3 env.bio_5 env.wetland env.woodysavanna
1 1661 -1768526 1918184 241873943 0 1043 0.4280534 -0.2193149 -0.2676262 -0.3643529
2 1660 -1844290 1927648 537081878 0 2404 0.7360965 0.8959666 -0.2676262 1.0643543
3 1661 -1731415 1940145 2241827859 0 11078 0.4621341 -0.4271541 -0.2676262 -0.3345440
4 1660 -1820735 1948513 162791738 0 1291 0.5886073 0.2749585 -0.2676262 -0.3643529
5 1661 -1782702 1959695 756798182 0 1779 0.4013839 -0.1483101 -0.2676262 -0.3643529
6 1567 -2552096 2041811 19990863 0 3300 0.3010246 -1.7674651 -0.2676262 -0.3643529
Code
head(PA_pbrasiliensis_time2.model.data) %>% kable()
pixel.ID pixel.cell X Y area presabs effort env.bio_3 env.bio_5 env.wetland env.woodysavanna
1 3734 -817257.8 -439796.5 2.229446e+09 0 11095 0.8090478 -1.244423 -0.1604796 0.4088218
2 2527 -1139813.8 954656.5 1.542295e+03 0 12276 1.4459121 1.898276 -0.1604796 -0.3922837
3 2527 -1168048.8 958569.1 1.049520e-01 0 1585 1.4047103 4.903049 5.4792939 -0.3922837
4 2527 -1116769.5 965197.0 7.494574e+01 0 1440 1.0963723 4.929227 -0.1604796 -0.3922837
5 2527 -1108298.2 967725.2 2.546836e+01 0 400 1.4769910 4.140425 -0.1604796 0.1371643
6 2528 -1086336.4 1003862.9 2.398904e-01 0 7164 -0.9841150 3.668389 -0.1604796 -0.3922837

Save data

saveRDS(PO_pbrasiliensis_time1.model.data, 'data/species_POPA_data/data_pbrasiliensis_PO_time1.rds')
saveRDS(PO_pbrasiliensis_time2.model.data, 'data/species_POPA_data/data_pbrasiliensis_PO_time2.rds')
saveRDS(PA_pbrasiliensis_time1.model.data, 'data/species_POPA_data/data_pbrasiliensis_PA_time1.rds')
saveRDS(PA_pbrasiliensis_time2.model.data, 'data/species_POPA_data/data_pbrasiliensis_PA_time2.rds')

Extra: Datasets in the covariate space

The range (and interquartile range) of values for each covariate that is sampled by each dataset

Code
getRangeIQR <- function(env_PX){
  env_PX.df <- tibble(env= character(),
                          range = numeric(),
                     iqr = numeric(),
                     stringsAsFactors=FALSE)
  
  for (i in 1:ncol(env_PX)){
    df <- tibble(env = names(env_PX[i]),
             range = range(env_PX[[i]], na.rm=T)[2],
             iqr=  IQR(env_PA[[i]], na.rm = T))
    env_PX.df <- rbind(env_PX.df, df)
  }
  return(env_PX.df)
}

# covariates for PA
env_PA <- terra::extract(x = env, 
                         y = rbind(vect(PA_pbrasiliensis_time1_blobs), 
                                   vect(PA_pbrasiliensis_time2_blobs)), 
                                     fun = mean, na.rm=TRUE) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) 

summary(env_PA[-1])
     bio_3             bio_5            wetland         woodysavanna    
 Min.   : 0.1059   Min.   :  2.509   Min.   :0.00000   Min.   :0.00000  
 1st Qu.:16.5121   1st Qu.:200.933   1st Qu.:0.00000   1st Qu.:0.00000  
 Median :19.4354   Median :245.688   Median :0.00000   Median :0.00000  
 Mean   :19.7459   Mean   :251.894   Mean   :0.01943   Mean   :0.04786  
 3rd Qu.:23.5625   3rd Qu.:294.838   3rd Qu.:0.00000   3rd Qu.:0.02628  
 Max.   :27.4998   Max.   :637.351   Max.   :0.99773   Max.   :1.00000  
Code
env_PA.RangeIQR <- getRangeIQR(env_PA[-1]) %>% mutate(data='PA')

for (i in 2:ncol(env_PA)){
    range <- range(env_PA[[i]], na.rm=T)[2]
    iqr <- IQR(env_PA[[i]], na.rm = T)
    print(str_glue('PA {names(env_PA[i])} -> range: {range} - IQR: {iqr}'))
}
PA bio_3 -> range: 27.4998134613037 - IQR: 7.05035022519669
PA bio_5 -> range: 637.351318359375 - IQR: 93.9053077697754
PA wetland -> range: 0.997725650668144 - IQR: 0
PA woodysavanna -> range: 1 - IQR: 0.0262835070025176
Code
# covariates layers for PO
env_PO <- terra::resample(env, latam_raster, method='bilinear') %>% 
  classify(cbind(NaN, NA)) %>% as.data.frame()

summary(env_PO)
     bio_3            bio_5            wetland           woodysavanna      
 Min.   :-2.701   Min.   :  1.181   Min.   :0.0000000   Min.   :0.0000000  
 1st Qu.:12.658   1st Qu.:132.409   1st Qu.:0.0000518   1st Qu.:0.0001956  
 Median :21.209   Median :227.126   Median :0.0008525   Median :0.0092380  
 Mean   :18.395   Mean   :223.743   Mean   :0.0080587   Mean   :0.0751433  
 3rd Qu.:24.919   3rd Qu.:315.360   3rd Qu.:0.0056681   3rd Qu.:0.0766570  
 Max.   :27.235   Max.   :720.784   Max.   :0.4137636   Max.   :0.8848116  
Code
env_PO.RangeIQR <- getRangeIQR(env_PO) %>% mutate(data='PO')

for (i in 1:ncol(env_PO)){
    range <- range(env_PO[[i]], na.rm=T)[2]
    iqr <- IQR(env_PO[[i]], na.rm = T)
    print(str_glue('PO {names(env_PO[i])} -> range: {range} - IQR: {iqr}'))
}
PO bio_3 -> range: 27.2350292205811 - IQR: 12.2605299949646
PO bio_5 -> range: 720.784484863281 - IQR: 182.950798034668
PO wetland -> range: 0.413763552904129 - IQR: 0.00561631976961507
PO woodysavanna -> range: 0.884811639785767 - IQR: 0.0764613711871789