Herpailurus yagouaroundi Data Preparation

Author

Florencia Grattarola

Published

October 1, 2024

Data preparation for modelling for Herpailurus yagouaroundi, with the covariates bio7, bio15, npp, and elev (selected in the previous step).

library(knitr)
library(lubridate)
library(tmap)
library(leaflet)
library(patchwork)
library(terra)
library(sf)
sf::sf_use_s2(FALSE)
library(tidyverse)
equalareaCRS <-  '+proj=laea +lon_0=-73.125 +lat_0=0 +datum=WGS84 +units=m +no_defs'
latam <- st_read('data/latam.gpkg', layer = 'latam', quiet = T)
countries <- st_read('data/latam.gpkg', layer = 'countries', quiet = T)
latam_land <- st_read('data/latam.gpkg', layer = 'latam_land', quiet = T)
latam_raster <- rast('data/latam_raster.tif', lyrs='latam')
latam_countries <- rast('data/latam_raster.tif', lyrs='countries')

# IUCN range distribution
hyagouaroundi_IUCN <- sf::st_read('big_data/hyagouaroundi_IUCN.shp', quiet = T) %>% sf::st_transform(crs=equalareaCRS)

MAP.IUCN <- tm_graticules(alpha = 0.3) +
    tm_shape(countries) +
    tm_fill(col='grey90') +
    tm_borders(col='grey60', alpha = 0.4) +
    tm_shape(hyagouaroundi_IUCN) +
    tm_fill(col='grey20', alpha = 0.4) +
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

MAP.IUCN

Data

Species data (PA & PO)

# presence-only rasters
PO_hyagouaroundi_time1_raster <- terra::rast('data/species_POPA_data/PO_hyagouaroundi_time1_raster.tif')
PO_hyagouaroundi_time2_raster <- terra::rast('data/species_POPA_data/PO_hyagouaroundi_time2_raster.tif')

# blobs
PA_hyagouaroundi_time1_blobs <- readRDS('data/species_POPA_data/PA_hyagouaroundi_time1_blobs.rds')
PA_hyagouaroundi_time2_blobs <- readRDS('data/species_POPA_data/PA_hyagouaroundi_time2_blobs.rds')

Covariates

The following covariates were chosen using the ‘variableSelection’ script:

  • bio7: Temperature Annual Range
  • bio15: Mean Temperature of Warmest Quarter
  • npp: Net Primary Production (NPP)
  • elev: Percentage of No Tree Cover
Code
bio <- rast(here::here('big_data/bio_high.tif'))
elev <- rast(here::here('big_data/elev_high.tif'))
npp <- rast(here::here('big_data/npp_high.tif'))
npp <- resample(npp, bio, method='bilinear') 
names(npp) <- 'npp'

# environmental layers for PA
env <- c(bio[[c('bio_7','bio_15')]], npp, elev) %>% 
  classify(cbind(NaN, NA))

# resample and scaled values for PO
env_100k_unscaled <- terra::resample(env, latam_raster, method='bilinear') %>% 
  classify(cbind(NaN, NA))

env_100k_resampled <- env_100k_unscaled %>% scale() # scaled values to 0 mean and sd of 1

Plots

elev.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["elev"]]) +
    tm_raster(palette = 'BrBG', midpoint = NA, style= "cont", n=10)  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

npp.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["npp"]]) +
    tm_raster(palette = 'BuGn', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

bio_7.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["bio_7"]]) +
    tm_raster(palette = 'RdYlBu', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

bio_15.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["bio_15"]]) +
    tm_raster(palette = 'PuOr', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

elev.plot 

npp.plot

bio_7.plot

bio_15.plot

Thinning parametres

  • acce: Accessibility
  • country: Country of origin
Code
acce_100k_resampled <- terra::rast('big_data/acce.tif') %>% 
  classify(cbind(NaN, NA)) # values are scaled

Plot

acce.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(acce_100k_resampled[["acce"]]) +
    tm_raster(palette = 'RdYlBu', midpoint = NA, style= "cont", n=10)  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

acce.plot

Data for the model

Presence-only data

Code
# calculate area in each raster cell
PO_hyagouaroundi.area <- terra::cellSize(PO_hyagouaroundi_time1_raster[[1]], unit='m', transform=F) %>% 
  classify(cbind(NaN, NA)) %>% terra::values()

# calculate coordinates for each raster cell
PO_hyagouaroundi.coords <- terra::xyFromCell(PO_hyagouaroundi_time1_raster, cell=1:ncell(PO_hyagouaroundi_time1_raster)) %>% as.data.frame()
names(PO_hyagouaroundi.coords) <- c('X', 'Y')

# number of cells in the rasters (both the same)
PO_hyagouaroundi.cell <- 1:ncell(PO_hyagouaroundi_time1_raster)

# with the environmental variables values for each cell
PO_hyagouaroundi.env <- env_100k_resampled[] # scaled values

# with the mean accessibility value for each cell
PO_hyagouaroundi.acce <- acce_100k_resampled # scaled values

# calculate point count in each raster cell
PO_hyagouaroundi_time1.count <- PO_hyagouaroundi_time1_raster %>%
    classify(cbind(NaN, NA)) %>% terra::values() %>% as_tibble %>% 
  dplyr::select(count)

PO_hyagouaroundi_time2.count <- PO_hyagouaroundi_time2_raster %>%
    classify(cbind(NaN, NA)) %>% terra::values() %>% as_tibble %>% 
  dplyr::select(count)

PO_hyagouaroundi.countries <- latam_countries %>% classify(cbind(NaN, NA))

# save time1 and time2 datasets 
PO_hyagouaroundi_time1.model.data <- data.frame(PO_hyagouaroundi.coords,
                                            pixel = PO_hyagouaroundi.cell[],
                                            area = PO_hyagouaroundi.area,
                                            pt.count = PO_hyagouaroundi_time1.count,
                                            env = PO_hyagouaroundi.env,
                                            acce = PO_hyagouaroundi.acce[], 
                                            country = PO_hyagouaroundi.countries[])

PO_hyagouaroundi_time2.model.data <- data.frame(PO_hyagouaroundi.coords,
                                            pixel = PO_hyagouaroundi.cell[],
                                            area = PO_hyagouaroundi.area,
                                            pt.count = PO_hyagouaroundi_time2.count,
                                            env = PO_hyagouaroundi.env,
                                            acce = PO_hyagouaroundi.acce[], 
                                            country = PO_hyagouaroundi.countries[])
  • Check data
Code
head(PO_hyagouaroundi_time1.model.data) %>% kable()
X Y pixel area count env.bio_7 env.bio_15 env.npp env.elev acce countries
-4357686 3867905 1 1e+10 NA NA NA NA NA NA NA
-4257686 3867905 2 1e+10 NA NA NA NA NA NA NA
-4157686 3867905 3 1e+10 0 0.9201098 0.2185891 -1.194929 -0.0964722 0.0115694 47
-4057686 3867905 4 1e+10 0 0.2354014 1.7084783 -1.597718 -0.2009575 0.0209173 47
-3957686 3867905 5 1e+10 NA -0.2380332 2.4191863 -1.786366 -0.4685130 0.0270081 NA
-3857686 3867905 6 1e+10 NA -0.2966836 2.3485775 -1.858230 -0.2928835 0.0456958 NA
Code
head(PO_hyagouaroundi_time2.model.data) %>% kable()
X Y pixel area count env.bio_7 env.bio_15 env.npp env.elev acce countries
-4357686 3867905 1 1e+10 NA NA NA NA NA NA NA
-4257686 3867905 2 1e+10 NA NA NA NA NA NA NA
-4157686 3867905 3 1e+10 0 0.9201098 0.2185891 -1.194929 -0.0964722 0.0115694 47
-4057686 3867905 4 1e+10 0 0.2354014 1.7084783 -1.597718 -0.2009575 0.0209173 47
-3957686 3867905 5 1e+10 NA -0.2380332 2.4191863 -1.786366 -0.4685130 0.0270081 NA
-3857686 3867905 6 1e+10 NA -0.2966836 2.3485775 -1.858230 -0.2928835 0.0456958 NA

Presence-absence data

Code
# calculate area, coordinates, and point count in each raster cell
PA_hyagouaroundi.area.time1 <- as.numeric(PA_hyagouaroundi_time1_blobs$blobArea) 
PA_hyagouaroundi.area.time2 <- as.numeric(PA_hyagouaroundi_time2_blobs$blobArea) 

PA_hyagouaroundi.coords.time1 <- st_coordinates(st_centroid(PA_hyagouaroundi_time1_blobs)) %>% as_tibble()
PA_hyagouaroundi.coords.time2 <- st_coordinates(st_centroid(PA_hyagouaroundi_time2_blobs)) %>% as_tibble()

PA_hyagouaroundi.pixel.time1 <- terra::cells(PO_hyagouaroundi_time1_raster, vect(st_centroid(PA_hyagouaroundi_time1_blobs))) 
PA_hyagouaroundi.pixel.time2 <- terra::cells(PO_hyagouaroundi_time2_raster, vect(st_centroid(PA_hyagouaroundi_time2_blobs))) 

# mode to calculate the most frequent value for the blob - this needs to be changed to getting the %are for each landcover
mode_raster  <- function(x, na.rm = FALSE) {
  if(na.rm){ x = x[!is.na(x)]}
  ux <- unique(x)
  return(ux[which.max(tabulate(match(x, ux)))])
}

PA_hyagouaroundi.env.time1 <- terra::extract(x = env, 
                                             y = vect(PA_hyagouaroundi_time1_blobs), 
                                             fun = mean, na.rm=TRUE) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) %>% scale()

PA_hyagouaroundi.env.time2 <- terra::extract(x = env, 
                                             y = vect(PA_hyagouaroundi_time2_blobs), 
                                             fun = mean, na.rm=TRUE) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) %>% scale()

PA_hyagouaroundi_time1.model.data <- data.frame(pixel = PA_hyagouaroundi.pixel.time1, 
                                            PA_hyagouaroundi.coords.time1,
                                            area = PA_hyagouaroundi.area.time1,
                                            presabs = PA_hyagouaroundi_time1_blobs$presence,
                                            effort = PA_hyagouaroundi_time1_blobs$effort,
                                            env = PA_hyagouaroundi.env.time1[,2:5])

PA_hyagouaroundi_time2.model.data <- data.frame(pixel = PA_hyagouaroundi.pixel.time2, 
                                            PA_hyagouaroundi.coords.time2,
                                            area = PA_hyagouaroundi.area.time2,
                                            presabs = PA_hyagouaroundi_time2_blobs$presence,
                                            effort = PA_hyagouaroundi_time2_blobs$effort,
                                            env = PA_hyagouaroundi.env.time2[,2:5])
  • Check data
Code
head(PA_hyagouaroundi_time1.model.data) %>% kable()
pixel.ID pixel.cell X Y area presabs effort env.bio_7 env.bio_15 env.npp env.elev
1 1661 -1768526 1918184 241873943 0 1043 -0.1425514 0.7402521 0.8189866 -0.4108728
2 1660 -1844290 1927648 537081878 1 2404 0.1760225 1.4651008 0.3621302 -0.8686578
3 1661 -1731415 1940145 2241827859 1 11078 -0.1478967 0.7728603 0.8676071 -0.5862306
4 1660 -1820735 1948513 162791738 1 1291 0.0855441 1.2870676 0.6329681 -0.5554242
5 1661 -1782702 1959695 756798182 0 1779 -0.0120086 1.0139513 0.7204713 -0.2293519
6 1567 -2552096 2041811 19990863 1 3300 1.4353538 2.5136590 -0.0070323 1.2443439
Code
head(PA_hyagouaroundi_time2.model.data) %>% kable()
pixel.ID pixel.cell X Y area presabs effort env.bio_7 env.bio_15 env.npp env.elev
1 3734 -817257.8 -439796.5 2.229446e+09 0 11095 3.5626612 0.3534435 -0.7710085 -0.4185148
2 2527 -1139813.8 954656.5 1.542295e+03 1 12276 -0.2985580 1.1812480 -0.2779545 -0.9466047
3 2527 -1168048.8 958569.1 1.049520e-01 1 1585 0.0462369 1.1414007 -0.5703401 -0.9208443
4 2527 -1116769.5 965197.0 7.494574e+01 1 1440 0.2086100 0.7011316 -0.0384092 -0.4651267
5 2527 -1108298.2 967725.2 2.546836e+01 1 400 -0.1381646 1.1098521 0.0808761 -0.9303053
6 2528 -1086336.4 1003862.9 2.398904e-01 1 7164 0.2276377 -2.2790065 0.5782478 1.8938811

Save data

saveRDS(PO_hyagouaroundi_time1.model.data, 'data/species_POPA_data/data_hyagouaroundi_PO_time1.rds')
saveRDS(PO_hyagouaroundi_time2.model.data, 'data/species_POPA_data/data_hyagouaroundi_PO_time2.rds')
saveRDS(PA_hyagouaroundi_time1.model.data, 'data/species_POPA_data/data_hyagouaroundi_PA_time1.rds')
saveRDS(PA_hyagouaroundi_time2.model.data, 'data/species_POPA_data/data_hyagouaroundi_PA_time2.rds')

Extra: Datasets in the covariate space

The range (and interquartile range) of values for each covariate that is sampled by each dataset

Code
getRangeIQR <- function(env_PX){
  env_PX.df <- tibble(env= character(),
                          range = numeric(),
                     iqr = numeric(),
                     stringsAsFactors=FALSE)
  
  for (i in 1:ncol(env_PX)){
    df <- tibble(env = names(env_PX[i]),
             range = range(env_PX[[i]], na.rm=T)[2],
             iqr=  IQR(env_PA[[i]], na.rm = T))
    env_PX.df <- rbind(env_PX.df, df)
  }
  return(env_PX.df)
}

# covariates for PA
env_PA <- terra::extract(x = env, 
                         y = rbind(vect(PA_hyagouaroundi_time1_blobs), 
                                   vect(PA_hyagouaroundi_time2_blobs)), 
                                     fun = mean, na.rm=TRUE) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) 

summary(env_PA[-1])
     bio_7             bio_15           npp             elev         
 Min.   :  6.456   Min.   :12.07   Min.   :    0   Min.   :   3.276  
 1st Qu.: 46.754   1st Qu.:28.38   1st Qu.: 6963   1st Qu.: 194.318  
 Median : 64.065   Median :30.68   Median : 9498   Median : 404.787  
 Mean   : 61.137   Mean   :30.00   Mean   : 9781   Mean   : 557.009  
 3rd Qu.: 76.893   3rd Qu.:32.22   3rd Qu.:12746   3rd Qu.: 814.676  
 Max.   :170.527   Max.   :37.80   Max.   :21085   Max.   :4867.706  
Code
env_PA.RangeIQR <- getRangeIQR(env_PA[-1]) %>% mutate(data='PA')

for (i in 2:ncol(env_PA)){
    range <- range(env_PA[[i]], na.rm=T)[2]
    iqr <- IQR(env_PA[[i]], na.rm = T)
    print(str_glue('PA {names(env_PA[i])} -> range: {range} - IQR: {iqr}'))
}
PA bio_7 -> range: 170.527460734049 - IQR: 30.1394629781462
PA bio_15 -> range: 37.8010549545288 - IQR: 3.83620950153896
PA npp -> range: 21085.2897600446 - IQR: 5783.17029000419
PA elev -> range: 4867.70556640625 - IQR: 620.358253610545
Code
# covariates layers for PO
env_PO <- terra::resample(env, latam_raster, method='bilinear') %>% 
  classify(cbind(NaN, NA)) %>% as.data.frame()

summary(env_PO)
     bio_7             bio_15           npp                 elev         
 Min.   :  6.735   Min.   :10.44   Min.   :    1.935   Min.   :   2.195  
 1st Qu.: 41.522   1st Qu.:29.95   1st Qu.: 4567.937   1st Qu.: 140.181  
 Median : 60.394   Median :31.86   Median : 8232.417   Median : 294.976  
 Mean   : 60.435   Mean   :30.66   Mean   : 7782.353   Mean   : 604.102  
 3rd Qu.: 78.506   3rd Qu.:33.26   3rd Qu.:10876.451   3rd Qu.: 692.125  
 Max.   :154.105   Max.   :41.97   Max.   :17393.445   Max.   :4462.538  
Code
env_PO.RangeIQR <- getRangeIQR(env_PO) %>% mutate(data='PO')

for (i in 1:ncol(env_PO)){
    range <- range(env_PO[[i]], na.rm=T)[2]
    iqr <- IQR(env_PO[[i]], na.rm = T)
    print(str_glue('PO {names(env_PO[i])} -> range: {range} - IQR: {iqr}'))
}
PO bio_7 -> range: 154.105056762695 - IQR: 36.9837703704834
PO bio_15 -> range: 41.9655418395996 - IQR: 3.31285381317139
PO npp -> range: 17393.4453125 - IQR: 6308.51477050781
PO elev -> range: 4462.5380859375 - IQR: 551.944019317627