Leopardus pardalis Data Preparation

Author

Florencia Grattarola

Published

October 1, 2024

Data preparation for modelling for Leopardus pardalis, with the covariates bio_10, bio_17, npp, and tree (selected in the previous step).

library(knitr)
library(lubridate)
library(tmap)
library(leaflet)
library(patchwork)
library(terra)
library(sf)
sf::sf_use_s2(FALSE)
library(tidyverse)
equalareaCRS <-  '+proj=laea +lon_0=-73.125 +lat_0=0 +datum=WGS84 +units=m +no_defs'
latam <- st_read('data/latam.gpkg', layer = 'latam', quiet = T)
countries <- st_read('data/latam.gpkg', layer = 'countries', quiet = T)
latam_land <- st_read('data/latam.gpkg', layer = 'latam_land', quiet = T)
latam_raster <- rast('data/latam_raster.tif', lyrs='latam')
latam_countries <- rast('data/latam_raster.tif', lyrs='countries')

# IUCN range distribution
lpardalis_IUCN <- sf::st_read('big_data/lpardalis_IUCN.shp', quiet = T) %>% sf::st_transform(crs=equalareaCRS)

MAP.IUCN <- tm_graticules(alpha = 0.3) +
    tm_shape(countries) +
    tm_fill(col='grey90') +
    tm_borders(col='grey60', alpha = 0.4) +
    tm_shape(lpardalis_IUCN) +
    tm_fill(col='grey20', alpha = 0.4) +
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

MAP.IUCN

Data

Species data (PA & PO)

# presence-only rasters
PO_lpardalis_time1_raster <- terra::rast('data/species_POPA_data/PO_lpardalis_time1_raster.tif')
PO_lpardalis_time2_raster <- terra::rast('data/species_POPA_data/PO_lpardalis_time2_raster.tif')

# blobs
PA_lpardalis_time1_blobs <- readRDS('data/species_POPA_data/PA_lpardalis_time1_blobs.rds')
PA_lpardalis_time2_blobs <- readRDS('data/species_POPA_data/PA_lpardalis_time2_blobs.rds')

Covariates

The following covariates were chosen using the ‘variableSelection’ script:

  • bio_10: Mean Temperature of Warmest Quarter
  • bio_17: Precipitation of Driest Quarter
  • npp: Net Primary Production (npp)
  • tree: Percentage of Tree Cover
Code
bio <- rast(here::here('big_data/bio_high.tif'))
tree <- rast(here::here('big_data/tree_high.tif'))
tree <- resample(tree, bio, method='bilinear') 
names(tree) <- 'tree'
npp <- rast(here::here('big_data/npp_high.tif'))
npp <- resample(npp, bio, method='bilinear') 
names(npp) <- 'npp'

# environmental layers for PA
env <- c(bio[[c('bio_10','bio_17')]], tree, npp) %>% 
  classify(cbind(NaN, NA))

# resample and scaled values for PO
env_100k_unscaled <- terra::resample(env, latam_raster, method='bilinear') %>% 
  classify(cbind(NaN, NA))

env_100k_resampled <- env_100k_unscaled %>% scale() # scaled values to 0 mean and sd of 1

Plots

Code
# use unscaled values

tree.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["tree"]]) +
    tm_raster(palette = 'BrBG', midpoint = NA, style= "cont", n=10)  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

npp.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["npp"]]) +
    tm_raster(palette = 'BuGn', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

bio_10.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["bio_10"]]) +
    tm_raster(palette = 'RdYlBu', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

bio_17.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["bio_17"]]) +
    tm_raster(palette = 'PuOr', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

tree.plot 

Code
npp.plot

Code
bio_10.plot

Code
bio_17.plot

Thinning parametres

  • acce: Accessibility
  • country: Country of origin
Code
acce_100k_resampled <- terra::rast('big_data/acce.tif') %>% 
  classify(cbind(NaN, NA)) # values are scaled

Plot

acce.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(acce_100k_resampled[["acce"]]) +
    tm_raster(palette = 'RdYlBu', midpoint = NA, style= "cont", n=10)  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

acce.plot

Data for the model

Presence-only data

Code
# calculate area in each raster cell
PO_lpardalis.area <- terra::cellSize(PO_lpardalis_time1_raster[[1]], unit='m', transform=F) %>% 
  classify(cbind(NaN, NA)) %>% terra::values()

# calculate coordinates for each raster cell
PO_lpardalis.coords <- terra::xyFromCell(PO_lpardalis_time1_raster, cell=1:ncell(PO_lpardalis_time1_raster)) %>% as.data.frame()
names(PO_lpardalis.coords) <- c('X', 'Y')

# number of cells in the rasters (both the same)
PO_lpardalis.cell <- 1:ncell(PO_lpardalis_time1_raster)

# with the environmental variables values for each cell
PO_lpardalis.env <- env_100k_resampled[] # scaled values

# with the mean accessibility value for each cell
PO_lpardalis.acce <- acce_100k_resampled # scaled values

# calculate point count in each raster cell
PO_lpardalis_time1.count <- PO_lpardalis_time1_raster %>%
    classify(cbind(NaN, NA)) %>% terra::values() %>% as_tibble %>% 
  dplyr::select(count)

PO_lpardalis_time2.count <- PO_lpardalis_time2_raster %>%
    classify(cbind(NaN, NA)) %>% terra::values() %>% as_tibble %>% 
  dplyr::select(count)

PO_lpardalis.countries <- latam_countries %>% classify(cbind(NaN, NA))

# save time1 and time2 datasets 
PO_lpardalis_time1.model.data <- data.frame(PO_lpardalis.coords,
                                            pixel = PO_lpardalis.cell[],
                                            area = PO_lpardalis.area,
                                            pt.count = PO_lpardalis_time1.count,
                                            env = PO_lpardalis.env,
                                            acce = PO_lpardalis.acce[], 
                                            country = PO_lpardalis.countries[])

PO_lpardalis_time2.model.data <- data.frame(PO_lpardalis.coords,
                                            pixel = PO_lpardalis.cell[],
                                            area = PO_lpardalis.area,
                                            pt.count = PO_lpardalis_time2.count,
                                            env = PO_lpardalis.env,
                                            acce = PO_lpardalis.acce[], 
                                            country = PO_lpardalis.countries[])
  • Check data
Code
head(PO_lpardalis_time1.model.data) %>% kable()
X Y pixel area count env.bio_10 env.bio_17 env.tree env.npp acce countries
-4357686 3867905 1 1e+10 NA NA NA NA NA NA NA
-4257686 3867905 2 1e+10 NA NA NA NA NA NA NA
-4157686 3867905 3 1e+10 0 -1.690030 1.194141 -1.055997 -1.194929 0.0115694 47
-4057686 3867905 4 1e+10 0 -1.674205 2.200438 -1.303418 -1.597718 0.0209173 47
-3957686 3867905 5 1e+10 NA -1.680337 2.434686 -1.391160 -1.786366 0.0270081 NA
-3857686 3867905 6 1e+10 NA -1.600721 2.471932 -1.407916 -1.858230 0.0456958 NA
Code
head(PO_lpardalis_time2.model.data) %>% kable()
X Y pixel area count env.bio_10 env.bio_17 env.tree env.npp acce countries
-4357686 3867905 1 1e+10 NA NA NA NA NA NA NA
-4257686 3867905 2 1e+10 NA NA NA NA NA NA NA
-4157686 3867905 3 1e+10 0 -1.690030 1.194141 -1.055997 -1.194929 0.0115694 47
-4057686 3867905 4 1e+10 0 -1.674205 2.200438 -1.303418 -1.597718 0.0209173 47
-3957686 3867905 5 1e+10 NA -1.680337 2.434686 -1.391160 -1.786366 0.0270081 NA
-3857686 3867905 6 1e+10 NA -1.600721 2.471932 -1.407916 -1.858230 0.0456958 NA

Presence-absence data

Code
# calculate area, coordinates, and point count in each raster cell
PA_lpardalis.area.time1 <- as.numeric(PA_lpardalis_time1_blobs$blobArea) 
PA_lpardalis.area.time2 <- as.numeric(PA_lpardalis_time2_blobs$blobArea) 

PA_lpardalis.coords.time1 <- st_coordinates(st_centroid(PA_lpardalis_time1_blobs)) %>% as_tibble()
PA_lpardalis.coords.time2 <- st_coordinates(st_centroid(PA_lpardalis_time2_blobs)) %>% as_tibble()

PA_lpardalis.pixel.time1 <- terra::cells(PO_lpardalis_time1_raster, vect(st_centroid(PA_lpardalis_time1_blobs))) 
PA_lpardalis.pixel.time2 <- terra::cells(PO_lpardalis_time2_raster, vect(st_centroid(PA_lpardalis_time2_blobs))) 

# mode to calculate the most frequent value for the blob - this needs to be changed to getting the %are for each landcover
mode_raster  <- function(x, na.rm = FALSE) {
  if(na.rm){ x = x[!is.na(x)]}
  ux <- unique(x)
  return(ux[which.max(tabulate(match(x, ux)))])
}

PA_lpardalis.env.time1 <- terra::extract(x = env, 
                                         y = vect(PA_lpardalis_time1_blobs), 
                                         fun = mean, na.rm=TRUE) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) %>% scale()

PA_lpardalis.env.time2 <- terra::extract(x = env, 
                                         y = vect(PA_lpardalis_time2_blobs), 
                                         fun = mean, na.rm=TRUE) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) %>% scale()

PA_lpardalis_time1.model.data <- data.frame(pixel = PA_lpardalis.pixel.time1, 
                                            PA_lpardalis.coords.time1,
                                            area = PA_lpardalis.area.time1,
                                            presabs = PA_lpardalis_time1_blobs$presence,
                                            effort = PA_lpardalis_time1_blobs$effort,
                                            env = PA_lpardalis.env.time1[,2:5])

PA_lpardalis_time2.model.data <- data.frame(pixel = PA_lpardalis.pixel.time2, 
                                            PA_lpardalis.coords.time2,
                                            area = PA_lpardalis.area.time2,
                                            presabs = PA_lpardalis_time2_blobs$presence,
                                            effort = PA_lpardalis_time2_blobs$effort,
                                            env = PA_lpardalis.env.time2[,2:5])
  • Check data
Code
head(PA_lpardalis_time1.model.data) %>% kable()
pixel.ID pixel.cell X Y area presabs effort env.bio_10 env.bio_17 env.tree env.npp
1 1661 -1768526 1918184 241873943 1 1043 -0.4793405 -0.1603222 0.4985362 0.8189866
2 1660 -1844290 1927648 537081878 1 2404 -0.0673708 -0.0641428 0.2921552 0.3621302
3 1661 -1731415 1940145 2241827859 1 11078 0.3368660 -0.1110816 0.4185537 0.8676071
4 1660 -1820735 1948513 162791738 1 1291 -0.4023207 0.0491335 0.4079050 0.6329681
5 1661 -1782702 1959695 756798182 1 1779 -0.0803951 0.1679013 0.4614976 0.7204713
6 1567 -2552096 2041811 19990863 0 3300 -1.5983222 2.1952419 -1.0809188 -0.0070323
Code
head(PA_lpardalis_time2.model.data) %>% kable()
pixel.ID pixel.cell X Y area presabs effort env.bio_10 env.bio_17 env.tree env.npp
1 3734 -817257.8 -439796.5 2.229446e+09 1 11095 -1.0349106 -1.072807 -0.5731781 -0.7710085
2 2527 -1139813.8 954656.5 1.542295e+03 1 12276 0.5494966 -1.148757 1.1800694 -0.2779545
3 2527 -1168048.8 958569.1 1.049520e-01 1 1585 0.8570783 -1.142916 1.6631377 -0.5703401
4 2527 -1116769.5 965197.0 7.494574e+01 1 1440 0.5462587 -1.196240 1.4473626 -0.0384092
5 2527 -1108298.2 967725.2 2.546836e+01 1 400 1.4481558 -1.343220 1.2961172 0.0808761
6 2528 -1086336.4 1003862.9 2.398904e-01 1 7164 2.2519956 -1.614579 1.0532113 0.5782478

Save data

saveRDS(PO_lpardalis_time1.model.data, 'data/species_POPA_data/data_lpardalis_PO_time1.rds')
saveRDS(PO_lpardalis_time2.model.data, 'data/species_POPA_data/data_lpardalis_PO_time2.rds')
saveRDS(PA_lpardalis_time1.model.data, 'data/species_POPA_data/data_lpardalis_PA_time1.rds')
saveRDS(PA_lpardalis_time2.model.data, 'data/species_POPA_data/data_lpardalis_PA_time2.rds')

Extra: Datasets in the covariate space

The range (and interquartile range) of values for each covariate that is sampled by each dataset

Code
getRangeIQR <- function(env_PX){
  env_PX.df <- tibble(env= character(),
                          range = numeric(),
                     iqr = numeric(),
                     stringsAsFactors=FALSE)
  
  for (i in 1:ncol(env_PX)){
    df <- tibble(env = names(env_PX[i]),
             range = range(env_PX[[i]], na.rm=T)[2],
             iqr=  IQR(env_PA[[i]], na.rm = T))
    env_PX.df <- rbind(env_PX.df, df)
  }
  return(env_PX.df)
}

# covariates for PA
env_PA <- terra::extract(x = env, 
                         y = rbind(vect(PA_lpardalis_time1_blobs), 
                                   vect(PA_lpardalis_time2_blobs)), 
                                     fun = mean, na.rm=TRUE) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) 

summary(env_PA[-1])
     bio_10             bio_17            tree               npp       
 Min.   :   5.066   Min.   : 8.486   Min.   :  0.2019   Min.   :    0  
 1st Qu.: 372.000   1st Qu.:15.680   1st Qu.: 19.7279   1st Qu.: 6963  
 Median : 476.471   Median :17.499   Median : 37.9586   Median : 9498  
 Mean   : 477.946   Mean   :16.916   Mean   : 44.8148   Mean   : 9781  
 3rd Qu.: 591.082   3rd Qu.:18.477   3rd Qu.: 65.9958   3rd Qu.:12746  
 Max.   :1039.348   Max.   :32.448   Max.   :200.0000   Max.   :21085  
Code
env_PA.RangeIQR <- getRangeIQR(env_PA[-1]) %>% mutate(data='PA')

for (i in 2:ncol(env_PA)){
    range <- range(env_PA[[i]], na.rm=T)[2]
    iqr <- IQR(env_PA[[i]], na.rm = T)
    print(str_glue('PA {names(env_PA[i])} -> range: {range} - IQR: {iqr}'))
}
PA bio_10 -> range: 1039.34758506756 - IQR: 219.082695007324
PA bio_17 -> range: 32.4479134503533 - IQR: 2.79695015047445
PA tree -> range: 200 - IQR: 46.2679435287136
PA npp -> range: 21085.2897600446 - IQR: 5783.17029000419
Code
# covariates layers for PO
env_PO <- terra::resample(env, latam_raster, method='bilinear') %>% 
  classify(cbind(NaN, NA)) %>% as.data.frame()

summary(env_PO)
     bio_10              bio_17            tree               npp           
 Min.   :   0.1098   Min.   : 8.357   Min.   :  0.1423   Min.   :    1.935  
 1st Qu.: 205.2004   1st Qu.:12.711   1st Qu.: 14.4756   1st Qu.: 4567.937  
 Median : 332.9163   Median :17.435   Median : 29.7706   Median : 8232.417  
 Mean   : 351.7265   Mean   :18.803   Mean   : 35.7296   Mean   : 7782.353  
 3rd Qu.: 479.6892   3rd Qu.:23.763   3rd Qu.: 57.8680   3rd Qu.:10876.451  
 Max.   :1463.8673   Max.   :39.095   Max.   :111.9899   Max.   :17393.445  
Code
env_PO.RangeIQR <- getRangeIQR(env_PO) %>% mutate(data='PO')

for (i in 1:ncol(env_PO)){
    range <- range(env_PO[[i]], na.rm=T)[2]
    iqr <- IQR(env_PO[[i]], na.rm = T)
    print(str_glue('PO {names(env_PO[i])} -> range: {range} - IQR: {iqr}'))
}
PO bio_10 -> range: 1463.86730957031 - IQR: 274.488872528076
PO bio_17 -> range: 39.0950622558594 - IQR: 11.0515887737274
PO tree -> range: 111.989860534668 - IQR: 43.3923239707947
PO npp -> range: 17393.4453125 - IQR: 6308.51477050781