Leopardus wiedii Data Preparation

Author

Florencia Grattarola

Published

October 1, 2024

Data preparation for modelling for Leopardus wiedii, with the covariates bio_7, bio_10, npp, and nontree (selected in the previous step).

library(knitr)
library(lubridate)
library(tmap)
library(leaflet)
library(patchwork)
library(terra)
library(sf)
sf::sf_use_s2(FALSE)
library(tidyverse)
equalareaCRS <-  '+proj=laea +lon_0=-73.125 +lat_0=0 +datum=WGS84 +units=m +no_defs'
latam <- st_read('data/latam.gpkg', layer = 'latam', quiet = T)
countries <- st_read('data/latam.gpkg', layer = 'countries', quiet = T)
latam_land <- st_read('data/latam.gpkg', layer = 'latam_land', quiet = T)
latam_raster <- rast('data/latam_raster.tif', lyrs='latam')
latam_countries <- rast('data/latam_raster.tif', lyrs='countries')

# IUCN range distribution
lwiedii_IUCN <- sf::st_read('big_data/lwiedii_IUCN.shp', quiet = T) %>% sf::st_transform(crs=equalareaCRS)

MAP.IUCN <- tm_graticules(alpha = 0.3) +
    tm_shape(countries) +
    tm_fill(col='grey90') +
    tm_borders(col='grey60', alpha = 0.4) +
    tm_shape(lwiedii_IUCN) +
    tm_fill(col='grey20', alpha = 0.4) +
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

MAP.IUCN

Data

Species data (PA & PO)

# presence-only rasters
PO_lwiedii_time1_raster <- terra::rast('data/species_POPA_data/PO_lwiedii_time1_raster.tif')
PO_lwiedii_time2_raster <- terra::rast('data/species_POPA_data/PO_lwiedii_time2_raster.tif')

# blobs
PA_lwiedii_time1_blobs <- readRDS('data/species_POPA_data/PA_lwiedii_time1_blobs.rds')
PA_lwiedii_time2_blobs <- readRDS('data/species_POPA_data/PA_lwiedii_time2_blobs.rds')

Covariates

The following covariates were chosen using the ‘variableSelection’ script:

  • bio_7: Temperature Annual Range
  • bio_10: Mean Temperature of Driest Quarter
  • npp: Net Primary Production (npp)
  • nontree: Percentage of No Tree Cover
Code
bio <- rast('big_data/bio_high.tif')
npp <- rast('big_data/npp_high.tif')
npp <- resample(npp, bio, method='bilinear') 
names(npp) <- 'npp'
nontree <- rast('big_data/nontree_high.tif')
nontree <- resample(nontree, bio, method='bilinear') 
names(nontree) <- 'nontree'

# environmental layers for PA
env <- c(bio[[c('bio_7','bio_10')]], nontree, npp) %>% 
  classify(cbind(NaN, NA))

# resample and scaled values for PO
env_100k_unscaled <- terra::resample(env, latam_raster, method='bilinear') %>% 
  classify(cbind(NaN, NA))

env_100k_resampled <- env_100k_unscaled %>% scale() # scaled values to 0 mean and sd of 1

Plots

nontree.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["nontree"]]) +
    tm_raster(palette = 'YlGn', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

npp.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["npp"]]) +
    tm_raster(palette = 'Greens', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

bio_7.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["bio_7"]]) +
    tm_raster(palette = 'RdBu', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

bio_10.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["bio_10"]]) +
    tm_raster(palette = 'YlOrBr', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

nontree.plot 

npp.plot

bio_7.plot

bio_10.plot

Thinning parametres

  • acce: Accessibility
  • country: Country of origin
Code
acce_100k_resampled <- terra::rast('big_data/acce.tif') %>% 
  classify(cbind(NaN, NA)) # values are scaled

Plot

acce.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(acce_100k_resampled[["acce"]]) +
    tm_raster(palette = 'RdYlBu', midpoint = NA, style= "cont", n=10)  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

acce.plot

Data for the model

Presence-only data

Code
# calculate area in each raster cell
PO_lwiedii.area <- terra::cellSize(PO_lwiedii_time1_raster[[1]], unit='m', transform=F) %>% 
  classify(cbind(NaN, NA)) %>% terra::values()

# calculate coordinates for each raster cell
PO_lwiedii.coords <- terra::xyFromCell(PO_lwiedii_time1_raster, cell=1:ncell(PO_lwiedii_time1_raster)) %>% as.data.frame()
names(PO_lwiedii.coords) <- c('X', 'Y')

# number of cells in the rasters (both the same)
PO_lwiedii.cell <- 1:ncell(PO_lwiedii_time1_raster)

# with the environmental variables values for each cell
PO_lwiedii.env <- env_100k_resampled[] # scaled values

# with the mean accessibility value for each cell
PO_lwiedii.acce <- acce_100k_resampled # scaled values

# calculate point count in each raster cell
PO_lwiedii_time1.count <- PO_lwiedii_time1_raster %>%
    classify(cbind(NaN, NA)) %>% terra::values() %>% as_tibble %>% 
  dplyr::select(count)

PO_lwiedii_time2.count <- PO_lwiedii_time2_raster %>%
    classify(cbind(NaN, NA)) %>% terra::values() %>% as_tibble %>% 
  dplyr::select(count)

PO_lwiedii.countries <- latam_countries %>% classify(cbind(NaN, NA))

# save time1 and time2 datasets 
PO_lwiedii_time1.model.data <- data.frame(PO_lwiedii.coords,
                                            pixel = PO_lwiedii.cell[],
                                            area = PO_lwiedii.area,
                                            pt.count = PO_lwiedii_time1.count,
                                            env = PO_lwiedii.env,
                                            acce = PO_lwiedii.acce[], 
                                            country = PO_lwiedii.countries[])

PO_lwiedii_time2.model.data <- data.frame(PO_lwiedii.coords,
                                            pixel = PO_lwiedii.cell[],
                                            area = PO_lwiedii.area,
                                            pt.count = PO_lwiedii_time2.count,
                                            env = PO_lwiedii.env,
                                            acce = PO_lwiedii.acce[], 
                                            country = PO_lwiedii.countries[])
  • Check data
Code
head(PO_lwiedii_time1.model.data) %>% kable()
X Y pixel area count env.bio_7 env.bio_10 env.nontree env.npp acce countries
-4357686 3867905 1 1e+10 NA NA NA NA NA NA NA
-4257686 3867905 2 1e+10 NA NA NA NA NA NA NA
-4157686 3867905 3 1e+10 0 0.9201098 -1.690030 0.4573118 -1.194929 0.0115694 47
-4057686 3867905 4 1e+10 0 0.2354014 -1.674205 -0.6126802 -1.597718 0.0209173 47
-3957686 3867905 5 1e+10 NA -0.2380332 -1.680337 -0.9573888 -1.786366 0.0270081 NA
-3857686 3867905 6 1e+10 NA -0.2966836 -1.600721 -1.6611980 -1.858230 0.0456958 NA
Code
head(PO_lwiedii_time2.model.data) %>% kable()
X Y pixel area count env.bio_7 env.bio_10 env.nontree env.npp acce countries
-4357686 3867905 1 1e+10 NA NA NA NA NA NA NA
-4257686 3867905 2 1e+10 NA NA NA NA NA NA NA
-4157686 3867905 3 1e+10 0 0.9201098 -1.690030 0.4573118 -1.194929 0.0115694 47
-4057686 3867905 4 1e+10 0 0.2354014 -1.674205 -0.6126802 -1.597718 0.0209173 47
-3957686 3867905 5 1e+10 NA -0.2380332 -1.680337 -0.9573888 -1.786366 0.0270081 NA
-3857686 3867905 6 1e+10 NA -0.2966836 -1.600721 -1.6611980 -1.858230 0.0456958 NA

Presence-absence data

Code
# calculate area, coordinates, and point count in each raster cell
PA_lwiedii.area.time1 <- as.numeric(PA_lwiedii_time1_blobs$blobArea) 
PA_lwiedii.area.time2 <- as.numeric(PA_lwiedii_time2_blobs$blobArea) 

PA_lwiedii.coords.time1 <- st_coordinates(st_centroid(PA_lwiedii_time1_blobs)) %>% as_tibble()
PA_lwiedii.coords.time2 <- st_coordinates(st_centroid(PA_lwiedii_time2_blobs)) %>% as_tibble()

PA_lwiedii.pixel.time1 <- terra::cells(PO_lwiedii_time1_raster, vect(st_centroid(PA_lwiedii_time1_blobs))) 
PA_lwiedii.pixel.time2 <- terra::cells(PO_lwiedii_time2_raster, vect(st_centroid(PA_lwiedii_time2_blobs))) 

# mode to calculate the most frequent value for the blob - this needs to be changed to getting the %are for each landcover
mode_raster  <- function(x, na.rm = FALSE) {
  if(na.rm){ x = x[!is.na(x)]}
  ux <- unique(x)
  return(ux[which.max(tabulate(match(x, ux)))])
}

PA_lwiedii.env.time1 <- terra::extract(x = env, 
                                       y = vect(PA_lwiedii_time1_blobs), 
                                       fun = mean, na.rm=TRUE) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) %>% scale()

PA_lwiedii.env.time2 <- terra::extract(x = env, 
                                       y = vect(PA_lwiedii_time2_blobs), 
                                       fun = mean, na.rm=TRUE) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) %>% scale()

PA_lwiedii_time1.model.data <- data.frame(pixel = PA_lwiedii.pixel.time1, 
                                            PA_lwiedii.coords.time1,
                                            area = PA_lwiedii.area.time1,
                                            presabs = PA_lwiedii_time1_blobs$presence,
                                            effort = PA_lwiedii_time1_blobs$effort,
                                            env = PA_lwiedii.env.time1[,2:5])

PA_lwiedii_time2.model.data <- data.frame(pixel = PA_lwiedii.pixel.time2, 
                                            PA_lwiedii.coords.time2,
                                            area = PA_lwiedii.area.time2,
                                            presabs = PA_lwiedii_time2_blobs$presence,
                                            effort = PA_lwiedii_time2_blobs$effort,
                                            env = PA_lwiedii.env.time2[,2:5])
  • Check data
Code
head(PA_lwiedii_time1.model.data) %>% kable()
pixel.ID pixel.cell X Y area presabs effort env.bio_7 env.bio_10 env.nontree env.npp
1 1661 -1768526 1918184 241873943 0 1043 -0.1425514 -0.4793405 -0.9811847 0.8189866
2 1660 -1844290 1927648 537081878 1 2404 0.1760225 -0.0673708 -0.7325427 0.3621302
3 1661 -1731415 1940145 2241827859 1 11078 -0.1478967 0.3368660 -0.8964063 0.8676071
4 1660 -1820735 1948513 162791738 1 1291 0.0855441 -0.4023207 -0.8337618 0.6329681
5 1661 -1782702 1959695 756798182 1 1779 -0.0120086 -0.0803951 -0.9473701 0.7204713
6 1567 -2552096 2041811 19990863 0 3300 1.4353538 -1.5983222 0.3563696 -0.0070323
Code
head(PA_lwiedii_time2.model.data) %>% kable()
pixel.ID pixel.cell X Y area presabs effort env.bio_7 env.bio_10 env.nontree env.npp
1 3734 -817257.8 -439796.5 2.229446e+09 1 11095 3.5626612 -1.0349106 0.3606534 -0.7710085
2 2527 -1139813.8 954656.5 1.542295e+03 1 12276 -0.2985580 0.5494966 -1.1677654 -0.2779545
3 2527 -1168048.8 958569.1 1.049520e-01 1 1585 0.0462369 0.8570783 -1.4965330 -0.5703401
4 2527 -1116769.5 965197.0 7.494574e+01 1 1440 0.2086100 0.5462587 -1.4304748 -0.0384092
5 2527 -1108298.2 967725.2 2.546836e+01 0 400 -0.1381646 1.4481558 -1.2623775 0.0808761
6 2528 -1086336.4 1003862.9 2.398904e-01 1 7164 0.2276377 2.2519956 -0.9842253 0.5782478

Save data

saveRDS(PO_lwiedii_time1.model.data, 'data/species_POPA_data/data_lwiedii_PO_time1.rds')
saveRDS(PO_lwiedii_time2.model.data, 'data/species_POPA_data/data_lwiedii_PO_time2.rds')
saveRDS(PA_lwiedii_time1.model.data, 'data/species_POPA_data/data_lwiedii_PA_time1.rds')
saveRDS(PA_lwiedii_time2.model.data, 'data/species_POPA_data/data_lwiedii_PA_time2.rds')

Extra: Datasets in the covariate space

The range (and interquartile range) of values for each covariate that is sampled by each dataset

Code
getRangeIQR <- function(env_PX){
  env_PX.df <- tibble(env= character(),
                          range = numeric(),
                     iqr = numeric(),
                     stringsAsFactors=FALSE)
  
  for (i in 1:ncol(env_PX)){
    df <- tibble(env = names(env_PX[i]),
             range = range(env_PX[[i]], na.rm=T)[2],
             iqr=  IQR(env_PA[[i]], na.rm = T))
    env_PX.df <- rbind(env_PX.df, df)
  }
  return(env_PX.df)
}

# covariates for PA
env_PA <- terra::extract(x = env, 
                         y = rbind(vect(PA_lwiedii_time1_blobs), 
                                   vect(PA_lwiedii_time2_blobs)), 
                                     fun = mean, na.rm=TRUE) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) 

summary(env_PA[-1])
     bio_7             bio_10            nontree            npp       
 Min.   :  6.456   Min.   :   5.066   Min.   :  2.45   Min.   :    0  
 1st Qu.: 46.754   1st Qu.: 372.000   1st Qu.: 34.94   1st Qu.: 6963  
 Median : 64.065   Median : 476.471   Median : 57.67   Median : 9498  
 Mean   : 61.137   Mean   : 477.946   Mean   : 55.11   Mean   : 9781  
 3rd Qu.: 76.893   3rd Qu.: 591.082   3rd Qu.: 69.74   3rd Qu.:12746  
 Max.   :170.527   Max.   :1039.348   Max.   :200.00   Max.   :21085  
Code
env_PA.RangeIQR <- getRangeIQR(env_PA[-1]) %>% mutate(data='PA')

for (i in 2:ncol(env_PA)){
    range <- range(env_PA[[i]], na.rm=T)[2]
    iqr <- IQR(env_PA[[i]], na.rm = T)
    print(str_glue('PA {names(env_PA[i])} -> range: {range} - IQR: {iqr}'))
}
PA bio_7 -> range: 170.527460734049 - IQR: 30.1394629781462
PA bio_10 -> range: 1039.34758506756 - IQR: 219.082695007324
PA nontree -> range: 200 - IQR: 34.8002891540527
PA npp -> range: 21085.2897600446 - IQR: 5783.17029000419
Code
# covariates layers for PO
env_PO <- terra::resample(env, latam_raster, method='bilinear') %>% 
  classify(cbind(NaN, NA)) %>% as.data.frame()

summary(env_PO)
     bio_7             bio_10             nontree             npp           
 Min.   :  6.735   Min.   :   0.1098   Min.   :  6.068   Min.   :    1.935  
 1st Qu.: 41.522   1st Qu.: 205.2004   1st Qu.: 32.278   1st Qu.: 4567.937  
 Median : 60.394   Median : 332.9163   Median : 52.772   Median : 8232.417  
 Mean   : 60.435   Mean   : 351.7265   Mean   : 49.836   Mean   : 7782.353  
 3rd Qu.: 78.506   3rd Qu.: 479.6892   3rd Qu.: 66.518   3rd Qu.:10876.451  
 Max.   :154.105   Max.   :1463.8673   Max.   :124.838   Max.   :17393.445  
Code
env_PO.RangeIQR <- getRangeIQR(env_PO) %>% mutate(data='PO')

for (i in 1:ncol(env_PO)){
    range <- range(env_PO[[i]], na.rm=T)[2]
    iqr <- IQR(env_PO[[i]], na.rm = T)
    print(str_glue('PO {names(env_PO[i])} -> range: {range} - IQR: {iqr}'))
}
PO bio_7 -> range: 154.105056762695 - IQR: 36.9837703704834
PO bio_10 -> range: 1463.86730957031 - IQR: 274.488872528076
PO nontree -> range: 124.83846282959 - IQR: 34.2391386032104
PO npp -> range: 17393.4453125 - IQR: 6308.51477050781