Eira barbara Data Preparation

Author

Florencia Grattarola

Published

October 1, 2024

Data preparation for modelling for Eira barbara, with the covariates bio10, bio17, npp, and nontree (selected in the previous step).

R Libraries

library(knitr)
library(lubridate)
library(tmap)
library(leaflet)
library(patchwork)
library(terra)
library(sf)
sf::sf_use_s2(FALSE)
library(tidyverse)

Basemaps

equalareaCRS <-  '+proj=laea +lon_0=-73.125 +lat_0=0 +datum=WGS84 +units=m +no_defs'
latam <- st_read('data/latam.gpkg', layer = 'latam', quiet = T)
countries <- st_read('data/latam.gpkg', layer = 'countries', quiet = T)
latam_land <- st_read('data/latam.gpkg', layer = 'latam_land', quiet = T)
latam_raster <- rast('data/latam_raster.tif', lyrs='latam')
latam_countries <- rast('data/latam_raster.tif', lyrs='countries')

# IUCN range distribution
ebarbara_IUCN <- sf::st_read('big_data/ebarbara_IUCN.shp', quiet = T) %>% sf::st_transform(crs=equalareaCRS)

MAP.IUCN <- tm_graticules(alpha = 0.3) +
    tm_shape(countries) +
    tm_fill(col='grey90') +
    tm_borders(col='grey60', alpha = 0.4) +
    tm_shape(ebarbara_IUCN) +
    tm_fill(col='grey20', alpha = 0.4) +
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

MAP.IUCN

Data

Species data (PA & PO)

# presence-only rasters
PO_ebarbara_time1_raster <- terra::rast('data/species_POPA_data/PO_ebarbara_time1_raster.tif')
PO_ebarbara_time2_raster <- terra::rast('data/species_POPA_data/PO_ebarbara_time2_raster.tif')

# blobs
PA_ebarbara_time1_blobs <- readRDS('data/species_POPA_data/PA_ebarbara_time1_blobs.rds')
PA_ebarbara_time2_blobs <- readRDS('data/species_POPA_data/PA_ebarbara_time2_blobs.rds')

Covariates

The following covariates were chosen using the ‘variableSelection’ script:

bio10: Temperature Annual Range
bio17: Mean Temperature of Warmest Quarter
npp: Net Primary Production (npp)
nontree: Percentage of No Tree Cover

Code

bio <- rast(here::here('big_data/bio_high.tif'))
npp <- rast(here::here('big_data/npp_high.tif'))
npp <- resample(npp, bio, method='bilinear')


|---------|---------|---------|---------|
=========================================

Code

names(npp) <- 'npp'
nontree <- rast(here::here('big_data/nontree_high.tif'))
nontree <- resample(nontree, bio, method='bilinear')


|---------|---------|---------|---------|
=========================================

Code

names(nontree) <- 'nontree'

# environmental layers for PA
env <- c(bio[[c('bio_10','bio_17')]], nontree, npp) %>% 
  classify(cbind(NaN, NA))


|---------|---------|---------|---------|
=========================================

Code

# resample and scaled values for PO
env_100k_unscaled <- terra::resample(env, latam_raster, method='bilinear') %>% 
  classify(cbind(NaN, NA))

env_100k_resampled <- env_100k_unscaled %>% scale() # scaled values to 0 mean and sd of 1

Plots

nontree.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["nontree"]]) +
    tm_raster(palette = 'YlGn', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

npp.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["npp"]]) +
    tm_raster(palette = 'BuGn', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

bio_10.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["bio_10"]]) +
    tm_raster(palette = 'YlOrRd', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

bio_17.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(env_100k_unscaled[["bio_17"]]) +
    tm_raster(palette = 'PuBu', midpoint = NA, style= "cont")  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

nontree.plot

npp.plot

bio_10.plot

bio_17.plot

Thinning parametres

acce: Accessibility
country: Country of origin

Code

acce_100k_resampled <- terra::rast('big_data/acce.tif') %>% 
  classify(cbind(NaN, NA)) # values are scaled

Plot

acce.plot <- tm_graticules(alpha = 0.3) +  
    tm_shape(acce_100k_resampled[["acce"]]) +
    tm_raster(palette = 'RdYlBu', midpoint = NA, style= "cont", n=10)  + 
    tm_shape(latam) +
    tm_borders(alpha = 0.3) + 
    tm_layout(legend.outside = T, frame.lwd = 0.3, scale=1.2, legend.outside.size = 0.1)

acce.plot

Data for the model

Presence-only data

Code

# calculate area in each raster cell
PO_ebarbara.area <- terra::cellSize(PO_ebarbara_time1_raster[[1]], unit='m', transform=F) %>% 
  classify(cbind(NaN, NA)) %>% terra::values()

# calculate coordinates for each raster cell
PO_ebarbara.coords <- terra::xyFromCell(PO_ebarbara_time1_raster, cell=1:ncell(PO_ebarbara_time1_raster)) %>% as.data.frame()
names(PO_ebarbara.coords) <- c('X', 'Y')

# number of cells in the rasters (both the same)
PO_ebarbara.cell <- 1:ncell(PO_ebarbara_time1_raster)

# with the environmental variables values for each cell
PO_ebarbara.env <- env_100k_resampled[] # scaled values

# with the mean accessibility value for each cell
PO_ebarbara.acce <- acce_100k_resampled # scaled values

# calculate point count in each raster cell
PO_ebarbara_time1.count <- PO_ebarbara_time1_raster %>%
    classify(cbind(NaN, NA)) %>% terra::values() %>% as_tibble %>% 
  dplyr::select(count)

PO_ebarbara_time2.count <- PO_ebarbara_time2_raster %>%
    classify(cbind(NaN, NA)) %>% terra::values() %>% as_tibble %>% 
  dplyr::select(count)

PO_ebarbara.countries <- latam_countries %>% classify(cbind(NaN, NA))

# save time1 and time2 datasets 
PO_ebarbara_time1.model.data <- data.frame(PO_ebarbara.coords,
                                            pixel = PO_ebarbara.cell[],
                                            area = PO_ebarbara.area,
                                            pt.count = PO_ebarbara_time1.count,
                                            env = PO_ebarbara.env,
                                            acce = PO_ebarbara.acce[], 
                                            country = PO_ebarbara.countries[])

PO_ebarbara_time2.model.data <- data.frame(PO_ebarbara.coords,
                                            pixel = PO_ebarbara.cell[],
                                            area = PO_ebarbara.area,
                                            pt.count = PO_ebarbara_time2.count,
                                            env = PO_ebarbara.env,
                                            acce = PO_ebarbara.acce[], 
                                            country = PO_ebarbara.countries[])

Check data

Code

head(PO_ebarbara_time1.model.data) %>% kable()

X	Y	pixel	area	count	env.bio_10	env.bio_17	env.nontree	env.npp	acce	countries
-4357686	3867905	1	1e+10	NA	NA	NA	NA	NA	NA	NA
-4257686	3867905	2	1e+10	NA	NA	NA	NA	NA	NA	NA
-4157686	3867905	3	1e+10	0	-1.690030	1.194141	0.4573118	-1.194929	0.0115694	47
-4057686	3867905	4	1e+10	0	-1.674205	2.200438	-0.6126802	-1.597718	0.0209173	47
-3957686	3867905	5	1e+10	NA	-1.680337	2.434686	-0.9573888	-1.786366	0.0270081	NA
-3857686	3867905	6	1e+10	NA	-1.600721	2.471932	-1.6611980	-1.858230	0.0456958	NA

Code

head(PO_ebarbara_time2.model.data) %>% kable()

X	Y	pixel	area	count	env.bio_10	env.bio_17	env.nontree	env.npp	acce	countries
-4357686	3867905	1	1e+10	NA	NA	NA	NA	NA	NA	NA
-4257686	3867905	2	1e+10	NA	NA	NA	NA	NA	NA	NA
-4157686	3867905	3	1e+10	0	-1.690030	1.194141	0.4573118	-1.194929	0.0115694	47
-4057686	3867905	4	1e+10	0	-1.674205	2.200438	-0.6126802	-1.597718	0.0209173	47
-3957686	3867905	5	1e+10	NA	-1.680337	2.434686	-0.9573888	-1.786366	0.0270081	NA
-3857686	3867905	6	1e+10	NA	-1.600721	2.471932	-1.6611980	-1.858230	0.0456958	NA

Presence-absence data

Code

# calculate area, coordinates, and point count in each raster cell
PA_ebarbara.area.time1 <- as.numeric(PA_ebarbara_time1_blobs$blobArea) 
PA_ebarbara.area.time2 <- as.numeric(PA_ebarbara_time2_blobs$blobArea) 

PA_ebarbara.coords.time1 <- st_coordinates(st_centroid(PA_ebarbara_time1_blobs)) %>% as_tibble()
PA_ebarbara.coords.time2 <- st_coordinates(st_centroid(PA_ebarbara_time2_blobs)) %>% as_tibble()

PA_ebarbara.pixel.time1 <- terra::cells(PO_ebarbara_time1_raster, vect(st_centroid(PA_ebarbara_time1_blobs))) 
PA_ebarbara.pixel.time2 <- terra::cells(PO_ebarbara_time2_raster, vect(st_centroid(PA_ebarbara_time2_blobs))) 

# mode to calculate the most frequent value for the blob - this needs to be changed to getting the %are for each landcover
mode_raster  <- function(x, na.rm = FALSE) {
  if(na.rm){ x = x[!is.na(x)]}
  ux <- unique(x)
  return(ux[which.max(tabulate(match(x, ux)))])
}

PA_ebarbara.env.time1 <- terra::extract(x = env, 
                                        y = vect(PA_ebarbara_time1_blobs), 
                                        fun = mean, na.rm=TRUE) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) %>% scale()

PA_ebarbara.env.time2 <- terra::extract(x = env, 
                                        y = vect(PA_ebarbara_time2_blobs), 
                                        fun = mean, na.rm=TRUE) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) %>% scale()

PA_ebarbara_time1.model.data <- data.frame(pixel = PA_ebarbara.pixel.time1, 
                                            PA_ebarbara.coords.time1,
                                            area = PA_ebarbara.area.time1,
                                            presabs = PA_ebarbara_time1_blobs$presence,
                                            effort = PA_ebarbara_time1_blobs$effort,
                                            env = PA_ebarbara.env.time1[,2:5])

PA_ebarbara_time2.model.data <- data.frame(pixel = PA_ebarbara.pixel.time2, 
                                            PA_ebarbara.coords.time2,
                                            area = PA_ebarbara.area.time2,
                                            presabs = PA_ebarbara_time2_blobs$presence,
                                            effort = PA_ebarbara_time2_blobs$effort,
                                            env = PA_ebarbara.env.time2[,2:5])

Check data

Code

head(PA_ebarbara_time1.model.data) %>% kable()

pixel.ID	pixel.cell	X	Y	area	presabs	effort	env.bio_10	env.bio_17	env.nontree	env.npp
1	1661	-1768526	1918184	241873943	0	1043	-0.4793405	-0.1603222	-0.9811847	0.8189866
2	1660	-1844290	1927648	537081878	1	2404	-0.0673708	-0.0641428	-0.7325427	0.3621302
3	1661	-1731415	1940145	2241827859	1	11078	0.3368660	-0.1110816	-0.8964063	0.8676071
4	1660	-1820735	1948513	162791738	0	1291	-0.4023207	0.0491335	-0.8337618	0.6329681
5	1661	-1782702	1959695	756798182	0	1779	-0.0803951	0.1679013	-0.9473701	0.7204713
6	1567	-2552096	2041811	19990863	0	3300	-1.5983222	2.1952419	0.3563696	-0.0070323

Code

head(PA_ebarbara_time2.model.data) %>% kable()

pixel.ID	pixel.cell	X	Y	area	presabs	effort	env.bio_10	env.bio_17	env.nontree	env.npp
1	3734	-817257.8	-439796.5	2.229446e+09	1	11095	-1.0349106	-1.072807	0.3606534	-0.7710085
2	2527	-1139813.8	954656.5	1.542295e+03	1	12276	0.5494966	-1.148757	-1.1677654	-0.2779545
3	2527	-1168048.8	958569.1	1.049520e-01	1	1585	0.8570783	-1.142916	-1.4965330	-0.5703401
4	2527	-1116769.5	965197.0	7.494574e+01	1	1440	0.5462587	-1.196240	-1.4304748	-0.0384092
5	2527	-1108298.2	967725.2	2.546836e+01	1	400	1.4481558	-1.343220	-1.2623775	0.0808761
6	2528	-1086336.4	1003862.9	2.398904e-01	1	7164	2.2519956	-1.614579	-0.9842253	0.5782478

Save data

saveRDS(PO_ebarbara_time1.model.data, 'data/species_POPA_data/data_ebarbara_PO_time1.rds')
saveRDS(PO_ebarbara_time2.model.data, 'data/species_POPA_data/data_ebarbara_PO_time2.rds')
saveRDS(PA_ebarbara_time1.model.data, 'data/species_POPA_data/data_ebarbara_PA_time1.rds')
saveRDS(PA_ebarbara_time2.model.data, 'data/species_POPA_data/data_ebarbara_PA_time2.rds')

Extra: Datasets in the covariate space

The range (and interquartile range) of values for each covariate that is sampled by each dataset

Code

getRangeIQR <- function(env_PX){
  env_PX.df <- tibble(env= character(),
                          range = numeric(),
                     iqr = numeric(),
                     stringsAsFactors=FALSE)
  
  for (i in 1:ncol(env_PX)){
    df <- tibble(env = names(env_PX[i]),
             range = range(env_PX[[i]], na.rm=T)[2],
             iqr=  IQR(env_PA[[i]], na.rm = T))
    env_PX.df <- rbind(env_PX.df, df)
  }
  return(env_PX.df)
}

# covariates for PA
env_PA <- terra::extract(x = env, 
                         y = rbind(vect(PA_ebarbara_time1_blobs), 
                                   vect(PA_ebarbara_time2_blobs)), 
                                     fun = mean, na.rm=TRUE) %>% 
  mutate(across(where(is.numeric), ~ifelse(is.nan(.), NA, .))) 

summary(env_PA[-1])

     bio_10             bio_17          nontree            npp       
 Min.   :   5.066   Min.   : 8.486   Min.   :  2.45   Min.   :    0  
 1st Qu.: 372.000   1st Qu.:15.680   1st Qu.: 34.94   1st Qu.: 6963  
 Median : 476.471   Median :17.499   Median : 57.67   Median : 9498  
 Mean   : 477.946   Mean   :16.916   Mean   : 55.11   Mean   : 9781  
 3rd Qu.: 591.082   3rd Qu.:18.477   3rd Qu.: 69.74   3rd Qu.:12746  
 Max.   :1039.348   Max.   :32.448   Max.   :200.00   Max.   :21085

Code

env_PA.RangeIQR <- getRangeIQR(env_PA[-1]) %>% mutate(data='PA')

for (i in 2:ncol(env_PA)){
    range <- range(env_PA[[i]], na.rm=T)[2]
    iqr <- IQR(env_PA[[i]], na.rm = T)
    print(str_glue('PA {names(env_PA[i])} -> range: {range} - IQR: {iqr}'))
}

PA bio_10 -> range: 1039.34758506756 - IQR: 219.082695007324
PA bio_17 -> range: 32.4479134503533 - IQR: 2.79695015047445
PA nontree -> range: 200 - IQR: 34.8002891540527
PA npp -> range: 21085.2897600446 - IQR: 5783.17029000419

Code

# covariates layers for PO
env_PO <- terra::resample(env, latam_raster, method='bilinear') %>% 
  classify(cbind(NaN, NA)) %>% as.data.frame()

summary(env_PO)

     bio_10              bio_17          nontree             npp           
 Min.   :   0.1098   Min.   : 8.357   Min.   :  6.068   Min.   :    1.935  
 1st Qu.: 205.2004   1st Qu.:12.711   1st Qu.: 32.278   1st Qu.: 4567.937  
 Median : 332.9163   Median :17.435   Median : 52.772   Median : 8232.417  
 Mean   : 351.7265   Mean   :18.803   Mean   : 49.836   Mean   : 7782.353  
 3rd Qu.: 479.6892   3rd Qu.:23.763   3rd Qu.: 66.518   3rd Qu.:10876.451  
 Max.   :1463.8673   Max.   :39.095   Max.   :124.838   Max.   :17393.445

Code

env_PO.RangeIQR <- getRangeIQR(env_PO) %>% mutate(data='PO')

for (i in 1:ncol(env_PO)){
    range <- range(env_PO[[i]], na.rm=T)[2]
    iqr <- IQR(env_PO[[i]], na.rm = T)
    print(str_glue('PO {names(env_PO[i])} -> range: {range} - IQR: {iqr}'))
}

PO bio_10 -> range: 1463.86730957031 - IQR: 274.488872528076
PO bio_17 -> range: 39.0950622558594 - IQR: 11.0515887737274
PO nontree -> range: 124.83846282959 - IQR: 34.2391386032104
PO npp -> range: 17393.4453125 - IQR: 6308.51477050781