library(patchwork)
library(knitr)
library(lubridate)
library(terra)
library(sf)
sf::sf_use_s2(FALSE)
library(tidyverse)Nasua nasua Data Generation
Data generation for Nasua nasua (blobs and rasters).
- R Libraries
 
- Basemaps
 
equalareaCRS <-  '+proj=laea +lon_0=-73.125 +lat_0=0 +datum=WGS84 +units=m +no_defs'
latam <- st_read('data/latam.gpkg', layer = 'latam', quiet = T)
countries <- st_read('data/latam.gpkg', layer = 'countries', quiet = T)
latam_land <- st_read('data/latam.gpkg', layer = 'latam_land', quiet = T)
latam_raster <- rast('data/latam_raster.tif', lyrs='latam')
latam_countries <- rast('data/latam_raster.tif', lyrs='countries')
nnasua_IUCN <- sf::st_read('big_data/nnasua_IUCN.shp', quiet = T) %>% sf::st_transform(crs=equalareaCRS)- Dataset
 
data_carnivores_PO <- read_csv('data/data_PO.csv')
data_carnivores_PA <- read_csv('data/data_PA.csv')Check data for each years on example species
Code
# Presence-only
data_carnivores_PO %>% filter(species == 'Nasua nasua') %>% filter(year>=2000) %>% 
  filter(!(countryCode=='UY'& stateProvince=='Maldonado')) %>% # these two records are not wild
  filter(!(countryCode=='CR' | countryCode=='GT')) %>% # these records are not reliable
  mutate(period=ifelse(year<=2013, 'first period: 2000-2013', 'second period: 2014-2021')) %>%
  group_by(species, period) %>% count() %>% pivot_wider(names_from = 'period', values_from = n) %>% kable()| species | first period: 2000-2013 | second period: 2014-2021 | 
|---|---|---|
| Nasua nasua | 465 | 1513 | 
Code
# Presence-absence
data_carnivores_PA %>% filter(species == 'Nasua nasua' & presence ==1) %>% 
  filter(countryCode!='MX') %>%  # this record is not reliable
  mutate(period=ifelse(year(date_start)<=2013, 'first period: 2000-2013', 'second period: 2014-2021')) %>%
  group_by(species, period) %>% count() %>% pivot_wider(names_from = 'period', values_from = n) %>% kable()| species | first period: 2000-2013 | second period: 2014-2021 | 
|---|---|---|
| Nasua nasua | 878 | 1028 | 
Presence-absence data
Create blobs
Blobs contain data on total effort, total temporal span, a list of the records IDs that are included for the blob and an area size of the combined areas.
Code
data_PA_GIS <- data_carnivores_PA %>% 
  filter(species == 'Nasua nasua') %>% 
  filter(!(countryCode=='CR' | countryCode=='GT' | countryCode=='MX' | countryCode=='PA')) %>% # these records are not reliable
  mutate(year=lubridate::year(date_end)) %>% 
  filter(year>=2000, year<=2021) %>% 
  as.data.frame() %>% 
  sf::st_as_sf(coords=c('decimalLongitude', 'decimalLatitude')) %>% 
  st_set_crs(4326) 
PA <- st_transform(data_PA_GIS, crs=equalareaCRS)
# create a blob for all sites
# blobs are definded as areas where some sampling was done over the entire period
periodStart <- ymd('2000-01-01')
periodEnd <- ymd('2014-01-01')
periodMax <- ymd('2020-12-31')
PA_allsites <- PA %>%
  mutate(start_date=as_date(ifelse(date_end-date_start<0, date_end, date_start)),
         end_date=as_date(ifelse(date_end-date_start<0, date_start, date_end))) %>%
  mutate(span=interval(date_start, date_end),
         target= interval(periodEnd, today()),
         period=ifelse(span %within% target, 'time2', 'time1')) %>% 
  distinct(independentLocation, period, .keep_all = T) 
PA_allsites_buff <- st_buffer(PA_allsites, sqrt(PA_allsites$area_m2/pi))
blobs_time1 <- PA_allsites_buff %>% filter(period=='time1') %>% 
  st_union() %>% st_cast('POLYGON') %>% st_sf('ID'= 1:length(.)) 
blobs_time2 <- PA_allsites_buff %>% filter(period=='time2') %>% 
  st_union() %>% st_cast('POLYGON') %>% st_sf('ID'= 1:length(.)) 
# to calculate the effort in days for each blob, we need to differenciate the two periods
PA_allsites_effort_and_time  <- PA %>%
  mutate(start_date=as_date(ifelse(date_end-date_start<0, date_end, date_start)),
         end_date=as_date(ifelse(date_end-date_start<0, date_start, date_end))) %>%
  mutate(span=interval(date_start, date_end),
         target= interval(periodEnd, today()),
         period=ifelse(span %within% target, 'time2', 'time1')) %>% 
  group_by(independentLocation, independentYearSpan, area_m2) %>%
  summarise(effort=max(effort),
            period=paste(unique(period)),
            recordIDs=paste(unique(recordID), collapse = ';'),
            maxEndDate=max(end_date),
            minStartDate=min(start_date)) %>% 
  mutate(maxTemporalSpan= maxEndDate-minStartDate)
# to calculate the effort in days for each blob, we need to differenciate the two periods
blobs_efforttime_time1 <- st_join(blobs_time1, 
                            PA_allsites_effort_and_time %>% filter(period=='time1'),
                            left=TRUE)  %>% 
  group_by(ID) %>% 
  summarise(effort=sum(effort),
            temporalSpan=sum(maxTemporalSpan),
            recordIDs=paste(unique(recordIDs), collapse = ';')) %>% 
  mutate(blobArea=st_area(.)) 
blobs_efforttime_time2 <- st_join(blobs_time2, 
                                PA_allsites_effort_and_time %>% filter(period=='time2'),
                                left=TRUE)  %>% 
  group_by(ID) %>% 
  summarise(effort=sum(effort),
            temporalSpan=sum(maxTemporalSpan),
            recordIDs=paste(unique(recordIDs), collapse = ';')) %>% 
  mutate(blobArea=st_area(.))Calculate PA blobs
Code
# function  to create blobs for time1 and time2 for each species
calculate_PA_sp_blobs <- function(PA, blobs, sp, time){
  periodStart <- ymd('2000-01-01')
  periodEnd <- ymd('2014-01-01')
  df_period <- PA %>% filter(species==sp & presence==1) %>% 
    filter(date_start>=periodStart) %>% 
    mutate(span=interval(date_start, date_end),
           target=interval(periodEnd, today()),
           period=ifelse(span %within% target, 'time2', 'time1')) %>% 
    filter(period==time) %>% select(species, presence)
  
  df_period_blobs <- st_join(blobs, df_period,
                             left=TRUE, join = st_contains) %>%
    group_by(ID) %>% 
    summarise(presence=max(presence),
              temporalSpan=max(temporalSpan),
              effort=max(effort),
              blobArea=max(blobArea)) %>% 
    mutate(presence=ifelse(is.na(presence), 0, presence))
  return(df_period_blobs)
}
# Nasua nasua
PA_nnasua_time1_blobs <- calculate_PA_sp_blobs(PA, blobs_efforttime_time1, 'Nasua nasua', 'time1')
PA_nnasua_time2_blobs <- calculate_PA_sp_blobs(PA, blobs_efforttime_time2, 'Nasua nasua', 'time2')Plots
Code
data_PA_time1_time2 <- rbind(PA_nnasua_time1_blobs %>% mutate(period='time1'), 
                             PA_nnasua_time2_blobs %>% mutate(period='time2'))
gg_PA_time1 <- ggplot() + 
    geom_sf(data=countries, fill='#fafaf8', col='#bfc5c7', size=0.2) +
    geom_sf(data=nnasua_IUCN, fill='grey60', alpha=0.5, col='grey90', size=0.1) +
    geom_sf(data=data_PA_time1_time2 %>% filter(period=='time1') %>% st_buffer(20000), 
            aes(fill=factor(presence)), col=NA, show.legend = F) +
    coord_sf(xlim = c(-3500000, 4100000), ylim = c(-4400000, 3000000)) + # st_bbox(nnasua_IUCN)
    scale_fill_manual(values = c("#474545","#E31A1C"))+
    scale_color_manual(values = c("#474545","#E31A1C")) +
    labs(title='PA', subtitle='time1: 2000-2013', col='') +
    theme_bw() +    
    theme(text=element_text(size = 14)) 
gg_PA_time2 <- ggplot() + 
    geom_sf(data=countries, fill='#fafaf8', col='#bfc5c7', size=0.2) +
    geom_sf(data=nnasua_IUCN, fill='grey60', alpha=0.5, col='grey90', size=0.1) +
    geom_sf(data=data_PA_time1_time2 %>% filter(period=='time2') %>% st_buffer(20000), 
            aes(fill=factor(presence), col=factor(presence)), show.legend = F) +
    coord_sf(xlim = c(-3500000, 4100000), ylim = c(-4400000, 3000000)) + # st_bbox(nnasua_IUCN)
    scale_fill_manual(values = c("#434445","#1F78B4"))+
    scale_color_manual(values = c("#434445","#1F78B4")) +
    labs(title='', subtitle='time2: 2014 to 2021', col='') +
    theme_bw() +    
    theme(text=element_text(size = 14)) 
gg_PA_time1 + gg_PA_time2Presence-only data
Calculate PO rasters
Code
# rasterisation of the presence-only data
data_PO_GIS <- data_carnivores_PO %>% 
  filter(species == 'Nasua nasua') %>%
  filter(!(countryCode=='UY'& stateProvince=='Maldonado')) %>% # these two records are not wild
  filter(!(countryCode=='CR' | countryCode=='GT')) %>% # these records are not reliable
  mutate(year=lubridate::year(eventDate)) %>% 
  filter(year>=2000, year<=2021) %>% 
  as.data.frame() %>% 
  sf::st_as_sf(coords=c('decimalLongitude', 'decimalLatitude')) %>% 
  st_set_crs(4326) 
PO <- st_transform(data_PO_GIS, crs=equalareaCRS)
# functions to create presence-only rasters for time1 and time2 for each species
calculate_PO_sp_period_raster <- function(PO, sp, raster, time){
  periodStart <- 2000
  periodEnd <- 2014
  df_period <- PO %>% filter(species==sp & !is.na(year)) %>% 
    filter(year>=periodStart) %>% 
    mutate(period=ifelse(year>=periodEnd , 'time2', 'time1'),
           occurrence=1) %>% filter(period==time)
  
  df_period$countryCode <- as.factor(df_period$countryCode)
  
  df_period_raster <- terra::rasterize(x = vect(df_period),
                                       y = raster,
                                       field = 'occurrence',
                                       fun = 'count', na.rm=T,
                                       sum = T, background=0) %>% mask(., raster)
  
  
  df_country_raster <- terra::rasterize(x = vect(df_period),
                                        y = raster,
                                        field = 'countryCode',
                                        fun = first) %>% mask(., raster)
  
  df_country_raster <- as.factor(df_country_raster)
  
  levels(df_country_raster) <- data.frame(
    ID = 0:(length(levels(df_period$countryCode))-1),
    countryCode = levels(df_period$countryCode)
  )
  
  names(df_period_raster) <- 'count'
  names(df_country_raster) <- 'country'
  df_raster <- c(df_period_raster, df_country_raster)
  return(df_raster)
}
# Nasua nasua
PO_nnasua_time1_raster <- calculate_PO_sp_period_raster(PO, 'Nasua nasua', latam_raster, 'time1')
PO_nnasua_time2_raster <- calculate_PO_sp_period_raster(PO, 'Nasua nasua', latam_raster, 'time2')Plots
Code
data_PO_time1_time2 <- data_PO_GIS %>% 
  mutate(period=ifelse(year>=2014 , 'time2', 'time1'), occurrence=1) 
gg_PO_time1 <- ggplot() + 
    geom_sf(data=countries, fill='#fafaf8', col='#bfc5c7', size=0.2) +
    geom_sf(data=nnasua_IUCN, fill='grey60', alpha=0.5, col='grey90', size=0.1) +
    geom_sf(data=data_PO_time1_time2 %>% filter(period=='time1'), col="#E31A1C", size=0.5, show.legend = F) +
    coord_sf(xlim = c(-3500000, 4100000), ylim = c(-4400000, 3000000)) + # st_bbox(nnasua_IUCN)
    labs(title='PO', subtitle='time1: 2000-2013', col='') +
    theme_bw() +
    theme(text=element_text(size = 14))
gg_PO_time2 <- ggplot() + 
    geom_sf(data=countries, fill='#fafaf8', col='#bfc5c7', size=0.2) +
    geom_sf(data=nnasua_IUCN, fill='grey60', alpha=0.5, col='grey90', size=0.1) +
    geom_sf(data=data_PO_time1_time2 %>% filter(period=='time2'), col="#1F78B4", size=0.5, show.legend = F) +
    coord_sf(xlim = c(-3500000, 4100000), ylim = c(-4400000, 3000000)) + # st_bbox(nnasua_IUCN)
    labs(title='PO', subtitle='time2: 2014 to 2021', col='') +
    theme_bw() +
    theme(text=element_text(size = 14))
    
gg_PO_time1 + gg_PO_time2Save data
# blobs
saveRDS(PA_nnasua_time1_blobs, 'data/species_POPA_data/PA_nnasua_time1_blobs.rds')
saveRDS(PA_nnasua_time2_blobs, 'data/species_POPA_data/PA_nnasua_time2_blobs.rds')
# presence-only rasters
terra::writeRaster(PO_nnasua_time1_raster, 'data/species_POPA_data/PO_nnasua_time1_raster.tif', overwrite=TRUE)
terra::writeRaster(PO_nnasua_time2_raster, 'data/species_POPA_data/PO_nnasua_time2_raster.tif', overwrite=TRUE)
saveRDS(data_PA_time1_time2, 'data/species_POPA_data/data_nnasua_PA.rds')
saveRDS(data_PO_time1_time2, 'data/species_POPA_data/data_nnasua_PO.rds')