12_relative-abundance-envtChange.Rmd

---
editor_options: 
  chunk_output_type: console
---

# Species relative abundance as a function of environmental changes

In this script, we will model changes in relative abundance as a function of environmental changes across the historical resurvey locations (environmental changes are calculated as differences in values of a predictor between modern vs. historical time period)

## Load necessary libraries
```{r}
# load libs
library(sf)
library(readr)
library(dplyr)
library(terra)
library(tidyr)
library(mapview)
library(sjPlot)
library(glmmTMB)
library(ggplot2)
library(effects)
library(lme4)
library(DHARMa)
library(ggpubr)
library(bbmle)
library(betareg)
library(extrafont)
library(ggeffects)
library(sjPlot)
library(emmeans)
library(scales)

# function to z-transform data
scale_z <- function(x){
  (x - mean(x, na.rm=TRUE))/sd(x, na.rm=TRUE)
}
```

## Load processed land cover rasters
```{r}
lc_1848 <- terra::rast("results/landcover/1848.tif")
lc_2018 <- terra::rast("results/landcover/2018reclassified.tif")
```

## Load modern resurvey locations
```{r}
# load sites
sites <- read_csv("data/list-of-resurvey-locations.csv")
sites <- distinct(sites, modern_site_code, historical_site_code, longitude, latitude)
sites <- sites[-c(1:50),]
```

## Create spatial data within a 500m buffer around each site
```{r}
# make spatial data from sites, with a 500m buffer
sites_sf = st_as_sf(
  sites, coords = c("longitude", "latitude"),
  crs = 4326
) |> 
  st_transform(32643) |> 
  st_buffer(1000)
```

## Extract land cover data across locations from the 1848 and the 2018 rasters
```{r}
# first, we will polygonize the rasters 
vect1848 <- as.polygons(lc_1848)
vect2018 <- as.polygons(lc_2018)

# get intersection of the historical resurvey sites with 1848 and 2018 vector data
lc_sites <- lapply(
  list(vect1848, vect2018), function(x) {
    st_intersection(st_as_sf(x), sites_sf)
  }
)

# calculate total area for each lc type for sites based on historical site code for each time period
lc_sites <- lapply(lc_sites, function(df) {
  df |> 
    mutate(
      area = as.numeric(st_area(df))
    ) |> 
    st_drop_geometry() |> 
    group_by(modern_site_code, name) |> 
    summarise(total_area = sum(area))
})

# add year and merge
lc_sites <- Map(
  lc_sites, c(1848, 2018),
  f = function(df, y) {
    df$year = y
    df
  }
) |> 
  bind_rows()

# calculate proportion of land cover
lc_sites <- lc_sites %>%
  group_by(modern_site_code, year) %>%
  mutate(site_total = sum(total_area))

lc_sites <- lc_sites %>%
  group_by(modern_site_code, name, year) %>%
  mutate(lcProp = total_area/site_total)
```

## Calculate change in proportions of land cover between time periods

For this calculation, we essentially subtract the proportion of each land cover between time periods to obtain the change
```{r}
# get lc change sites
lc_change_dat <- lc_sites |> 
  pivot_wider(
    id_cols = c("modern_site_code", "name"),
    names_from = c("year"),
    values_from = "lcProp",
    values_fn = mean
  ) |> 
  rename(
    year_1848 = `1848`,
    year_2018 = `2018`
  ) %>%
  left_join(., sites_sf, by="modern_site_code") %>%
  replace(is.na(.), 0) %>%
  mutate("lc_prop_change" = (year_2018 - year_1848))

# save as is for further processing
write_csv(
  lc_change_dat, file = "results/lc_change.csv"
)
```

## Calculate change in climate values at sampling sites
```{r}
# load climate data
raster_temp = list.files("results/climate/", pattern = "t_mean", full.names = T) |> 
  lapply(rast)

raster_rain = list.files("results/climate/", pattern = "ppt", full.names = T) |> 
  lapply(rast)

# get difference between first and last files
climate_change = lapply(
  list(raster_temp, raster_rain),
  function(le) {
    lapply(le, function(le2) {
      le2[[length(names(le2))]] - le2[[1]]
    })
  }
) |> 
  Reduce(f = c) # convert to single list

# get raster names
climate_change_names = lapply(climate_change, function(ra) {
  lapply(ra, function(r) {
    r |> names() |> stringr::str_extract("(.*?)(?=_\\d+)")
  })
}) |> unlist()

# get mean change within 1000m buffer of resurvey locs
climate_change_sites = lapply(
  climate_change, function(ra) {
    terra::extract(
      ra, 
      y = terra::vect(
        sites_sf |> 
          st_transform(4326)
      ), fun = mean
    )  
  }
)

# combine all metrics
climate_change_sites = Reduce(f = left_join, x = climate_change_sites)

# attach to sampling sites
sites <- mutate(sites, ID = seq(nrow(sites)))
clim_change_dat <- left_join(sites, climate_change_sites)

# save site data
write_csv(
  clim_change_dat, file = "results/climate_change.csv"
)
```

## Load species relative abundance
```{r}
relAbun <- read.csv("results/species-relative-abundance-siteLevel.csv")
```

## Load species trait data
```{r}
trait_dat <- read.csv("data/species-trait-dat.csv")
```

## Is there a significant association between change in relative abundance and change in an environmental covariate across time periods?

We will first prepare the dataframes necessary for running linear mixed models.
```{r}
## preparing the relative abundance dataframe for a mixed effects model
names(relAbun) <- c("common_name","historical_site_code","year","relAbundance")

pivot_1850 <- relAbun %>%
  filter(year == 1850) %>%
  pivot_wider(., names_from = "year", values_from = "relAbundance")

pivot_2021 <- relAbun %>%
  filter(year == 2021) %>%
  pivot_wider(., names_from = "year", values_from = "relAbundance")

# calculate difference between modern and historical time periods
# positive value implies gain in relative abundance in 2021 compared to 1850, while negative value implies loss in relative abundance in 2021 compared to 1850

relAbun_wide <- full_join(pivot_1850, pivot_2021, by=c("common_name","historical_site_code")) %>%
  mutate("relAbunChange" = (`2021` - `1850`))

## prepare the land cover data
## calculating mean change at the level of the historical site code

lc_meanChange <- lc_change_dat %>%
  group_by(historical_site_code, name) %>%
  summarise(lc_prop_change = mean(lc_prop_change)) 

## make land cover data wider
lc_glmm <- lc_meanChange %>%
  pivot_wider(id_cols = historical_site_code, 
              names_from = name, 
              values_from = lc_prop_change)

## join all three dataframes together
## prepare climate change data
clim_change_glmm <- clim_change_dat %>%
  dplyr::select(historical_site_code, t_mean_dry_2010, t_mean_rainy_2010, ppt_dry_2010, ppt_rainy_2010) %>%
  group_by(historical_site_code) %>%
  summarise(across(everything(), mean))

## dataframe for glmm
glmm_data <- left_join(relAbun_wide,lc_glmm,
                       by = "historical_site_code") %>%
  left_join(., clim_change_glmm, by = "historical_site_code") %>%
  as.data.frame() %>%
  replace(is.na(.), 0)

## scale the climate data
glmm_data <- glmm_data %>%
   mutate(water_bodies_z = scale_z(water_bodies)) %>%
  mutate(plantations_z = scale_z(plantations)) %>%
   mutate(agriculture_z = scale_z(agriculture)) %>%
   mutate(settlements_z = scale_z(settlements)) %>%
   mutate(shola_forest_z = scale_z(shola_forest)) %>%
   mutate(shola_grassland_z = scale_z(shola_grassland)) %>%
  mutate(t_mean_dry_2010_z = scale_z(t_mean_dry_2010)) %>%
  mutate(t_mean_rainy_2010_z = scale_z(t_mean_rainy_2010)) %>%
  mutate(ppt_dry_2010_z = scale_z(ppt_dry_2010)) %>%
  mutate(ppt_rainy_2010_z = scale_z(ppt_rainy_2010)) 
```

## Test for correlations between covariates
```{r}
library(psych)
for_corr <- glmm_data[,c(16:25)] %>%
  distinct()

pairs.panels(for_corr)

# correlative analyses suggests that t_mean_dry2010 is highly correlated with t_mean_rainy2010 and ppt_mean_rainy2010
# for future analyses, we will only include t_mean_dry2010 and ppt_mean_dry2010
```

## Running generalized linear mixed effect models for specific habitat affiliations
```{r}
glmm_habitat <- left_join(glmm_data, trait_dat, by = "common_name") 

## grassland birds
glmm_grassland <-  glmm_habitat %>%
  filter(Habitat.type == "Grassland")

# linear mixed model
m1 <- glmmTMB(relAbunChange ~ shola_grassland_z + t_mean_dry_2010_z+
                (1|common_name) + (1|historical_site_code) ,
              family=gaussian(),
               data = glmm_grassland)

summary(m1)
plot_model(m1, type = "std")
plot(ggpredict(m1))

# Family: gaussian  ( identity )
# Formula:          
# relAbunChange ~ shola_grassland_z + t_mean_dry_2010_z + (1 |  
#     common_name) + (1 | historical_site_code)
# Data: glmm_grassland
# 
#      AIC      BIC   logLik deviance df.resid 
#  -1353.8  -1334.6    682.9  -1365.8      174 
# 
# Random effects:
# 
# Conditional model:
#  Groups               Name        Variance  Std.Dev.
#  common_name          (Intercept) 2.010e-06 0.001418
#  historical_site_code (Intercept) 2.244e-06 0.001498
#  Residual                         2.666e-05 0.005163
# Number of obs: 180, groups:  common_name, 9; historical_site_code, 20
# 
# Dispersion estimate for gaussian family (sigma^2): 2.67e-05 
# 
# Conditional model:
#                     Estimate Std. Error z value Pr(>|z|)    
# (Intercept)       -0.0052244  0.0006955  -7.512 5.82e-14 ***
# shola_grassland_z  0.0001461  0.0005151   0.284    0.777    
# t_mean_dry_2010_z -0.0009301  0.0005151  -1.806    0.071 .  
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 

# Above, we asked if grassland species are declining relative to other species, and we found that there was no association between changes in land cover and climate on grassland bird species abundance, indicating that both grassland habitat loss and temperature increase have been uniform across locations suggesting a regional-scale process and not local-scale. 
```