-
Notifications
You must be signed in to change notification settings - Fork 1
/
10_species-relative-abundances.Rmd
536 lines (426 loc) · 18.6 KB
/
10_species-relative-abundances.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
---
editor_options:
chunk_output_type: console
---
# Species relative abundances over time
In this script, we calculate species relative abundances over time by replicating a novel approach proposed by Gotelli et al. 2021 (who provides a methodological approach to calculate relative species abundances using historical museum records).
The assumption is that the level of the analysis is at the 'historic-site' and not the 'modern-site' (which essentially corresponds to multiple unique modern survey sites for every 'historic-site').
## Load necessary libraries
```{r}
library(dplyr)
library(stringr)
library(tidyverse)
library(scico)
library(RColorBrewer)
library(extrafont)
library(sf)
library(raster)
library(lattice)
library(data.table)
library(ggrepel)
library(report)
library(lme4)
library(glmmTMB)
library(multcomp)
library(ggstatsplot)
library(paletteer)
library(ggpubr)
library(goeveg)
library(sjPlot)
library(patchwork)
# source custom functions
source("code/02_relative-species-abundance-functions.R")
```
## Load list of resurvey locations
We will load the list of sites across which a modern resurvey was carried out, along with elevation rasters.
```{r}
# load list of resurvey locations and add elevation as a variable
# remove the modern resurvey locations only (ie. ASFQ)
sites <- read.csv("data/list-of-resurvey-locations.csv")
sites <- sites[-c(1:50),]
# convert to sf object and transform
sites <- st_as_sf(sites, coords = c("longitude", "latitude")) %>%
`st_crs<-`(4326) %>%
st_transform(32643)
# add elevation raster
alt <- raster::raster("data/elevation/alt") # this layer is not added to github as a result of its large size and can be downloaded from SRTM (Farr et al. (2007))
# extract values from that raster (note: transformation of coordinate system)
elev <- raster::extract(alt, sites)
sites <- cbind(sites, elev)
```
## Calibrating historical data to modern survey data for relative abundance calculations
To ensure that museum specimens and associated observations are comparable with field associated surveys, since they are both independent sources of information, we calibrate them to understand if they are comparable in the first place.
```{r}
## read in historical occurrence data
hist_occ <- read.csv("data/historical-occurrence-data.csv")
## applying filters:
# include only data until 1950
hist_occ <- hist_occ %>%
filter(year <= 1950)
## read in modern occurrence data
mod_occ <- read.csv("data/modern-occurrence-data.csv") %>%
filter(!is.na(historical_site_code)) # 2301 observations of 103 bird species
# ensure the data is comparable with historical data
mod_occ <- mod_occ %>%
filter(!is.na(species_code)) %>%
filter(common_name %in% hist_occ$common_name) %>%
filter(historical_site_code %in% hist_occ$historical_site_code)
# total species count across historical data
mod_spp_count <- mod_occ %>%
group_by(common_name) %>%
count()
## group by scientific name and modern site code
mod_occ <- mod_occ %>%
filter(!is.na(common_name)) %>%
group_by(common_name, modern_site_code) %>%
summarise("2021" = sum(number))
## left join the sites dataframe
mod_occ <- left_join(mod_occ, sites, by=c("modern_site_code"="modern_site_code")) %>%
filter(!is.na(common_name)) %>%
replace(is.na(.), 0)
# calculate total count of historical data at each site
# here we pool all historical data from 1850-1950
hist_occ_tot <- hist_occ %>%
group_by(common_name, historical_site_code) %>%
count() %>%
summarise("historical" = sum(n))
# left join the sites dataframe
hist_occ_tot <- left_join(hist_occ_tot, sites, by=c("historical_site_code"="historical_site_code")) %>%
filter(!is.na(common_name)) %>%
replace(is.na(.), 0)
# we remove modern site_code and get only distinct rows
unique_hist <- hist_occ_tot[,c(1,2,3)] %>%
group_by(common_name, historical_site_code) %>%
distinct() %>%
group_by(common_name) %>%
summarise("relAbundHist" = sum(historical))
# we remove modern site_code and get only distinct rows
unique_mod <- mod_occ[,c(1,3,5)] %>%
group_by(common_name, historical_site_code) %>%
distinct() %>%
group_by(common_name) %>%
summarise("relAbund2021" = sum(`2021`))
# join historical and modern dataframes
hist_mod <- full_join(unique_hist, unique_mod)%>%
replace_na(list(relAbundHist = 0,
relAbund2021 = 0))
# apply dirichlet distribution to the historical data
# choosing 1850-1900 data first
dat <- tibble::deframe(hist_mod[,1:2])
relAbunHist <- dirch_stats(dat) %>%
dplyr::select(species, bayes_mean) %>%
rename(., bayes_mean_hist = bayes_mean)
# apply dirichlet distribution to the modern survey
dat <- tibble::deframe(hist_mod[,c(1,3)]) ## choosing modern data
relAbun2021 <- dirch_stats(dat) %>%
dplyr::select(species, bayes_mean) %>%
rename(., bayes_mean_2021 = bayes_mean)
# for calibration
### join the relative abundance datasets together
relAbunCalib <- full_join(relAbunHist, relAbun2021) %>%
arrange(species) %>%
rename(., "historical" = "bayes_mean_hist") %>%
rename(., "modern" = "bayes_mean_2021") %>%
rename(., common_name = species)
# regressing 1850s data vs. 2021
fig_hist_v_mod_rel <- ggplot(data=relAbunCalib,aes(x=modern,y=historical)) +
scale_y_log10() +
geom_abline(slope=1, intercept=0,linetype="dashed") +
scale_x_log10() +
labs(y = "Historical Relative abundance",
x = "Modern Relative abundance") +
geom_point(shape = 21, colour = "black", fill = "white", size = 2, stroke = 1)+
geom_smooth(method="lm", se=TRUE, fullrange=FALSE, level=0.95,linetype="solid")+
stat_regline_equation(aes(label = ..rr.label..)) +
theme_bw() +
theme(axis.text = element_text(family = "Century Gothic", size = 13),
legend.title = element_text(family = "Century Gothic"),
legend.text = element_text(family = "Century Gothic"),
text = element_text(family = "Century Gothic", size = 25))
ggsave(fig_hist_v_mod_rel, filename = "figs/fig_historical_vs_modern_relAbun.png", width = 15, height = 10, device = png(), units = "in", dpi = 300)
dev.off()
```
![Historical vs. modern species relative abundance suggests high correlations, implying that they are comparable to one another](figs/fig_historical_vs_modern_relAbun.png)
## Calculating species relative abundance for the historical data (1850-1900 and 1900-1950)
```{r}
## read in historical occurrence data
hist_occ <- read.csv("data/historical-occurrence-data.csv")
## applying filters:
# a: include only data until 1950
hist_occ <- hist_occ %>%
filter(year <= 1950)
# b: a second filter includes only those species that had a minimum count of atleast three specimens and occurred across atleast two unique historical survey locations
# total species count across historical data
spp_count <- hist_occ %>%
group_by(common_name) %>%
count() # total of 179 species
sppMinThree <- spp_count %>%
filter(n >= 3) # total of 92 species
# please note, we include ~four species that were collected from a single historical location Apus melba, Apus melba, Brachypodius priocephalus, Phylloscopus tytleri, Picus chlorolophus
# filter historical data
hist_occ <- hist_occ %>%
filter(common_name %in% sppMinThree$common_name)
### pre-1900
hist_occ_pre1900 <- hist_occ %>%
filter(year <= 1900)
# calculate total count of historical data at each site
hist_occ_pre1900 <- hist_occ_pre1900 %>%
group_by(common_name, historical_site_code) %>%
count() %>%
summarise("1850-1900" = sum(n))
# left join the sites dataframe
hist_occ_pre1900 <- left_join(hist_occ_pre1900, sites, by=c("historical_site_code"="historical_site_code")) %>%
filter(!is.na(common_name)) %>%
replace(is.na(.), 0)
## 1900-1950 data
hist_occ_1900_1950 <- hist_occ %>%
filter(year > 1900 & year <= 1950)
# calculate total count of historical data at each site
hist_occ_1900_1950 <- hist_occ_1900_1950 %>%
group_by(common_name, historical_site_code) %>%
count() %>%
summarise("1900-1950" = sum(n))
# left join the sites dataframe
hist_occ_1900_1950 <- left_join(hist_occ_1900_1950, sites, by=c("historical_site_code"="historical_site_code")) %>%
filter(!is.na(common_name)) %>%
replace(is.na(.), 0)
```
## Load the modern occurrence data
For the modern occurrence data, we only keep species that were recorded historically to avoid artificial increases in relative abundance due to the method of collection.
```{r}
mod_occ <- read.csv("data/modern-occurrence-data.csv") %>%
filter(!is.na(historical_site_code)) # 2301 observations of 103 bird species
# ensure the data is comparable with historical data
mod_occ <- mod_occ %>%
filter(!is.na(species_code)) %>%
filter(common_name %in% hist_occ$common_name) %>%
filter(historical_site_code %in% hist_occ$historical_site_code)
# total species count across historical data
mod_spp_count <- mod_occ %>%
group_by(common_name) %>%
count() # total of 63 spp
## group by scientific name and modern site code
mod_occ <- mod_occ %>%
filter(!is.na(common_name)) %>%
group_by(common_name, modern_site_code) %>%
summarise("2021" = sum(number))
## left join the sites dataframe
mod_occ <- left_join(mod_occ, sites, by=c("modern_site_code"="modern_site_code")) %>%
filter(!is.na(common_name)) %>%
replace(is.na(.), 0)
```
## Calculate dirichlet distributions of relative abundance
Here, we will functions from Gotelli et al. 2021 to calculate relative species abundances between historic and modern periods.
```{r}
# note: the level of the analysis is at the 'historic-site' and not the 'modern-site' (which essentially corresponds to two or three unique sites for every 'historic-site')
# to ensure that the analysis is consistent, we sum abundances across the modern sites corresponding to each historic site
# However, our analysis calculates relative species abundances for the give time period rather than by site + time-period
# we remove modern site_code and get only distinct rows
unique_hist_pre1900 <- hist_occ_pre1900[,c(1,2,3)] %>%
group_by(common_name, historical_site_code) %>%
distinct() %>%
group_by(common_name) %>%
summarise("relAbund1850" = sum(`1850-1900`))
# we remove modern site_code and get only distinct rows
unique_hist_1900_1950 <- hist_occ_1900_1950[,c(1,2,3)] %>%
group_by(common_name, historical_site_code) %>%
distinct() %>%
group_by(common_name) %>%
summarise("relAbund1900" = sum(`1900-1950`))
# we remove modern site_code and get only distinct rows
unique_mod <- mod_occ[,c(1,3,5)] %>%
group_by(common_name, historical_site_code) %>%
distinct() %>%
group_by(common_name) %>%
summarise("relAbund2021" = sum(`2021`))
hist_mod <- full_join(unique_hist_pre1900,
unique_hist_1900_1950) %>%
full_join(., unique_mod) %>%
replace_na(list(relAbund1850 = 0,
relAbund1900 = 0,
relAbund2021 = 0))
# apply dirichlet distribution to the historical data
# choosing 1850-1900 data first
dat <- tibble::deframe(hist_mod[,1:2])
relAbun1850 <- dirch_stats(dat) %>%
dplyr::select(species, bayes_mean) %>%
rename(., bayes_mean_1850 = bayes_mean)
# choosing 1900-1950 data
dat <- tibble::deframe(hist_mod[,c(1,3)])
relAbun1900 <- dirch_stats(dat) %>%
dplyr::select(species, bayes_mean) %>%
rename(., bayes_mean_1900 = bayes_mean)
# apply dirichlet distribution to the modern survey
dat <- tibble::deframe(hist_mod[,c(1,4)]) ## choosing modern data
relAbun2021 <- dirch_stats(dat) %>%
dplyr::select(species, bayes_mean) %>%
rename(., bayes_mean_2021 = bayes_mean)
### join the relative abundance datasets together
relAbun <- full_join(relAbun1850, relAbun1900) %>%
full_join(., relAbun2021) %>%
arrange(species) %>%
rename(., `1850-1900` = "bayes_mean_1850") %>%
rename(., `1900-1950` = "bayes_mean_1900") %>%
rename(., `2021` = "bayes_mean_2021") %>%
rename(., common_name = species)
```
## Boxplot visualization of overall relative abundance across time periods
```{r}
data_for_boxplot <- relAbun %>%
dplyr::select(-common_name) %>%
pivot_longer(everything())
fig_relAbun_over_time <- ggbetweenstats(
data = data_for_boxplot,
x = name,
y = value,
xlab = "Time Period",
ylab = "Relative Abundance",
title = "Relative abundance by time period",
violin.args = list(width = 0),
pairwise.comparisons = T) +
scale_y_log10() +
scale_color_manual(values = c("#9EB0FFFF", "#122C39FF","#D5857DFF")) +
theme(plot.title = element_text(family = "Century Gothic",
size = 18, face = "bold"),
axis.title = element_text(family = "Century Gothic",
size = 16, face = "bold"),
axis.text = element_text(family="Century Gothic",
size = 14),
plot.subtitle = element_text(
family = "Century Gothic",
size = 14,
face = "bold",
color="#1b2838"
))
ggsave(fig_relAbun_over_time, filename = "figs/fig_relAbun_landscapeLevel.png", width = 15, height = 10, device = png(), units = "in", dpi = 300)
dev.off()
```
![Species relative abundance over time (pooled at the landscape level)](figs/fig_relAbun_landscapeLevel.png)
## Species relative abundances at the site-level
We now recalculate species relative abundances at the site-level, which will be used for downstream comparisons with change in land cover and climate at a given site.
```{r}
# 1850-1900 data
# we remove modern site_code and get only distinct rows
hist_pre1900 <- hist_occ_pre1900[,c(1:3)] %>%
distinct(.)
# 1900-1950 data
# we remove modern site_code and get only distinct rows
hist_1900_1950 <- hist_occ_1900_1950[,c(1:3)] %>%
distinct(.)
## modern resurvey
## analysis is being done at the level of the historical site code
mod_site_level <- mod_occ[,c(1,3,5)] %>%
group_by(common_name, historical_site_code) %>%
mutate("2021" = sum(`2021`)) %>%
distinct(.)
## join data frames together
site_level_relAbun <- full_join(hist_pre1900,
hist_1900_1950) %>%
full_join(., mod_site_level) %>%
replace_na(list(`1850-1900` = 0,
`1900-1950` = 0,
`2021` = 0)) %>%
ungroup() %>%
complete(common_name, historical_site_code,
fill = list(`1850-1900` = 0,
`1900-1950` = 0, `2021` = 0))
## site-level relative abundance for 1850-1900 data
relSpAbund_1850_1900 <- c()
for(i in 1:length(unique(site_level_relAbun$historical_site_code))) {
# for every unique historical site
a <- site_level_relAbun %>%
filter(historical_site_code == unique(site_level_relAbun$historical_site_code)[i])
# subset historical data
b <- tibble::deframe(a[,c(1,3)]) ## choosing 1850-1900 data
loc <- unique(site_level_relAbun$historical_site_code)[i] # what's the site_code?
## historical relative abundance
relSpAbund_1850_1900[[i]] <- dirch_stats(b) %>%
dplyr::select(species, bayes_mean)
old_varname <- c('species','bayes_mean')
new_varname <- c('common_name', paste(loc,"_","1850", sep = ""))
relSpAbund_1850_1900[[i]] <- relSpAbund_1850_1900[[i]] %>%
data.table::setnames(old = old_varname, new = new_varname)
names(relSpAbund_1850_1900)[i] <- loc
}
# save object but full_join across lists
relSpAbund_1850_1900 <- relSpAbund_1850_1900 %>%
reduce(.f=full_join)
## site-level relative abundance for 1900-1950 data
relSpAbund_1900_1950 <- c()
for(i in 1:length(unique(site_level_relAbun$historical_site_code))) {
# for every unique historical site
a <- site_level_relAbun %>%
filter(historical_site_code == unique(site_level_relAbun$historical_site_code)[i])
# subset historical data
b <- tibble::deframe(a[,c(1,4)]) ## choosing 1900-1950 data
loc <- unique(site_level_relAbun$historical_site_code)[i] # what's the site_code?
## historical relative abundance
relSpAbund_1900_1950[[i]] <- dirch_stats(b) %>%
dplyr::select(species, bayes_mean)
old_varname <- c('species','bayes_mean')
new_varname <- c('common_name', paste(loc,"_","1900", sep = ""))
relSpAbund_1900_1950[[i]] <- relSpAbund_1900_1950[[i]] %>%
data.table::setnames(old = old_varname, new = new_varname)
names(relSpAbund_1900_1950)[i] <- loc
}
# save object but full_join across lists
relSpAbund_1900_1950 <- relSpAbund_1900_1950 %>%
reduce(.f=full_join)
# modern resurvey relative abundance - site level
relSpAbund_2021 <- c()
for(i in 1:length(unique(site_level_relAbun$historical_site_code))) {
a <- site_level_relAbun %>%
filter(historical_site_code == unique(site_level_relAbun$historical_site_code)[i])
b <- tibble::deframe(a[,c(1,5)]) ## choosing modern data
loc <- unique(site_level_relAbun$historical_site_code)[i] # what's the site_code?
relSpAbund_2021[[i]] <- dirch_stats(b) %>%
dplyr::select(species, bayes_mean)
old_varname <- c('species','bayes_mean')
new_varname <- c('common_name', paste(loc,"_","2021", sep = ""))
relSpAbund_2021[[i]] <- relSpAbund_2021[[i]] %>%
data.table::setnames(old = old_varname, new = new_varname)
names(relSpAbund_2021)[i] <- loc
}
# save object but full_join across lists
relSpAbund_2021 <- relSpAbund_2021 %>%
reduce(.f=full_join)
## join all the datasets together
relSpAbund_by_site <- full_join(relSpAbund_1850_1900,
relSpAbund_1900_1950) %>%
full_join(., relSpAbund_2021) %>%
arrange(common_name)
```
## Visualizing differences in relative abundance over time at the site level
```{r}
## preparing dataframe for visualization
relSpAbund <- relSpAbund_by_site %>%
pivot_longer(!common_name, names_to = "site_year", values_to = "relAbundance") %>%
separate(site_year, into = c("site", "year"), sep = "_")
## Please note: for the sake of downstream analysis, we have renamed 1850-1900 as 1850. In other words, each year - except for 2021 represents a 50 year time period.
fig_relAbun_siteLevel <- relSpAbund %>%
dplyr::select(-common_name) %>%
ggplot(aes(x=year, y = relAbundance,
fill = year)) +
geom_boxplot() +
scale_y_log10() +
facet_wrap(~site)+
xlab("Time period") +
ylab("Relative abundance") +
scale_fill_scico_d(palette = "roma") +
theme_bw() +
theme(axis.title = element_text(family = "Century Gothic",
size = 14, face = "bold"),
axis.text = element_text(family="Century Gothic",size = 14),
legend.title = element_text(family="Century Gothic",
size = 14, face = "bold"),
legend.key.size = unit(1,"cm"),
legend.text = element_text(family="Century Gothic",size = 14))
ggsave(fig_relAbun_siteLevel, filename = "figs/fig_relAbun_siteLevel.png", width = 15, height = 10, device = png(), units = "in", dpi = 300)
dev.off()
```
![Species relative abundances across species, when estimated at the level of the site](figs/fig_relAbun_siteLevel.png)
## Writing results to file
```{r}
write.csv(relAbun, "results/species-relative-abundance.csv", row.names = F)
write.csv(relSpAbund, "results/species-relative-abundance-siteLevel.csv", row.names = F)
```