-
Notifications
You must be signed in to change notification settings - Fork 1
/
08_pgls-analysis.Rmd
490 lines (402 loc) · 20.3 KB
/
08_pgls-analysis.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
---
editor_options:
chunk_output_type: console
---
# Phylogenetic Generalized Least Squares (PGLS) regressions
In this script, we examine which predictor/hypothesis (light availability, peak frequency, foraging guild, territoriality, and sociality) best explains the variation in vocal activity between dawn and dusk. We use linear regresssions while controlling for potential phylogenetic signals/relationships.
## Loading necessary libraries
```{r}
library(ape)
library(nlme)
library(geiger)
library(phytools)
library(phangorn)
library(dplyr)
library(tidyverse)
library(tidyr)
library(ggplot2)
library(stringr)
library(vegan)
library(scico)
library(data.table)
library(extrafont)
library(suncalc)
library(lutz)
library(ggpmisc)
library(ggpubr)
library(hms)
library(phyr)
library(sjPlot)
library(emmeans)
```
## Loading necessary data
```{r}
acoustic_data <- read.csv("results/acoustic_data.csv")
species_codes <- read.csv("data/species-annotation-codes.csv")
trait <- read.csv("data/species-trait-dat.csv")
territoriality <- read.csv("data/territoriality-data.csv")
sociality <- read.csv("data/sociality-data.csv")
freq <- read.csv("data/frequency-data.csv")
sites <- read.csv("data/list-of-sites.csv")
tree <- read.nexus("data/birdtree.nex") # obtained 100 trees using Hackett all species
```
## Getting the maximum credibility tree from the nexus file
```{r}
tree <- mcc(tree)
```
## Filtering acoustic data to ensure sampling periods are even across dawn and dusk
Since our recording schedule was uneven (6am to 10am in the morning and 4pm to 7pm in the evening), we filter acoustic data to retain recordings between 6am and 830am and recordings made between 4pm and 630pm so that the two sampling windows capture a similar amount of time right after dawn and right before dusk.
```{r}
dawn <- acoustic_data %>%
group_by(time_of_day == "dawn") %>%
filter(start_time >= 060000 & start_time <= 083000)
dusk <- acoustic_data %>%
group_by(time_of_day == "dusk") %>%
filter(start_time >= 160000 & start_time <= 183000)
acoustic_data <- bind_rows(dawn[, -10], dusk[, -10])
```
## Vocal Activity
We use normalized percent detections (percent detections which account for sampling effort) as a response variable in our PGLS analysis.
```{r}
# sampling effort by time_of_day
effort <- acoustic_data %>%
dplyr::select(site_id, date, time_of_day) %>%
distinct() %>%
arrange(time_of_day) %>%
count(time_of_day) %>%
rename(., nVisits = n)
# Above, we note that we had sampled ~145 site-date combinations at dawn, while ~230 site-date combinations were sampled at dusk
# total number of acoustic detections summarized across every 10-s audio file
# here, we estimate % detections at dawn and dusk, while accounting for sampling effort
vocal_act <- acoustic_data %>%
group_by(time_of_day, eBird_codes) %>%
summarise(detections = sum(number)) %>%
left_join(., species_codes[, c(1, 2, 5)],
by = "eBird_codes"
) %>%
group_by(eBird_codes) %>%
mutate(total_detections = sum(detections)) %>%
mutate(percent_detections = (detections / total_detections) * 100) %>%
ungroup()
## accouting for sampling effort and normalizing data
vocal_act <- vocal_act %>%
left_join(., effort, by = "time_of_day") %>%
mutate(normalized_detections = detections / nVisits) %>%
group_by(eBird_codes) %>%
mutate(total_normalized_detections = sum(normalized_detections)) %>%
mutate(percent_normalized_detections = (normalized_detections / total_normalized_detections) * 100) %>%
ungroup()
```
## Light availability
Here, we prepare a predictor for light availability or time to darkness for the PGLS analysis
```{r}
# add longitude and latitude to acoustic_data
acoustic_data <- left_join(acoustic_data, sites[, c(2, 4, 5)],
by = "site_id"
)
acoustic_data$date <- lubridate::ymd(acoustic_data$date)
names(acoustic_data)[c(10, 11)] <- c("lon", "lat")
# find out what time zone needs to be provided for the sunlight calculations
acoustic_data$tz <- tz_lookup_coords(
lat = acoustic_data$lat,
lon = acoustic_data$lon,
method = "accurate",
warn = FALSE
)
# extract nauticalDawn, nauticalDusk, sunrise and sunset times
light_data <- getSunlightTimes(
data = acoustic_data,
keep = c(
"sunrise", "sunset",
"nauticalDawn", "nauticalDusk"
),
tz = "Asia/Kolkata"
) %>% distinct(.)
# strip dates from new columms and keep only time
light_data$sunrise <- as_hms(light_data$sunrise)
light_data$sunset <- as_hms(light_data$sunset)
light_data$nauticalDawn <- as_hms(light_data$nauticalDawn)
light_data$nauticalDusk <- as_hms(light_data$nauticalDusk)
# summarize detections of species for every 15-min window
acoustic_data <- acoustic_data %>%
group_by(
site_id, date, start_time, time_of_day, eBird_codes,
lon, lat, hour_of_day
) %>%
summarise(detections = sum(number)) %>%
ungroup()
# join the two datasets
acoustic_data <- left_join(acoustic_data, light_data,
by = c("date", "lon", "lat")
)
# format the start_time column in the acoustic data to keep it as the same format as light_data
acoustic_data <- acoustic_data %>%
mutate(across(start_time, str_pad, width = 6, pad = "0"))
acoustic_data$start_time <- format(strptime(acoustic_data$start_time,
format = "%H%M%S"
), format = "%H:%M:%S")
acoustic_data$start_time <- as_hms(acoustic_data$start_time)
# add end_times for each recording
acoustic_data <- acoustic_data %>%
mutate(end_time = start_time + dminutes(20))
acoustic_data$end_time <- as_hms(acoustic_data$end_time)
# subtract times from sunrise, sunset, nauticalDawn and nauticalDusk from start_time of acoustic detections
acoustic_data <- acoustic_data %>%
mutate(time_from_dawn = as.numeric((start_time - nauticalDawn),
units = "hours"
)) %>%
mutate(time_from_sunrise = as.numeric((start_time - sunrise),
units = "hours"
)) %>%
mutate(time_to_dusk = as.numeric((nauticalDusk - end_time),
units = "hours"
)) %>%
mutate(time_to_sunset = as.numeric((sunset - end_time),
units = "hours"
))
# add species scientific name to this data
acoustic_data <- acoustic_data %>%
left_join(., species_codes[, c(1, 5)],
by = "eBird_codes"
)
# binding dawn and dusk times together to get at median time/light availability at dawn
dawn <- acoustic_data %>%
group_by(eBird_codes) %>%
filter(time_of_day == "dawn") %>%
dplyr::select(time_from_dawn, time_of_day) %>%
rename(., time_from_startTime = time_from_dawn) %>%
summarise(median_startTime = median(time_from_startTime)) %>%
mutate(time_of_day = "dawn")
dusk <- acoustic_data %>%
group_by(eBird_codes) %>%
filter(time_of_day == "dusk") %>%
dplyr::select(time_to_dusk, time_of_day) %>%
rename(., time_from_startTime = time_to_dusk) %>%
summarise(median_startTime = median(time_from_startTime)) %>%
mutate(time_of_day = "dusk")
light <- bind_rows(dawn, dusk) # this dataframe gives us the median time from dawn and time to dusk for each species
vocal_act <- vocal_act %>%
left_join(light, by = c("eBird_codes", "time_of_day")) # adding median time from dawn and time to dusk to vocal activity data
```
## Territoriality
We prepare territoriality as a predictor for the PGLS analysis
```{r}
territoriality <- territoriality %>% dplyr::select(Species, Territory)
colnames(territoriality) <- c("scientific_name", "territory")
# Updating the scientific names of following species as per our dataset:
# Flame-throated bulbul- Rubigula gularis (earlier Pycnonotus gularis)
# Black eagle- Ictinaetus malaiensis (earlier Ictinaetus malayensis)
# Brown-capped pygmy woodpecker- Yungipicus nanus (earlier Dendrocopos nanus)
# Malabar barbet- Psilopogon malabaricus (earlier Megalaima malabarica)
# Dark-fronted babbler- Dumetia atriceps (earlier Rhopocichla atriceps)
# Greater flameback- Chrysocolaptes guttacristatus (earlier Chrysocolaptes lucidus)
# Indian blue robin- Larvivora brunnea (earlier Luscinia brunnea)
# Indian yellow tit- Machlolophus aplonotus (earlier Parus aplonotus)
# Jungle babbler- Argya striata (earlier Turdoides striata)
# Orange-headed thrush- Geokichla citrina (earlier Zoothera citrina)
# Rufous babbler- Argya subrufa (earlier Turdoides subrufa)
# Rufous woodpecker- Micropternus brachyurus (earlier Celeus brachyurus)
# Rusty-tailed flycatcher- Ficedula ruficauda (earlier Muscicapa ruficauda
# Spot-bellied eagle owl- Ketupa nipalensis (earlier Bubo nipalensis)
# Spotted dove- Spilopelia chinensis (earlier Stigmatopelia chinensis)
# Thick-billed warbler- Arundinax aedon (earlier Acrocephalus aedon)
# White-bellied flycatcher- Cyornis pallidipes (earlier Cyornis pallipes)
# White-cheeked barbet- Psilopogon viridis (earlier Megalaima viridis)
# Wayanad laughingthrush- Pterorhinus delesserti (earlier Garrulax delesserti)
# Yellow-browed bulbul- Iole indica (earlier Acritillas indica)
territoriality <- territoriality %>% mutate(scientific_name = recode(scientific_name, "Pycnonotus gularis" = "Rubigula gularis", "Ictinaetus malayensis" = "Ictinaetus malaiensis", "Dendrocopos nanus" = "Yungipicus nanus", "Megalaima malabarica" = "Psilopogon malabaricus", "Rhopocichla atriceps" = "Dumetia atriceps", "Chrysocolaptes lucidus" = "Chrysocolaptes guttacristatus", "Luscinia brunnea" = "Larvivora brunnea", "Parus aplonotus" = "Machlolophus aplonotus", "Turdoides striata" = "Argya striata", "Zoothera citrina" = "Geokichla citrina", "Turdoides subrufa" = "Argya subrufa", "Celeus brachyurus" = "Micropternus brachyurus", "Muscicapa ruficauda" = "Ficedula ruficauda", "Bubo nipalensis" = "Ketupa nipalensis", "Stigmatopelia chinensis" = "Spilopelia chinensis", "Acrocephalus aedon" = "Arundinax aedon", "Cyornis pallipes" = "Cyornis pallidipes", "Megalaima viridis" = "Psilopogon viridis", "Garrulax delesserti" = "Pterorhinus delesserti", "Acritillas indica" = "Iole indica"))
vocal_act <- vocal_act %>%
left_join(territoriality, by = "scientific_name")
vocal_act <- vocal_act %>% mutate(territory = case_when(territory %in% "1" ~ "Non-territorial", territory %in% "2" ~ "Weakly territorial", territory %in% "3" ~ "Highly territorial"))
```
## Sociality
We prepare sociality as a predictor for the PGLS analysis
```{r}
sociality <- sociality %>% dplyr::select(Species, Communal)
colnames(sociality) <- c("scientific_name", "sociality")
# Updating the scientific names of following species as per our dataset: see list in the code chunk for Territoriality
sociality <- sociality %>% mutate(scientific_name = recode(scientific_name, "Pycnonotus gularis" = "Rubigula gularis", "Ictinaetus malayensis" = "Ictinaetus malaiensis", "Dendrocopos nanus" = "Yungipicus nanus", "Megalaima malabarica" = "Psilopogon malabaricus", "Rhopocichla atriceps" = "Dumetia atriceps", "Chrysocolaptes lucidus" = "Chrysocolaptes guttacristatus", "Luscinia brunnea" = "Larvivora brunnea", "Parus aplonotus" = "Machlolophus aplonotus", "Turdoides striata" = "Argya striata", "Zoothera citrina" = "Geokichla citrina", "Turdoides subrufa" = "Argya subrufa", "Celeus brachyurus" = "Micropternus brachyurus", "Muscicapa ruficauda" = "Ficedula ruficauda", "Bubo nipalensis" = "Ketupa nipalensis", "Stigmatopelia chinensis" = "Spilopelia chinensis", "Acrocephalus aedon" = "Arundinax aedon", "Cyornis pallipes" = "Cyornis pallidipes", "Megalaima viridis" = "Psilopogon viridis", "Garrulax delesserti" = "Pterorhinus delesserti", "Acritillas indica" = "Iole indica"))
vocal_act <- vocal_act %>%
left_join(sociality, by = "scientific_name")
vocal_act <- vocal_act %>% mutate(sociality = case_when(sociality %in% "0" ~ "Non-communal signallers", sociality %in% "1" ~ "Communal signallers"))
```
## Peak frequency
We prepare peak frequency as a predictor for the PGLS analysis
```{r}
# add standardized eBird codes to the frequency data
freq <- left_join(freq, species_codes[c(3, 5)],
by = "species_annotation_codes"
)
# Only a total of 99 species are left after filtering species with very few templates
nTemplates_5 <- freq %>%
group_by(eBird_codes) %>%
count() %>%
filter(n >= 5) %>%
drop_na()
# left-join to remove species with less than 10 templates in the frequency dataset
freq_5 <- left_join(nTemplates_5[, 1], freq)
# calculate median peak frequency after grouping by time of day and species
median_pf <- separate(freq_5, col = filename, into = c("site_id", "date", "time", "splits"), sep = "_") %>%
mutate(
time_of_day =
case_when(
time >= "060000" & time <= "100000" ~ "dawn",
time >= "160000" & time <= "190000" ~ "dusk"
)
) %>%
group_by(eBird_codes, time_of_day) %>%
summarise(median_peak_freq = median(peak_freq_in_Hz))
## join the frequency data to the vocal activity data
vocal_act <- vocal_act %>%
left_join(median_pf, by = c("eBird_codes", "time_of_day")) %>%
drop_na()
# A total of 66 species were included and three species were excluded from final analysis.
```
## Foraging guild
We prepare foraging guild categories for the PGLS analysis
```{r}
vocal_act <- vocal_act %>%
left_join(trait[, c(1, 2, 29)], by = c(
"scientific_name",
"common_name"
))
## remove species that are poorly represented by a particular trophic niche
## we get rid of nectarivore species, aquatic predator and granivore
vocal_act <- vocal_act %>%
filter(trophic_niche != "Aquatic predator") %>%
filter(trophic_niche != "Granivore") %>%
filter(trophic_niche != "Nectarivore") %>%
filter(trophic_niche != "Vertivore")
## Only 62 species remain at this stage.
```
## Data cleaning
We clean the objects prior to running PGLS regressions
```{r}
# converting the scientific names of vocal_act as per tip.labels of the tree
vocal_act <- vocal_act %>%
mutate(across(scientific_name, str_replace, " ", "_"))
# converting the scientific names of vocal_act as per the birdtree data so that both are matching
vocal_act <- vocal_act %>%
mutate(
scientific_name =
case_when(scientific_name == "Dumetia_atriceps" ~ "Rhopocichla_atriceps", scientific_name == "Chrysocolaptes_guttacristatus" ~ "Chrysocolaptes_lucidus",
scientific_name == "Turdus_simillimus" ~ "Turdus_merula",
scientific_name == "Larvivora_brunnea" ~ "Luscinia_brunnea",
scientific_name == "Machlolophus_aplonotus" ~ "Parus_xanthogenys",
scientific_name == "Psilopogon_malabaricus" ~ "Megalaima_rubricapillus",
scientific_name == "Tephrodornis_sylvicola" ~ "Tephrodornis_gularis",
scientific_name == "Geokichla_citrina" ~ "Zoothera_citrina",
scientific_name == "Cinnyris_asiaticus" ~ "Nectarinia_asiatica",
scientific_name == "Leptocoma_minima" ~ "Nectarinia_minima",
scientific_name == "Spilopelia_chinensis" ~ "Stigmatopelia_chinensis",
scientific_name == "Argya_subrufa" ~ "Turdoides_subrufa",
scientific_name == "Ficedula_nigrorufa" ~ "Muscicapa_ruficauda",
scientific_name == "Gracula_indica" ~ "Gracula_religiosa",
scientific_name == "Ketupa_nipalensis" ~ "Bubo_nipalensis",
scientific_name == "Hypsipetes_ganeesa" ~ "Hypsipetes_leucocephalus",
scientific_name == "Cyornis_pallidipes" ~ "Cyornis_pallipes",
scientific_name == "Psilopogon_viridis" ~ "Megalaima_viridis",
.default = scientific_name
)
) # final dataframe with variables
# check same species in vocal_act and phylogenetic data, and pruning species from the tree which are not present in the vocal_act data
diff <- setdiff(tree$tip.label, vocal_act$scientific_name)
pruned.tree <- drop.tip(tree, diff) # final phylogenetic dataframe
setdiff(vocal_act$scientific_name, pruned.tree$tip.label) # checking if vocal_act and pruned.tree have the same species: result- 'character(0)'
## testing for correlations
library(psych)
for_corr <- vocal_act %>%
filter(time_of_day == "dawn") %>%
mutate(median_startTime_z = scale(median_startTime)) %>%
mutate(median_peak_freq_z = scale(median_peak_freq))
pairs.panels(for_corr[, c(13:16)])
# Territoriality and sociality are correlated to one another (r = 0.66), so we remove sociality from the multivariate model (see below).
```
## PGLS analysis
Here we test if the higher rates of acoustic detections at dawn can be explained by environmental and/or social factors
```{r}
## Using normalized detections for dawn only
# Univariate models:
# percent_normalized_detections ~ median_startTime only
model1 <- vocal_act %>%
filter(time_of_day == "dawn") %>%
gls(percent_normalized_detections ~ median_startTime,
data = .,
correlation = corBrownian(1, pruned.tree, form = ~scientific_name)
)
summary(model1) # AIC = 497.72
# percent_normalized_detections ~ territoriality only
model2 <- vocal_act %>%
filter(time_of_day == "dawn") %>%
gls(percent_normalized_detections ~ relevel(factor(territory), ref = "Non-territorial"), data = ., correlation = corBrownian(1, pruned.tree, form = ~scientific_name))
summary(model2) # AIC = 485.27
# percent_normalized_detections ~ median_peak_freq only
model3 <- vocal_act %>%
filter(time_of_day == "dawn") %>%
gls(percent_normalized_detections ~ scale(median_peak_freq), data = ., correlation = corBrownian(1, pruned.tree, form = ~scientific_name))
summary(model3) # AIC = 497.78
# percent_normalized_detections ~ trophic_niche only
model4 <- vocal_act %>%
filter(time_of_day == "dawn") %>%
gls(percent_normalized_detections ~ relevel(factor(trophic_niche), ref = "Frugivore"),
data = ., correlation = corBrownian(1, pruned.tree, form = ~scientific_name)
)
summary(model4) # AIC = 488.58
# Multi-variate model with all predictor variables:
data <- vocal_act %>%
filter(time_of_day == "dawn")
# adding a reference category for categorical predictors
data$territory <- relevel(factor(data$territory), ref = "Non-territorial")
data$sociality <-relevel(factor(data$sociality), ref = "Non-communal signallers")
data$trophic_niche <- relevel(factor(data$trophic_niche), ref = "Frugivore")
# scaling and centering continuous predictors
data$scaled_median_startTime <- scale(data$median_startTime)
data$scaled_median_peak_freq <- scale(data$median_peak_freq)
model5 <- gls(
percent_normalized_detections ~ scaled_median_startTime +
territory +
scaled_median_peak_freq +
trophic_niche,
data = data,
correlation = corBrownian(1, pruned.tree, form = ~scientific_name)) #with sociality removed
summary(model5) # AIC = 469.81
anova(model5)
# calculation of R2
rr2::R2_lik(model5) #R2 measures how well the model predicts the outcome of the observed data
# calculating marginal means
terrEff <- emmeans(model5, ~"territory", data = data)
pairs(terrEff)
eff_size(terrEff, sigma = sigma(model5), edf = 39)
plot(terrEff, comparisons = TRUE)
pwpp(terrEff)
nicheEff <- emmeans(model5, ~"trophic_niche", data = data)
pairs(nicheEff)
eff_size(nicheEff, sigma = sigma(model5), edf = 39)
plot(nicheEff, comparisons = TRUE)
pwpp(nicheEff)
## visualization
pgls_plot <- plot_model(model5, show.values = TRUE, value.offset = 0.3, show.p = TRUE, p.threshold = c(0.07, 0.01, 0.001), order.terms = c(6, 5, 4, 3, 2, 1), axis.labels = c("Ambient Light (Median time to darkness)", "Territoriality (Highly territorial)", "Territoriality (Weakly territorial)", "Median Peak frequency", "Foraging guild (Invertivore)", "Foraging guild (Omnivore)"), title = "", dot.size = 3, line.size = 1, value.size = 6, type = "std") +
theme_bw() +
theme(axis.title = element_text(size = 20, family = "Century Gothic", face = "bold"), axis.text = element_text(size = 16, family = "Century Gothic", colour = "black")) +
scale_color_manual(values = c("#d95f02", "#1b9e77"))
## write the standardized coefficient plots to file
write.csv(data.frame(pgls_plot[["data"]]), "results/pgls-standardized-estimates.csv")
ggsave(pgls_plot, filename = "figs/fig_pgls.svg", width = 15, height = 9, device = svg(), units = "in", dpi = 300)
dev.off()
## Territoriality (Highly territorial compared to Non-territorial) show a significant positive association with percent_normalized_detections. Trophic guild (Omnivores compared to Frugivores) show a marginally significant positive association with percent_normalized_detections.
## Here we will run a model using dusk data anyway
model5 <- vocal_act %>%
filter(time_of_day == "dusk") %>%
gls(
percent_normalized_detections ~ scale(median_startTime) +
relevel(factor(territory), ref = "Non-territorial") +
relevel(factor(sociality), ref = "Non-communal signallers") +
scale(median_peak_freq) +
relevel(factor(trophic_niche), ref = "Frugivore"),
data = .,
correlation = corBrownian(1, pruned.tree, form = ~scientific_name)
)
summary(model6)
plot_model(model6, type = "std")
## No significant association was detected between dusk data and any environmental/social factors (when controlled for phylogeny).
```