-
Notifications
You must be signed in to change notification settings - Fork 11
/
21-AAV_DLVE.Rmd
256 lines (210 loc) · 8.14 KB
/
21-AAV_DLVE.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
---
title: "Compare differentially expressed LVs across AAV datasets"
output:
html_notebook:
toc: true
toc_float: true
---
**J. Taroni 2018**
Here, we'll compare what latent variables are differentially expressed in each
of the ANCA-associated vasculitis data sets.
An advantage of the multiPLIER approach is that we don't have to compare across
multiple models.
We'll use a cutoff of `FDR < 0.05`.
Recall that we used "BH" correction for multiple testing in each case.
At first, we'll take the naive approach of just looking at the overlapping
sets of significant LVs.
There's no guarantee that the directionality will be in agreement this way.
## Functions and directory set up
```{r}
# magrittr pipe
`%>%` <- dplyr::`%>%`
```
```{r}
# plot and result directory setup for this notebook
plot.dir <- file.path("plots", "21")
dir.create(plot.dir, recursive = TRUE, showWarnings = FALSE)
results.dir <- file.path("results", "21")
dir.create(results.dir, recursive = TRUE, showWarnings = FALSE)
```
## Find overlap
### Read in data
```{r}
# files with all the limma results
nares.file <- file.path("results", "18",
"NARES_recount2_model_LV_limma_results.tsv")
blood.file <- file.path("results", "19",
"GPA_blood_recount2_model_LV_limma_results.tsv")
kidney.file <- file.path("results", "20",
"ERCB_glom_recount2_model_LV_limma_results.tsv")
```
```{r}
nares.df <- readr::read_tsv(nares.file)
blood.df <- readr::read_tsv(blood.file)
kidney.df <- readr::read_tsv(kidney.file)
```
### Identify significant LVs
```{r}
# initialize list to hold significant pathways for each of the datasets -- this
# is what VennDiagram functions generally take as an argument
sig.list <- list()
sig.list[["nares"]] <- nares.df$LV[which(nares.df$adj.P.Val < 0.05)]
sig.list[["blood"]] <- blood.df$LV[which(blood.df$adj.P.Val < 0.05)]
sig.list[["kidney"]] <- kidney.df$LV[which(kidney.df$adj.P.Val < 0.05)]
```
### Overlap
```{r}
overlap <- VennDiagram::calculate.overlap(sig.list)
overlap.file <- file.path(results.dir, "AAV_FDR_0.05_overlap.RDS")
saveRDS(overlap, file = overlap.file)
```
### Venn Diagram
```{r}
vd.file <- file.path(plot.dir, "AAV_FDR_0.05_overlap.png")
VennDiagram::venn.diagram(x = sig.list, filename = vd.file,
imagetype = "png",
resolution = 600,
category.names = c("NARES", "GPA blood",
"ERCB glomeruli"))
```
### What are the 22 LVs that are differentially expressed in all datasets/tissues?
```{r}
# which element of the overlap list are we looking for?
lapply(overlap, length)
```
```{r}
overlap$a5
```
```{r}
# remove files & data.frame no longer needed once we have the overlap
rm(list = setdiff(ls(), c("%>%", "overlap", "plot.dir", "results.list")))
```
## Plotting overlapping LVs
```{r}
lvs.to.plot <- overlap$a5
```
### B data.frames
We'll use these, from the individual dataset notebooks, for plotting.
```{r}
nares.file <- file.path("results", "18",
"NARES_recount2_model_B_long_sample_info.tsv")
nares.df <- readr::read_tsv(nares.file)
gpa.file <- file.path("results", "19",
"GPA_blood_recount2_model_B_long_sample_info.tsv")
gpa.df <- readr::read_tsv(gpa.file)
kidney.file <- file.path("results", "20",
"ERCB_glom_recount2_model_B_long_sample_info.tsv")
kidney.df <- readr::read_tsv(kidney.file)
```
#### Recoding and reordering
We'll want the plots to be in the same general order and for the x-axis labels
to display well.
```{r}
# Nasal brushing data
nares.df <- nares.df %>%
dplyr::mutate(Classification =
dplyr::recode(Classification,
`GPA with active nasal disease` = "GPA active",
`GPA with prior nasal disease` = "GPA prior",
`GPA (no prior nasal disease)` = "GPA none"
)) %>%
dplyr::mutate(Classification =
factor(Classification,
levels = c("GPA active",
"GPA prior",
"GPA none",
"EGPA",
"Allergic Rhinitis",
"Sarcoidosis",
"Healthy")))
```
```{r}
# GPA PBMC data
gpa.df <- gpa.df %>%
dplyr::mutate(GPA_signature = dplyr::recode(GPA_signature,
GPApos = "GPA-positive",
GPAneg = "GPA-negative",
Control = "Control")) %>%
dplyr::mutate(GPA_signature = factor(GPA_signature,
levels = c("GPA-positive",
"GPA-negative",
"Control")))
```
```{r}
kidney.df <- kidney.df %>%
dplyr::mutate(Diagnosis = factor(Diagnosis,
levels = c("Vasculitis",
"Nephrotic syndrome",
"Living donor control")))
```
### Plotting functions
```{r}
mean_ci <- function(x) ggplot2::mean_se(x, mult = 2)
# custom ggplot2 theme
custom_theme <- function() {
ggplot2::theme_bw() +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45,
hjust = 1),
legend.position = "none",
plot.title = ggplot2::element_text(hjust = 0.5,
face = "bold"),
axis.title.x = ggplot2::element_blank())
}
```
```{r}
# wrapper function for plotting the three datasets -- this is only intended to
# be used in this environment
MultiPlotWrapper <- function(lv.name) {
# lv.name: string to be passed to dplyr::filter -- one LV plotted at a time
# returns a 3 panel plot with strip charts from the three tissues, the output
# of cowplot::plot_grid
#### NARES ####
nares.p <- nares.df %>%
dplyr::filter(LV == lv.name) %>%
ggplot2::ggplot(ggplot2::aes(x = Classification, y = Value)) +
ggplot2::geom_jitter(width = 0.2, alpha = 0.6,
ggplot2::aes(colour = Classification)) +
ggplot2::stat_summary(geom = "pointrange",
fun.data = mean_ci) +
ggplot2::labs(y = lv.name, title = "NARES") +
ggplot2::scale_color_manual(values = c("#2F4F4F", "#191970", "#20B2AA",
"#666666", "#CDCD00", "#FF4500",
"#FF8C69")) +
custom_theme()
#### Glomeruli ####
glom.p <- kidney.df %>%
dplyr::filter(LV == lv.name) %>%
ggplot2::ggplot(ggplot2::aes(x = Diagnosis, y = Value)) +
ggplot2::geom_jitter(width = 0.2, alpha = 0.6,
ggplot2::aes(colour = Diagnosis)) +
ggplot2::stat_summary(geom = "pointrange",
fun.data = mean_ci) +
ggplot2::labs(y = lv.name, title = "Glomeruli") +
ggplot2::scale_color_manual(values = c("#2F4F4F", "#666666", "#FF8C69")) +
custom_theme()
#### PBMC ####
pbmc.p <- gpa.df %>%
dplyr::filter(LV == lv.name) %>%
ggplot2::ggplot(ggplot2::aes(x = GPA_signature, y = Value)) +
ggplot2::geom_jitter(width = 0.2, alpha = 0.6,
ggplot2::aes(colour = GPA_signature)) +
ggplot2::stat_summary(geom = "pointrange",
fun.data = mean_ci) +
ggplot2::labs(y = lv.name, title = "GPA PBMCs") +
ggplot2::scale_color_manual(values = c("#2F4F4F", "#20B2AA", "#FF8C69")) +
custom_theme()
#### Return multipanel plot ####
return(cowplot::plot_grid(nares.p, glom.p, pbmc.p, align = "h", ncol = 3))
}
```
### Plotting itself
```{r}
# list to hold the multipanel plots
plot.list <- lapply(lvs.to.plot, MultiPlotWrapper)
```
```{r}
plot.file <- file.path(plot.dir, "significant_LVs_3_AAV_sets.pdf")
pdf(plot.file, width = 8, height = 6, onefile = TRUE)
plot.list
dev.off()
```