-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser.R
executable file
·168 lines (134 loc) · 6.76 KB
/
parser.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#!/usr/bin/env Rscript
library("optparse")
library("parallel")
# ===================
# Take input
# ===================
### HELP ###
# parser.R -d/-g input_file -c cores --report_name reportname(can include path)
option_list <- list(
make_option(c("-g", "--global"), type="character", default=NULL,
help="Input folder with Smetana global results (e.g. ./my_results/NodeXXXXX/smetana_results/global).", metavar="character"),
make_option(c("-d", "--detailed"), type="character", default=NULL,
help="Input folder with Smetana global results (e.g. ./my_results/NodeXXXXX/smetana_results/global).", metavar="character"),
make_option(c("-c","--cores"), type="numeric", default=4,
help="Number of cores to use in parallelization", metavar = "character"),
make_option(c("--report_name"), type="character", default="report.txt",
help="Name of the output report file. Default: ./report.txt", metavar="character")
)
parser <- OptionParser(option_list=option_list)
opt <- parse_args(parser)
global <- opt$global
detailed <- opt$detailed
cores <- opt$cores
report_name <- opt$report_name
if (is.null(global)) {
if (is.null(detailed)) {
stop("Please specify an input folder with --global or --detailed.")
} else {
is.detailed <- TRUE
is.global <- FALSE
}
} else if (is.null(detailed)) {
is.global <- TRUE
is.detailed <- FALSE
} else {
stop("Only --global or --detailed input is accepted. Please choose only one.")
}
if (is.global){
# Open the dataset
# ================
filenames <- list.files(global, pattern="*.tsv", full.names=TRUE)
dataset <- do.call("rbind", mclapply(filenames, FUN=function(filename) {
df <- read.table(filename, sep="\t", header=TRUE, na.strings = "n/a")
rownames(df) <- tail(strsplit(filename,split="/")[[1]],n=1)
return(df)
} ,mc.cores=cores))
# Report MRO=n/a files
# ====================
mro.is.na <- as.logical(is.na(dataset["mro"]))
row_names <- filenames
not_growing <- row_names[mro.is.na]
# MRO values
# =============
growing <- dataset[!mro.is.na,]
if (length(growing)==0) {
write("MRO was n/a for all files. The co-culture can't grow.",file=report_name)
stop ("MRO was n/a for all files. The co-culture can't grow.")
} else {
mean_mro <- mean(growing[,"mro"])
min_mro <- min(growing[,"mro"])
max_mro <- max(growing[,"mro"])
}
# MIP report
# ==========
mip.is.na <- as.logical(is.na(growing[,"mip"]))
mip.is.not.na <- growing[!mip.is.na,]
mean_mip <- mean(mip.is.not.na[,"mip"])
min_mip <- min(mip.is.not.na[,"mip"])
max_mip <- max(mip.is.not.na[,"mip"])
# Print report
# ============
write("The following files have n/a MRO and MIP, which means they can't grow by themselves:\n",file=report_name)
write(not_growing,file=report_name,append=TRUE)
write("\n\nMRO (metabolic resource overlap) calculates how much the species compete for the same metabolites", file=report_name, append=TRUE)
write(paste("\nMean MRO:", mean_mro, sep="\n"), file=report_name, append=TRUE)
write(paste("\nMinimum MRO:", min_mro, sep="\n"), file=report_name, append=TRUE)
write(paste("\nMaximum MRO:", max_mro, sep="\n"), file=report_name, append=TRUE)
write("\n\nMIP (metabolic interaction potential) calculates how many metabolites the species can share to decrease their dependency on external resources", file=report_name, append=TRUE)
write(paste("\nMean MIP:", mean_mip, sep="\n"), file=report_name, append=TRUE)
write(paste("\nMinimum MIP:", min_mip, sep="\n"), file=report_name, append=TRUE)
write(paste("\nMaximum MIP:", max_mip, sep="\n"), file=report_name, append=TRUE)
write("\n\nFiles where MRO values are available:", file=report_name, append=TRUE)
write.table(growing, file=report_name, sep="\t", quote=FALSE, append=TRUE)
} else {
# Open the data files
# ===================
files_names <- list.files(detailed, pattern="*.tsv", full.names=TRUE)
files_text <- mclapply(files_names, FUN=function(filename) {
read.csv(filename, sep="\t", header=TRUE)
} ,mc.cores=cores)
names(files_text) <- files_names
# Report empty files
# ==================
is.empty <- as.numeric(mclapply(files_text,FUN=function(x){length(x[,1])},mc.cores=cores))==0
empty <- files_names[is.empty]
not.empty <- files_names[!is.empty]
# Analyze metabolites
# ===================
merged.files <- do.call("rbind",files_text)
rm(files_text) # free memory
# Create node columns
check_node <- function(x,node_1_index,tags=c("first","second")){
if (x %in% node_1_index) {result <- tags[1]} else {result <- tags[2]}
return(result)
}
node_1_index <- unique(sapply(files_names,FUN=function(filename){head(strsplit(filename,split="_")[[1]],n=1)},USE.NAMES = FALSE))
donor_node <- sapply(merged.files[,"donor"], check_node, node_1_index, USE.NAMES=FALSE)
receiver_node <- sapply(merged.files[,"receiver"],check_node, node_1_index, USE.NAMES=FALSE)
merged.files <- cbind(merged.files, donor_node, receiver_node)
# Exchanged in general
all.metabolites <- setNames(aggregate(x=merged.files[,c(7,9)],by=list(merged.files$compound),mean),
nm=c("compound","mus","smetana"))
# Exchanged by node
all.metabolites.by.node <- setNames(aggregate(x=merged.files[,c(7,9)],
by=list(merged.files$compound,merged.files$donor_node, merged.files$receiver_node),
mean,),nm=c("compound","donor_node","receiver_node","mus","smetana"))
# Ordered
all.metabolites <- all.metabolites[order(all.metabolites$smetana,decreasing=TRUE),]
all.metabolites.by.node <- all.metabolites.by.node[order(all.metabolites.by.node$smetana,decreasing=TRUE),]
# Print report
# ============
write("The following files are empty, which means no exchange between species:\n",file=report_name)
write(empty,file=report_name,append=TRUE)
write(paste0("\nThere are ",length(not.empty)," files which are not empty:"),file=report_name,append=TRUE)
write(not.empty,file=report_name,append=TRUE)
write("\n10 most exchanged metabolites:",file=report_name,append=TRUE)
write.table(all.metabolites[1:10,],file=report_name,append=TRUE,quote=FALSE,sep="\t",row.names=FALSE)
write("\n10 most exchanged metabolites by donor node:",file=report_name,append=TRUE)
write.table(all.metabolites.by.node[1:10,],file=report_name,append=TRUE,quote=FALSE,sep="\t",row.names=FALSE)
write("\n'first' node includes ",file=report_name,append=TRUE)
write(node_1_index, file=report_name,append=TRUE)
write("\nThe whole dataset is as follows:",file=report_name,append=TRUE)
write.table(merged.files,file=report_name,append=TRUE,quote=FALSE,sep="\t",row.names=FALSE)
}