Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
val authored and val committed May 8, 2017
1 parent e46f275 commit 4fcffcc
Show file tree
Hide file tree
Showing 10 changed files with 73,609 additions and 17 deletions.
117 changes: 100 additions & 17 deletions ScriptAlleleNetwork.r
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,63 @@ library("igraph")

##### AlleleNetwork Alternative ###############################################

Net.abr = read_delim("Net.abr.net", col_names = FALSE, delim = "\t")
Net.bac = read_delim("Net.bac.net", col_names = FALSE, delim = "\t")
Net.rel = read_delim("Net.rel.net", col_names = FALSE, delim = "\t")

colnames(Net.abr) = c("Source", "Target", "value")
colnames(Net.bac) = c("Source", "Target", "value")
colnames(Net.rel) = c("Source", "Target", "value")

Net.abr.graph = graph_from_edgelist(as.matrix(Net.abr), directed = FALSE)
Net.bac.graph = graph_from_edgelist(as.matrix(Net.bac), directed = FALSE)
Net.rel.graph = graph_from_edgelist(as.matrix(Net.rel), directed = FALSE)
Net.abr.matrix = Net.abr %>% filter(!is.na(Target), !is.na(Source)) %>% spread(Source, value, fill = 0)
Net.bac.matrix = Net.bac %>% filter(!is.na(Target), !is.na(Source)) %>% spread(Source, value, fill = 0)
Net.rel.matrix = Net.rel %>% filter(!is.na(Target), !is.na(Source)) %>% spread(Source, value, fill = 0)

Net.abr.matrix = as_adjacency_matrix(Net.abr.graph)
Net.abr.mcl = mcl(Net.abr.matrix, addLoops = TRUE, allow1 = TRUE)
tmp = Net.abr.matrix$Target
Net.abr.matrix = Net.abr.matrix[, -1]
rownames(Net.abr.matrix) = tmp

tmp = Net.bac.matrix$Target
Net.bac.matrix = Net.bac.matrix[, -1]
rownames(Net.bac.matrix) = tmp


tmp = Net.rel.matrix$Target
Net.rel.matrix = Net.rel.matrix[, -1]
rownames(Net.rel.matrix) = tmp


Net.abr.graph = graph_from_adjacency_matrix(
as.matrix(Net.abr.matrix),
mode = "upper",
weighted = TRUE,
diag = FALSE
)
Net.bac.graph = graph_from_adjacency_matrix(
as.matrix(Net.abr.matrix),
mode = "upper",
weighted = TRUE,
diag = FALSE
)
Net.rel.graph = graph_from_adjacency_matrix(
as.matrix(Net.abr.matrix),
mode = "upper",
weighted = TRUE,
diag = FALSE
)

################ Using MCL algorithm for clustering classification #########################
Net.abr.mcl = mcl(as_adjacency_matrix(Net.abr.graph),
addLoops = TRUE,
allow1 = TRUE)
Net.bac.mcl = mcl(as_adjacency_matrix(Net.bac.graph),
addLoops = TRUE,
allow1 = TRUE)
Net.rel.mcl = mcl(as_adjacency_matrix(Net.rel.graph),
addLoops = TRUE,
allow1 = TRUE)

Net.bac.matrix = as_adjacency_matrix(Net.bac.graph)
Net.bac.mcl = mcl(Net.bac.matrix, addLoops = TRUE, allow1 = TRUE)

Net.rel.matrix = as_adjacency_matrix(Net.rel.graph)
Net.rel.mcl = mcl(Net.rel.matrix, addLoops = TRUE, allow1 = TRUE)


Net.abr.cl = data_frame(
Expand All @@ -42,10 +85,44 @@ Net.rel.cl = data_frame(
Net.all.cl = bind_rows(Net.abr.cl,
Net.bac.cl,
Net.rel.cl)
############ END MCL algorithm for clustering classification ################################

############ Clustering by Components (Calculate the maximal (weakly or strongly) connected components of a graph) #############
############ This is an alternative if MCL fails because the size of the AlleleNetwork ####################

Net.abr.comp = components(Net.abr.graph)
Net.bac.comp = components(Net.bac.graph)
Net.rel.comp = components(Net.rel.graph)

Net.abr.cl = data_frame(
Gene = rownames(as.data.frame(Net.abr.comp$membership)),
Cluster = Net.abr.comp$membership,
DataSet = "abr"
)
Net.bac.cl = data_frame(
Gene = rownames(as.data.frame(Net.bac.comp$membership)),
Cluster = Net.bac.comp$membership,
DataSet = "bac"
)
Net.rel.cl = data_frame(
Gene = rownames(as.data.frame(Net.rel.comp$membership)),
Cluster = Net.rel.comp$membership,
DataSet = "rel"
)
Net.all.cl = bind_rows(Net.abr.cl,
Net.bac.cl,
Net.rel.cl)

########### END of Clustering by components ################################################


Connections.abr = bind_rows(Net.abr %>% select(X = Gen, N), Net.abr %>% select(X = siguienteG, N)) %>% group_by(X) %>% summarise(N = sum(N))
Connections.bac = bind_rows(Net.bac %>% select(X = Gen, N), Net.bac %>% select(X = siguienteG, N)) %>% group_by(X) %>% summarise(N = sum(N))
Connections.rel = bind_rows(Net.rel %>% select(X = Gen, N), Net.rel %>% select(X = siguienteG, N)) %>% group_by(X) %>% summarise(N = sum(N))

Connections.abr = bind_rows(Net.abr %>% select(X = Target, value),
Net.abr %>% select(X = Source, value)) %>% group_by(X) %>% summarise(N = sum(value))
Connections.bac = bind_rows(Net.bac %>% select(X = Target, value),
Net.bac %>% select(X = Source, value)) %>% group_by(X) %>% summarise(N = sum(value))
Connections.rel = bind_rows(Net.rel %>% select(X = Target, value),
Net.rel %>% select(X = Source, value)) %>% group_by(X) %>% summarise(N = sum(value))
Connections.all = bind_rows(Connections.abr, Connections.bac, Connections.rel)
colnames(Connections.all) = c("Gene", "Connections")

Expand Down Expand Up @@ -83,13 +160,19 @@ Full.table = Full.table %>% separate(Sample, c("Sample", "DataSet", "kk"), sep =
"\\.") %>% select(-kk) %>% full_join(., Net.all.cl)
Full.table = Full.table %>% left_join(., Connections.all)
Full.table$Connections[is.na(Full.table$Connections)] = 0
reads = read_table("../count.reads",col_names = FALSE)
colnames(reads) = c("TotalReads","Sample")
reads$Sample = gsub(".R1.fastq","",reads$Sample)
reads$TotalReads = reads$TotalReads/4
Full.table = left_join(Full.table, Nreads)
reads = read_delim("../Nreads.txt", col_names = TRUE, delim = ",") #### File with the reads count per Sample

Full.table = left_join(Full.table, reads)
Full.table = Full.table %>% mutate(FinalClust = ifelse(is.na(Cluster), Gene, paste(DataSet, Cluster, sep =
"")))
RepresentativesOfCluster = Full.table %>% group_by(FinalClust, Gene) %>% summarise(conn = max(Connections)) %>% group_by(FinalClust) %>% mutate(top = max(conn)) %>% filter(conn == top) %>% group_by(FinalClust) %>% summarise(Representative = first(Gene))
Full.table = full_join(Full.table, RepresentativesOfCluster)

##### END Join AlleleNetwork and Data abundance ############



Full.table = Full.table %>% mutate(RPKM = RPK * 1e6 / TotalReads,
UpM = Uniq * 1e6 / TotalReads)


Binary file added db/ResCapReference.1.bt2
Binary file not shown.
Binary file added db/ResCapReference.2.bt2
Binary file not shown.
Binary file added db/ResCapReference.3.bt2
Binary file not shown.
Binary file added db/ResCapReference.4.bt2
Binary file not shown.
Loading

0 comments on commit 4fcffcc

Please sign in to comment.