Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Work on the simplification of multi-relation edges in networks #255

Merged
merged 14 commits into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/pull_request.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
##
## Copyright 2023 by Maximilian Löffler <[email protected]>
## Copyright 2023-2024 by Maximilian Löffler <[email protected]>
## Copyright 2024 by Thomas Bock <[email protected]>
## All Rights Reserved.

Expand Down Expand Up @@ -41,7 +41,7 @@ jobs:

steps:
- name: Checkout Repo
uses: actions/checkout@v3
uses: actions/checkout@v4
bockthom marked this conversation as resolved.
Show resolved Hide resolved

- name: Update system
run: |
Expand Down
187 changes: 187 additions & 0 deletions tests/test-networks.R
bockthom marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,193 @@ test_that("Simplify network with more than one relation", {

})

test_that("Simplify basic multi-relational network", {

    ##
    ## Simplify networks with vertices connected by multi-relational edges
    ##

    ## create artifact network with vertices connected by "cochange" and "mail" edges:
    ## the loop adds three parallel edges per relation between the same two vertices
    network =
        igraph::make_empty_graph(n = 0, directed = FALSE) +
        igraph::vertices("A", "B", type = TYPE.ARTIFACT, kind = "feature")
    for (i in 1:3) {
        network = igraph::add.edges(network, c("A", "B"), type = TYPE.EDGES.INTRA, relation = "mail")
        network = igraph::add.edges(network, c("A", "B"), type = TYPE.EDGES.INTRA, relation = "cochange")
    }

    ## expected network when relations are kept apart: exactly one edge per relation
    network.expected = igraph::make_empty_graph(n = 0, directed = FALSE) +
        igraph::vertices("A", "B", type = TYPE.ARTIFACT, kind = "feature") +
        igraph::edges("A", "B", type = TYPE.EDGES.INTRA, relation = "mail") +
        igraph::edges("A", "B", type = TYPE.EDGES.INTRA, relation = "cochange")

    ## simplify network without simplifying multiple relations into single edges
    network.simplified = simplify.network(network, simplify.multiple.relations = FALSE)
    assert.networks.equal(network.simplified, network.expected)

    ## simplify network with simplifying multiple relations into single edges
    network.simplified = simplify.network(network, simplify.multiple.relations = TRUE)

    ## the network under test itself has to consist of exactly one edge
    expect_identical(igraph::ecount(network.simplified), 1)
    ## simplifying the already simplified network once more must not change the number of edges
    expect_identical(igraph::ecount(simplify.network(network.simplified)), 1)

    ## the single remaining edge keeps the edge type and lists both relations in alphabetical order
    expect_identical(igraph::E(network.simplified)$type[[1]], "Unipartite")
    expect_identical(igraph::E(network.simplified)$relation[[1]], c("cochange", "mail"))
})

test_that("Simplify author-network with relation = c('cochange', 'mail') using both algorithms", {

## Builds the author network for the relations "cochange" and "mail" with 'simplify = TRUE'
## and compares it against a hand-built expected network: first without touching
## 'simplify.multiple.relations', then after setting it to TRUE on the same network builder.

## configurations
proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT)
proj.conf$update.value("commits.filter.base.artifact", FALSE)
net.conf = NetworkConf$new()
net.conf$update.values(updated.values = list(author.relation = c("cochange", "mail"), simplify = TRUE))

## construct objects
proj.data = ProjectData$new(project.conf = proj.conf)
network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf)

## vertex attributes
## (the same vertices are used for both expected networks below)
authors = data.frame(name = c("Björn", "Olaf", "Karl", "Thomas", "udo", "Fritz [email protected]", "georg", "Hans"),
kind = TYPE.AUTHOR,
type = TYPE.AUTHOR)


## ---------------------- simplify.multiple.relations == FALSE -------------------------- ##

## edge attributes
## (rows 1-4 describe the "cochange" edges, rows 5-6 the "mail" edges; every list attribute
## holds two entries per edge)
data = data.frame(comb.1. = c("Björn", "Olaf", "Olaf", "Karl", # cochange
"Björn", "Olaf"), # mail
comb.2. = c("Olaf", "Karl", "Thomas", "Thomas", # cochange
"Olaf", "Thomas")) # mail
data$date = list(get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45")),
get.date.from.string(c("2016-07-12 16:05:41", "2016-07-12 16:06:10")),
get.date.from.string(c("2016-07-12 16:05:41", "2016-07-12 16:06:32")),
get.date.from.string(c("2016-07-12 16:06:10", "2016-07-12 16:06:32")), # cochange
get.date.from.string(c("2016-07-12 15:58:40", "2016-07-12 15:58:50")),
get.date.from.string(c("2016-07-12 16:04:40", "2016-07-12 16:05:37"))) # mail
data$artifact.type = list(c("Feature", "Feature"), c("Feature", "Feature"),
c("Feature", "Feature"), c("Feature", "Feature"), # cochange
c("Mail", "Mail"), c("Mail", "Mail")) # mail
data$hash = list(c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338"),
c("3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"),
c("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"),
c("1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"),
as.character(c(NA, NA)), as.character(c(NA, NA)))
data$file = list(c("test.c", "test.c"), c("test2.c", "test3.c"), c("test2.c", "test2.c"), c("test3.c", "test2.c"),
as.character(c(NA, NA)), as.character(c(NA, NA)))
data$artifact = list(c("A", "A"), c("Base_Feature", "Base_Feature"), c("Base_Feature", "Base_Feature"),
c("Base_Feature", "Base_Feature"), as.character(c(NA, NA)), as.character(c(NA, NA)))
## each of the six expected edges has weight 2 and exactly one relation
data$weight = rep(2, 6)
data$type = rep(TYPE.EDGES.INTRA, 6)
data$relation = c(rep("cochange", 4), rep("mail", 2))
data$message.id = list(as.character(c(NA, NA)), as.character(c(NA, NA)), as.character(c(NA, NA)), as.character(c(NA, NA)),
c("<[email protected]>",
"<[email protected]>"),
c("<[email protected]>",
"<[email protected]>"))
data$thread = list(as.character(c(NA, NA)), as.character(c(NA, NA)), as.character(c(NA, NA)), as.character(c(NA, NA)),
c("<thread-13#8>", "<thread-13#8>"), c("<thread-13#9>", "<thread-13#9>"))

## build expected network
network.expected = igraph::graph.data.frame(data, vertices = authors,
directed = net.conf$get.value("author.directed"))

## build simplified network
network.built = network.builder$get.author.network()

assert.networks.equal(network.built, network.expected)


## ---------------------- simplify.multiple.relations == TRUE --------------------------- ##

## edge attributes
## (only four edges are expected here: the data of the two "mail" edges from the first part is
## appended to the "cochange" data of the edges Björn--Olaf and Olaf--Thomas)
data = data.frame(comb.1. = c("Björn", "Olaf", "Olaf", "Karl"),
comb.2. = c("Olaf", "Karl", "Thomas", "Thomas"))

data$date = list(get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", # cochange
"2016-07-12 15:58:40", "2016-07-12 15:58:50")), # mail
get.date.from.string(c("2016-07-12 16:05:41", "2016-07-12 16:06:10")), # cochange
get.date.from.string(c("2016-07-12 16:05:41", "2016-07-12 16:06:32", # cochange
"2016-07-12 16:04:40", "2016-07-12 16:05:37")), # mail
get.date.from.string(c("2016-07-12 16:06:10", "2016-07-12 16:06:32"))) # cochange
data$artifact.type = list(c("Feature", "Feature", "Mail", "Mail"),
c("Feature", "Feature"),
c("Feature", "Feature", "Mail", "Mail"),
c("Feature", "Feature"))
data$hash = list(as.character(c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", NA, NA)),
c("3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"),
as.character(c("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", NA, NA)),
c("1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"))
data$file = list(as.character(c("test.c", "test.c", NA, NA)), c("test2.c", "test3.c"),
as.character(c("test2.c", "test2.c", NA, NA)), c("test3.c", "test2.c"))
data$artifact = list(as.character(c("A", "A", NA, NA)), c("Base_Feature", "Base_Feature"),
as.character(c("Base_Feature", "Base_Feature", NA, NA)), c("Base_Feature", "Base_Feature"))
## edges combining both relations have weight 4, edges with only "cochange" keep weight 2
data$weight = c(4, 2, 4, 2)
data$type = rep(TYPE.EDGES.INTRA, 4)
## 'relation' is a list column here: one vector of relation names per edge
data$relation = list(c("cochange", "mail"), c("cochange"), c("cochange", "mail"), c("cochange"))
data$message.id = list(as.character(c(NA, NA, "<[email protected]>",
"<[email protected]>")),
as.character(c(NA, NA)),
as.character(c(NA, NA, "<[email protected]>",
"<[email protected]>")),
as.character(c(NA, NA)))
data$thread = list(as.character(c(NA, NA, "<thread-13#8>", "<thread-13#8>")),
as.character(c(NA, NA)),
as.character(c(NA, NA, "<thread-13#9>", "<thread-13#9>")),
as.character(c(NA, NA)))

## build expected network
network.expected = igraph::graph.data.frame(data, vertices = authors,
directed = net.conf$get.value("author.directed"))

## build simplified network
## (reuses the network builder from above after updating its network configuration)
network.builder$update.network.conf(updated.values = list(simplify.multiple.relations = TRUE))
network.built = network.builder$get.author.network()

assert.networks.equal(network.built, network.expected)

})

test_that("Simplify multiple basic multi-relational networks", {

    ##
    ## Simplify networks with vertices connected by multi-relational edges
    ##

    ## create an artifact network and an author network, each with two vertices connected by
    ## "cochange" and "mail" edges: the loop adds three parallel edges per relation and network
    network.A =
        igraph::make_empty_graph(n = 0, directed = FALSE) +
        igraph::vertices("A", "B", type = TYPE.ARTIFACT, kind = "feature")
    network.B =
        igraph::make_empty_graph(n = 0, directed = FALSE) +
        igraph::vertices("C", "D", type = TYPE.AUTHOR, kind = TYPE.AUTHOR)
    for (i in 1:3) {
        network.A = igraph::add.edges(network.A, c("A", "B"), type = TYPE.EDGES.INTRA, relation = "mail")
        network.A = igraph::add.edges(network.A, c("A", "B"), type = TYPE.EDGES.INTRA, relation = "cochange")
        network.B = igraph::add.edges(network.B, c("C", "D"), type = TYPE.EDGES.INTRA, relation = "mail")
        network.B = igraph::add.edges(network.B, c("C", "D"), type = TYPE.EDGES.INTRA, relation = "cochange")
    }

    ## expected networks when relations are kept apart: exactly one edge per relation
    network.A.expected = igraph::make_empty_graph(n = 0, directed = FALSE) +
        igraph::vertices("A", "B", type = TYPE.ARTIFACT, kind = "feature") +
        igraph::edges("A", "B", type = TYPE.EDGES.INTRA, relation = "mail") +
        igraph::edges("A", "B", type = TYPE.EDGES.INTRA, relation = "cochange")
    network.B.expected = igraph::make_empty_graph(n = 0, directed = FALSE) +
        igraph::vertices("C", "D", type = TYPE.AUTHOR, kind = TYPE.AUTHOR) +
        igraph::edges("C", "D", type = TYPE.EDGES.INTRA, relation = "mail") +
        igraph::edges("C", "D", type = TYPE.EDGES.INTRA, relation = "cochange")

    ## simplify networks without simplifying multiple relations into single edges
    networks.simplified = simplify.networks(list(network.A, network.B), simplify.multiple.relations = FALSE)
    assert.networks.equal(networks.simplified[[1]], network.A.expected)
    assert.networks.equal(networks.simplified[[2]], network.B.expected)

    ## simplify networks with simplifying multiple relations into single edges
    networks.simplified = simplify.networks(list(network.A, network.B), simplify.multiple.relations = TRUE)
    for (i in 1:2) {
        network.simplified = networks.simplified[[i]]

        ## the network under test itself has to consist of exactly one edge
        expect_identical(igraph::ecount(network.simplified), 1)
        ## simplifying the already simplified network once more must not change the number of edges
        expect_identical(igraph::ecount(simplify.network(network.simplified)), 1)

        ## the single remaining edge keeps the edge type and lists both relations in alphabetical order
        expect_identical(igraph::E(network.simplified)$type[[1]], "Unipartite")
        expect_identical(igraph::E(network.simplified)$relation[[1]], c("cochange", "mail"))
    }
})


## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / /
## Merge -------------------------------------------------------------------
Expand Down
28 changes: 20 additions & 8 deletions util-networks.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ EDGE.ATTR.HANDLING = list(
## network-analytic data
weight = "sum",
type = "first",
relation = "first",
relation = function(relation) sort(unique(relation)),

## commit data
changed.files = "sum",
Expand Down Expand Up @@ -781,6 +781,12 @@ NetworkBuilder = R6::R6Class("NetworkBuilder",
igraph::V(net)$kind = TYPE.AUTHOR
igraph::V(net)$type = TYPE.AUTHOR

## simplify network if wanted
if (private$network.conf$get.value("simplify")) {
net = simplify.network(net, simplify.multiple.relations =
private$network.conf$get.value("simplify.multiple.relations"))
}

## add range attribute for later analysis (if available)
if ("RangeData" %in% class(private$proj.data)) {
attr(net, "range") = private$proj.data$get.range()
Expand Down Expand Up @@ -822,6 +828,12 @@ NetworkBuilder = R6::R6Class("NetworkBuilder",
## set vertex and edge attributes for identification
igraph::V(net)$type = TYPE.ARTIFACT

## simplify network if wanted
if (private$network.conf$get.value("simplify")) {
net = simplify.network(net, simplify.multiple.relations =
private$network.conf$get.value("simplify.multiple.relations"))
}

## add range attribute for later analysis (if available)
if ("RangeData" %in% class(private$proj.data)) {
attr(net, "range") = private$proj.data$get.range()
Expand Down Expand Up @@ -922,6 +934,12 @@ NetworkBuilder = R6::R6Class("NetworkBuilder",
network = igraph::delete.vertices(network, authors.to.remove)
}

## simplify network if wanted
if (private$network.conf$get.value("simplify")) {
network = simplify.network(network, simplify.multiple.relations =
private$network.conf$get.value("simplify.multiple.relations"))
}

## add range attribute for later analysis (if available)
if ("RangeData" %in% class(private$proj.data)) {
attr(network, "range") = private$proj.data$get.range()
Expand Down Expand Up @@ -1306,12 +1324,6 @@ construct.network.from.edge.list = function(vertices, edge.list, network.conf, d
## initialize edge weights
net = igraph::set.edge.attribute(net, "weight", value = 1)

## transform multiple edges to edge weights
if (network.conf$get.value("simplify")) {
net = simplify.network(net,
simplify.multiple.relations = network.conf$get.value("simplify.multiple.relations"))
}

logging::logdebug("construct.network.from.edge.list: finished.")

return(net)
Expand Down Expand Up @@ -1786,7 +1798,7 @@ delete.authors.without.specific.edges = function(network, specific.edge.types =
#' empty relation, i.e. \code{character(0)}
get.data.sources.from.relations = function(network) {
## get all relations in the network
data.sources = unique(igraph::E(network)$relation)
data.sources = unique(unlist(igraph::E(network)$relation))
bockthom marked this conversation as resolved.
Show resolved Hide resolved

## map them to data sources respectively using the defined translation constant
data.sources = sapply(data.sources, function(relation) {
Expand Down