diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 3ced3784..4e1b7a6c 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -11,7 +11,7 @@ ## with this program; if not, write to the Free Software Foundation, Inc., ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ## -## Copyright 2023 by Maximilian Löffler +## Copyright 2023-2024 by Maximilian Löffler ## Copyright 2024 by Thomas Bock ## All Rights Reserved. @@ -41,7 +41,7 @@ jobs: steps: - name: Checkout Repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Update system run: | diff --git a/NEWS.md b/NEWS.md index c0b0dd30..2fc2f539 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,8 +10,10 @@ - Add a new `split.data.by.bins` function (not to be confused with a previously existing function that had the same name and was renamed in this context), which splits data based on given activity-based bins (PR #244, ece569ceaf557bb38cd0cfad437b69b30fe8a698, ed5feb214a123b605c9513262f187cfd72b9e1f4) - Add new `assert.sparse.matrices.equal` function to compare two sparse matrices for equality for testing purposes (PR #248, 9784cdf12d1497ee122e2ae73b768b8c334210d4, d9f1a8d90e00a634d7caeb5e7f8f262776496838) - Add tests for file `util-networks-misc.R` for issue #242 (PR #248, f3202a6f96723d11c170346556d036cf087521c8, 030574b9d0f3435db4032d0e195a3d407fb7244b, 380b02234275127297fcd508772c69db21c216de, 8b803c50d60fc593e4e527a08fd4c2068d801a48, 7335c3dd4d0302b024a66d18701d9800ed3fe806, 6b600df04bec1fe70c272604f274ec5309840e65) -- Add the possibility to simplify edges of multiple-relation networks into a single edge at all instead of a single edge per relation (PR #250, 2105ea89b5227e7c9fa78fea9de1977f2d9e8faa) +- Add the possibility to simplify edges of multiple-relation networks into a single edge at all instead of a single edge per relation (PR #250, PR #255, 2105ea89b5227e7c9fa78fea9de1977f2d9e8faa, 
a34b5bd50351b9ccf3cc45fc323cfa2e84d65ea0, 34516415fed599eba0cc7d3cc4a9acd6b26db252, 78f43514962d7651e6b7a1e80ee22ce012f32535, d310fdc38690f0d701cd32c92112c33f7fdde0ff, 58d77b01ecc6a237104a4e72ee5fb9025efeaaf2) +- Add tests for network simplification (PR #255, 338b06941eec1c9cfdb121e78ce0d9db6b75da19, 8a6f47bc115c10fbbe4eee21985d97aee5c9dc91, e01908c94eccc4dda5f2b3c0746b0eab0172dc07, 7b6848fb86f69db088ce6ef2bea8315ac94d48f9) - Add `get.bin.dates.from.ranges` function to convert date ranges into bins format (PR #249, a1842e9be46596321ee86860fd87d17a3c88f50f, 858b1812ebfc3194cc6a03c99f3ee7d161d1ca15) +- Add network simplification to showcase file (PR #255, dc32d44f9db7dfc3cc795ef5d6b86609d6c1936f) ### Changed/Improved diff --git a/showcase.R b/showcase.R index a4cceb53..42a3e2e0 100644 --- a/showcase.R +++ b/showcase.R @@ -23,6 +23,7 @@ ## Copyright 2021 by Johannes Hostert ## Copyright 2021 by Niklas Schneider ## Copyright 2022 by Jonathan Baumann +## Copyright 2024 by Maximilian Löffler ## All Rights Reserved. 
@@ -361,6 +362,24 @@ g.motifs = motifs.count(network = g, remove.duplicates = TRUE, raw.data = FALSE) +## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / +## Network simplification -------------------------------------------------- + +## construct sample network +g = y$get.multi.network() +g = igraph::delete_edges(g, c(5, 6)) +g = igraph::delete_vertices(g, c(2, 4, 5, 6, 7, 8)) +g = g + igraph::edges(c("Björn", "Olaf", "Björn", "Olaf"), type = TYPE.EDGES.INTRA, weight = 1, + relation = "cochange", artifact.type = "Feature") + +## merge edges between vertex pairs that stem from the same data source +g.simplified = simplify.network(g) +plot.network(g.simplified) + +## merge all edges between vertex pairs +g.simplified = simplify.network(g, simplify.multiple.relations = TRUE) +plot.network(g.simplified) + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Plots ------------------------------------------------------------------- diff --git a/tests/test-networks.R b/tests/test-networks.R index 24a1a098..3a3954eb 100644 --- a/tests/test-networks.R +++ b/tests/test-networks.R @@ -13,6 +13,7 @@ ## ## Copyright 2018-2019 by Claus Hunsen ## Copyright 2021 by Niklas Schneider +## Copyright 2024 by Maximilian Löffler ## All Rights Reserved. 
@@ -78,6 +79,198 @@ test_that("Simplify network with more than one relation", { }) +test_that("Simplify basic multi-relational network", { + + ## + ## Simplify networks with vertices connected by multi-relational edges + ## + + ## create artifact network with vertices connected by "cochange" and "mail" edges + network = + igraph::make_empty_graph(n = 0, directed = FALSE) + + igraph::vertices("A", "B", type = TYPE.ARTIFACT, kind = "feature") + for (i in 1:3) { + network = igraph::add.edges(network, c("A", "B"), type = TYPE.EDGES.INTRA, relation = "mail") + network = igraph::add.edges(network, c("A", "B"), type = TYPE.EDGES.INTRA, relation = "cochange") + } + + network.expected = igraph::make_empty_graph(n = 0, directed = FALSE) + + igraph::vertices("A", "B", type = TYPE.ARTIFACT, kind = "feature") + + igraph::edges("A", "B", type = TYPE.EDGES.INTRA, relation = "mail") + + igraph::edges("A", "B", type = TYPE.EDGES.INTRA, relation = "cochange") + + ## simplify network without simplifying multiple relations into single edges + network.simplified = simplify.network(network, simplify.multiple.relations = FALSE) + assert.networks.equal(network.simplified, network.expected) + + ## simplify network with simplifying multiple relations into single edges + network.simplified = simplify.network(network, simplify.multiple.relations = TRUE) + expect_identical(igraph::ecount(simplify.network(network.simplified)), 1) + expect_identical(igraph::E(network.simplified)$type[[1]], "Unipartite") + expect_identical(igraph::E(network.simplified)$relation[[1]], c("cochange", "mail")) +}) + +test_that("Simplify author-network with relation = c('cochange', 'mail') using both algorithms", { + + ## configurations + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("commits.filter.base.artifact", FALSE) + net.conf = NetworkConf$new() + net.conf$update.values(updated.values = list(author.relation = c("cochange", "mail"), simplify = TRUE)) + + 
## construct objects + proj.data = ProjectData$new(project.conf = proj.conf) + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + + ## vertex attributes + authors = data.frame(name = c("Björn", "Olaf", "Karl", "Thomas", "udo", "Fritz fritz@example.org", "georg", "Hans"), + kind = TYPE.AUTHOR, + type = TYPE.AUTHOR) + + + ## ---------------------- simplify.multiple.relations == FALSE -------------------------- ## + + ## edge attributes + data = data.frame(comb.1. = c("Björn", "Olaf", "Olaf", "Karl", # cochange + "Björn", "Olaf"), # mail + comb.2. = c("Olaf", "Karl", "Thomas", "Thomas", # cochange + "Olaf", "Thomas")) # mail + data$date = list(get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45")), + get.date.from.string(c("2016-07-12 16:05:41", "2016-07-12 16:06:10")), + get.date.from.string(c("2016-07-12 16:05:41", "2016-07-12 16:06:32")), + get.date.from.string(c("2016-07-12 16:06:10", "2016-07-12 16:06:32")), # cochange + get.date.from.string(c("2016-07-12 15:58:40", "2016-07-12 15:58:50")), + get.date.from.string(c("2016-07-12 16:04:40", "2016-07-12 16:05:37"))) # mail + data$artifact.type = list(c("Feature", "Feature"), c("Feature", "Feature"), + c("Feature", "Feature"), c("Feature", "Feature"), # cochange + c("Mail", "Mail"), c("Mail", "Mail")) # mail + data$hash = list(c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338"), + c("3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), + c("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"), + c("1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), + as.character(c(NA, NA)), as.character(c(NA, NA))) + data$file = list(c("test.c", "test.c"), c("test2.c", "test3.c"), c("test2.c", "test2.c"), c("test3.c", "test2.c"), + as.character(c(NA, NA)), as.character(c(NA, NA))) + data$artifact = list(c("A", "A"), c("Base_Feature", 
"Base_Feature"), c("Base_Feature", "Base_Feature"), + c("Base_Feature", "Base_Feature"), as.character(c(NA, NA)), as.character(c(NA, NA))) + data$weight = rep(2, 6) + data$type = rep(TYPE.EDGES.INTRA, 6) + data$relation = c(rep("cochange", 4), rep("mail", 2)) + data$message.id = list(as.character(c(NA, NA)), as.character(c(NA, NA)), as.character(c(NA, NA)), as.character(c(NA, NA)), + c("<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", + "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>"), + c("<65a1sf31sagd684dfv31@mail.gmail.com>", + "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>")) + data$thread = list(as.character(c(NA, NA)), as.character(c(NA, NA)), as.character(c(NA, NA)), as.character(c(NA, NA)), + c("", ""), c("", "")) + + ## build expected network + network.expected = igraph::graph.data.frame(data, vertices = authors, + directed = net.conf$get.value("author.directed")) + + ## build simplified network + network.built = network.builder$get.author.network() + + assert.networks.equal(network.built, network.expected) + + + ## ---------------------- simplify.multiple.relations == TRUE --------------------------- ## + + data = data.frame(comb.1. = c("Björn", "Olaf", "Olaf", "Karl"), + comb.2. 
= c("Olaf", "Karl", "Thomas", "Thomas")) + + data$date = list(get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", # cochange + "2016-07-12 15:58:40", "2016-07-12 15:58:50")), # mail + get.date.from.string(c("2016-07-12 16:05:41", "2016-07-12 16:06:10")), # cochange + get.date.from.string(c("2016-07-12 16:05:41", "2016-07-12 16:06:32", # cochange + "2016-07-12 16:04:40", "2016-07-12 16:05:37")), # mail + get.date.from.string(c("2016-07-12 16:06:10", "2016-07-12 16:06:32"))) # cochange + data$artifact.type = list(c("Feature", "Feature", "Mail", "Mail"), + c("Feature", "Feature"), + c("Feature", "Feature", "Mail", "Mail"), + c("Feature", "Feature")) + data$hash = list(as.character(c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", NA, NA)), + c("3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"), + as.character(c("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", NA, NA)), + c("1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526")) + data$file = list(as.character(c("test.c", "test.c", NA, NA)), c("test2.c", "test3.c"), + as.character(c("test2.c", "test2.c", NA, NA)), c("test3.c", "test2.c")) + data$artifact = list(as.character(c("A", "A", NA, NA)), c("Base_Feature", "Base_Feature"), + as.character(c("Base_Feature", "Base_Feature", NA, NA)), c("Base_Feature", "Base_Feature")) + data$weight = c(4, 2, 4, 2) + data$type = rep(TYPE.EDGES.INTRA, 4) + data$relation = list(c("cochange", "mail"), c("cochange"), c("cochange", "mail"), c("cochange")) + data$message.id = list(as.character(c(NA, NA, "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>", + "<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>")), + as.character(c(NA, NA)), + as.character(c(NA, NA, "<65a1sf31sagd684dfv31@mail.gmail.com>", + "<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>")), + as.character(c(NA, NA))) + 
data$thread = list(as.character(c(NA, NA, "", "")), + as.character(c(NA, NA)), + as.character(c(NA, NA, "", "")), + as.character(c(NA, NA))) + + ## build expected network + network.expected = igraph::graph.data.frame(data, vertices = authors, + directed = net.conf$get.value("author.directed")) + + ## build simplified network + network.builder$update.network.conf(updated.values = list(simplify.multiple.relations = TRUE)) + network.built = network.builder$get.author.network() + + assert.networks.equal(network.built, network.expected) + +}) + +test_that("Simplify multiple basic multi-relational networks", { + + ## + ## Simplify networks with vertices connected by multi-relational edges + ## + + ## create artifact network with vertices connected by "cochange" and "mail" edges + network.A = + igraph::make_empty_graph(n = 0, directed = FALSE) + + igraph::vertices("A", "B", type = TYPE.ARTIFACT, kind = "feature") + network.B = + igraph::make_empty_graph(n = 0, directed = FALSE) + + igraph::vertices("C", "D", type = TYPE.AUTHOR, kind = TYPE.AUTHOR) + for (i in 1:3) { + network.A = igraph::add.edges(network.A, c("A", "B"), type = TYPE.EDGES.INTRA, relation = "mail") + network.A = igraph::add.edges(network.A, c("A", "B"), type = TYPE.EDGES.INTRA, relation = "cochange") + network.B = igraph::add.edges(network.B, c("C", "D"), type = TYPE.EDGES.INTRA, relation = "mail") + network.B = igraph::add.edges(network.B, c("C", "D"), type = TYPE.EDGES.INTRA, relation = "cochange") + } + + network.A.expected = igraph::make_empty_graph(n = 0, directed = FALSE) + + igraph::vertices("A", "B", type = TYPE.ARTIFACT, kind = "feature") + + igraph::edges("A", "B", type = TYPE.EDGES.INTRA, relation = "mail") + + igraph::edges("A", "B", type = TYPE.EDGES.INTRA, relation = "cochange") + network.B.expected = igraph::make_empty_graph(n = 0, directed = FALSE) + + igraph::vertices("C", "D", type = TYPE.AUTHOR, kind = TYPE.AUTHOR) + + igraph::edges("C", "D", type = TYPE.EDGES.INTRA, relation = "mail") + 
+ igraph::edges("C", "D", type = TYPE.EDGES.INTRA, relation = "cochange") + networks = list(A = network.A, B = network.B) + + ## simplify networks without simplifying multiple relations into single edges + networks.simplified = simplify.networks(networks, simplify.multiple.relations = FALSE) + expect_true(length(networks.simplified) == 2) + expect_identical(names(networks.simplified), names(networks)) + assert.networks.equal(networks.simplified[["A"]], network.A.expected) + assert.networks.equal(networks.simplified[["B"]], network.B.expected) + + ## simplify networks with simplifying multiple relations into single edges + networks.simplified = simplify.networks(networks, simplify.multiple.relations = TRUE) + expect_true(length(networks.simplified) == 2) + expect_identical(names(networks.simplified), names(networks)) + for (i in 1:2) { + expect_identical(igraph::ecount(simplify.network(networks.simplified[[i]])), 1) + expect_identical(igraph::E(networks.simplified[[i]])$type[[1]], "Unipartite") + expect_identical(igraph::E(networks.simplified[[i]])$relation[[1]], c("cochange", "mail")) + } +}) + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Merge ------------------------------------------------------------------- @@ -727,3 +920,24 @@ test_that("Get the data sources from a network with one relation", { expect_identical(expected.data.sources, get.data.sources.from.relations(network), info = "data sources: mails") }) + +test_that("Get the data sources from a network with multiple relations on a single edge", { + expected.data.sources = c("commits", "mails") + + ## configurations + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT) + proj.conf$update.value("commits.filter.base.artifact", FALSE) + ## construct data object + proj.data = ProjectData$new(project.conf = proj.conf) + + ## construct network builder + net.conf = NetworkConf$new() + network.builder = NetworkBuilder$new(project.data = proj.data, 
network.conf = net.conf) + network.builder$update.network.conf(updated.values = list(author.relation = c("mail", "cochange"))) + + ## build network + network = network.builder$get.author.network() + network = simplify.network(network, simplify.multiple.relations = TRUE) + + expect_identical(expected.data.sources, get.data.sources.from.relations(network), info = "data sources: commits, mails") +}) diff --git a/util-networks.R b/util-networks.R index 1068cb99..ba624276 100644 --- a/util-networks.R +++ b/util-networks.R @@ -56,7 +56,7 @@ EDGE.ATTR.HANDLING = list( ## network-analytic data weight = "sum", type = "first", - relation = "first", + relation = function(relation) sort(unique(relation)), ## commit data changed.files = "sum", @@ -781,6 +781,12 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", igraph::V(net)$kind = TYPE.AUTHOR igraph::V(net)$type = TYPE.AUTHOR + ## simplify network if wanted + if (private$network.conf$get.value("simplify")) { + net = simplify.network(net, simplify.multiple.relations = + private$network.conf$get.value("simplify.multiple.relations")) + } + ## add range attribute for later analysis (if available) if ("RangeData" %in% class(private$proj.data)) { attr(net, "range") = private$proj.data$get.range() @@ -822,6 +828,12 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## set vertex and edge attributes for identifaction igraph::V(net)$type = TYPE.ARTIFACT + ## simplify network if wanted + if (private$network.conf$get.value("simplify")) { + net = simplify.network(net, simplify.multiple.relations = + private$network.conf$get.value("simplify.multiple.relations")) + } + ## add range attribute for later analysis (if available) if ("RangeData" %in% class(private$proj.data)) { attr(net, "range") = private$proj.data$get.range() @@ -922,6 +934,12 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", network = igraph::delete.vertices(network, authors.to.remove) } + ## simplify network if wanted + if (private$network.conf$get.value("simplify")) { + 
network = simplify.network(network, simplify.multiple.relations = + private$network.conf$get.value("simplify.multiple.relations")) + } + ## add range attribute for later analysis (if available) if ("RangeData" %in% class(private$proj.data)) { attr(network, "range") = private$proj.data$get.range() @@ -1306,12 +1324,6 @@ construct.network.from.edge.list = function(vertices, edge.list, network.conf, d ## initialize edge weights net = igraph::set.edge.attribute(net, "weight", value = 1) - ## transform multiple edges to edge weights - if (network.conf$get.value("simplify")) { - net = simplify.network(net, - simplify.multiple.relations = network.conf$get.value("simplify.multiple.relations")) - } - logging::logdebug("construct.network.from.edge.list: finished.") return(net) @@ -1786,7 +1798,7 @@ delete.authors.without.specific.edges = function(network, specific.edge.types = #' empty relation, i.e. \code{character(0)} get.data.sources.from.relations = function(network) { ## get all relations in the network - data.sources = unique(igraph::E(network)$relation) + data.sources = unique(unlist(igraph::E(network)$relation)) ## map them to data sources respectively using the defined translation constant data.sources = sapply(data.sources, function(relation) { diff --git a/util-plot.R b/util-plot.R index 67f638eb..25e2a24c 100644 --- a/util-plot.R +++ b/util-plot.R @@ -15,6 +15,7 @@ ## Copyright 2018 by Barbara Eckl ## Copyright 2018 by Thomas Bock ## Copyright 2020-2021 by Thomas Bock +## Copyright 2024 by Maximilian Löffler ## All Rights Reserved. 
@@ -142,7 +143,7 @@ plot.get.plot.for.network = function(network, labels = TRUE) { if (igraph::ecount(network) > 0) { p = p + ggraph::geom_edge_fan( - mapping = ggplot2::aes(colour = relation, linetype = edge.type, width = 0.3 + 0.5 * log(weight)), + mapping = ggplot2::aes(colour = paste(relation, sep = " "), linetype = edge.type, width = 0.3 + 0.5 * log(weight)), end_cap = ggraph::circle(PLOT.VERTEX.SIZE + 3, "pt"), start_cap = ggraph::circle(PLOT.VERTEX.SIZE + 3, "pt"), arrow = if (igraph::is.directed(network)) {