-
Notifications
You must be signed in to change notification settings - Fork 15
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Additional Core/Peripheral Classification Methods #276
base: dev
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ | |
## Copyright 2019 by Christian Hechtl <[email protected]> | ||
## Copyright 2021 by Christian Hechtl <[email protected]> | ||
## Copyright 2023-2024 by Maximilian Löffler <[email protected]> | ||
## Copyright 2024 by Leo Sendelbach <[email protected]> | ||
## All Rights Reserved. | ||
|
||
|
||
|
@@ -105,6 +106,74 @@ test_that("Eigenvector classification", { | |
expect_equal(expected, result, tolerance = 0.0001) | ||
}) | ||
|
||
test_that("Betweenness classification", { | ||
|
||
## Act | ||
result = get.author.class.network.betweenness(network) | ||
|
||
## Assert | ||
expected.core = data.frame(author.name = c("Olaf"), | ||
betweenness.centrality = c(1)) | ||
expected.peripheral = data.frame(author.name = c("Björn", "udo", "Thomas", "Fritz [email protected]", | ||
"georg", "Hans"), | ||
betweenness.centrality = c(0, 0, 0, 0, 0, 0)) | ||
expected = list(core = expected.core, peripheral = expected.peripheral) | ||
row.names(result[["core"]]) = NULL | ||
row.names(result[["peripheral"]]) = NULL | ||
expect_equal(expected, result) | ||
}) | ||
|
||
test_that("Closeness classification", { | ||
|
||
## Act | ||
result = get.author.class.network.closeness(network) | ||
|
||
## Assert | ||
expected.core = data.frame(author.name = c("Olaf"), | ||
closeness.centrality = c(0.5)) | ||
expected.peripheral = data.frame(author.name = c("Björn", "Thomas", "udo", "Fritz [email protected]", | ||
"georg", "Hans"), | ||
closeness.centrality = c(0.33333, 0.33333, 0.0, 0.0, 0.0, 0.0)) | ||
expected = list(core = expected.core, peripheral = expected.peripheral) | ||
row.names(result[["core"]]) = NULL | ||
row.names(result[["peripheral"]]) = NULL | ||
expect_equal(expected, result, tolerance = 0.0001) | ||
}) | ||
|
||
test_that("Pagerank classification", { | ||
|
||
## Act | ||
result = get.author.class.network.pagerank(network) | ||
|
||
## Assert | ||
expected.core = data.frame(author.name = c("Olaf"), | ||
pagerank.centrality = c(0.40541)) | ||
expected.peripheral = data.frame(author.name = c("Björn", "Thomas", "udo", "Fritz [email protected]", | ||
"georg", "Hans"), | ||
pagerank.centrality = c(0.21396, 0.21396, 0.041667, 0.041667, 0.041667, 0.041667)) | ||
expected = list(core = expected.core, peripheral = expected.peripheral) | ||
row.names(result[["core"]]) = NULL | ||
row.names(result[["peripheral"]]) = NULL | ||
expect_equal(expected, result, tolerance = 0.0001) | ||
}) | ||
|
||
test_that("Eccentricity classification", { | ||
|
||
## Act | ||
result = get.author.class.network.eccentricity(network) | ||
|
||
## Assert | ||
expected.core = data.frame(author.name = c("Olaf"), | ||
eccentricity = c(1)) | ||
expected.peripheral = data.frame(author.name = c("Björn", "udo", "Thomas", "Fritz [email protected]", | ||
"georg", "Hans"), | ||
eccentricity = c(0, 0, 0, 0, 0, 0)) | ||
expected = list(core = expected.core, peripheral = expected.peripheral) | ||
row.names(result[["core"]]) = NULL | ||
row.names(result[["peripheral"]]) = NULL | ||
expect_equal(expected, result) | ||
}) | ||
|
||
# TODO: Add a test for hierarchy classification | ||
|
||
test_that("Commit-count classification using 'result.limit'" , { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,7 @@ | |
## Copyright 2019 by Thomas Bock <[email protected]> | ||
## Copyright 2019 by Jakob Kronawitter <[email protected]> | ||
## Copyright 2021 by Johannes Hostert <[email protected]> | ||
## Copyright 2024 by Leo Sendelbach <[email protected]> | ||
## All Rights Reserved. | ||
## | ||
## This file is derived from following Codeface script: | ||
|
@@ -59,6 +60,10 @@ CLASSIFICATION.TYPE.TO.CATEGORY = list( | |
"network.degree" = "network", | ||
"network.eigen" = "network", | ||
"network.hierarchy" = "network", | ||
"network.betweenness" = "network", | ||
"network.closeness" = "network", | ||
"network.pagerank" = "network", | ||
"network.eccentricity" = "network", | ||
"commit.count" = "count", | ||
"loc.count" = "count", | ||
"mail.count" = "count", | ||
|
@@ -96,7 +101,7 @@ CLASSIFICATION.TYPE.TO.CATEGORY = list( | |
#' Network-based options/metrics (parameter \code{network} has to be specified): | ||
#' - "network.degree" | ||
#' - "network.eigen" | ||
#' - "network.hierarchy" | ||
#' - "network.hierarchy" | ||
#' - "network.betweenness" | ||
#' - "network.closeness" | ||
#' - "network.pagerank" | ||
#' - "network.eccentricity" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please don't forget about this TODO 😉 |
||
#' Count-based options/metrics (parameter \code{proj.data} has to be specified): | ||
#' - "commit.count" | ||
#' - "loc.count" | ||
|
@@ -126,7 +131,8 @@ CLASSIFICATION.TYPE.TO.CATEGORY = list( | |
#' first column and their centrality values in the second column. | ||
get.author.class.by.type = function(network = NULL, | ||
proj.data = NULL, | ||
type = c("network.degree", "network.eigen", "network.hierarchy", | ||
type = c("network.degree", "network.eigen", "network.hierarchy", "network.betweenness", | ||
"network.closeness", "network.pagerank", "network.eccentricity", | ||
"commit.count", "loc.count", "mail.count", "mail.thread.count", | ||
"issue.count", "issue.comment.count", "issue.commented.in.count", | ||
"issue.created.count"), | ||
|
@@ -144,6 +150,10 @@ get.author.class.by.type = function(network = NULL, | |
"network.degree" = "vertex.degree", | ||
"network.eigen" = "eigen.centrality", | ||
"network.hierarchy" = "hierarchy", | ||
"network.betweenness" = "betweenness.centrality", | ||
"network.closeness" = "closeness.centrality", | ||
"network.pagerank" = "pagerank.centrality", | ||
"network.eccentricity" = "eccentricity", | ||
"commit.count" = "commit.count", | ||
"loc.count" = "loc.count", | ||
"mail.count" = "mail.count", | ||
|
@@ -231,6 +241,30 @@ get.author.class.by.type = function(network = NULL, | |
## Construct centrality dataframe | ||
centrality.dataframe = data.frame(author.name = row.names(hierarchy.base.df), | ||
centrality = hierarchy.calculated) | ||
} else if (type == "network.betweenness") { | ||
betweenness.centrality.vec = igraph::betweenness(network, directed = TRUE) | ||
## Construct centrality dataframe | ||
centrality.dataframe = data.frame(author.name = names(betweenness.centrality.vec), | ||
centrality = as.vector(betweenness.centrality.vec)) | ||
} else if (type == "network.closeness") { | ||
closeness.centrality.vec = igraph::closeness(network) | ||
## Construct centrality dataframe | ||
centrality.dataframe = data.frame(author.name = names(closeness.centrality.vec), | ||
centrality = as.vector(closeness.centrality.vec)) | ||
Comment on lines
+249
to
+253
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not sure about the Could you please check that with igraph documentation and with small examples of directed networks whether we should use |
||
} else if (type == "network.pagerank") { | ||
pagerank.centrality.vec = igraph::page_rank(network, directed = TRUE)[["vector"]] | ||
## Construct centrality dataframe | ||
centrality.dataframe = data.frame(author.name = names(pagerank.centrality.vec), | ||
centrality = as.vector(pagerank.centrality.vec)) | ||
} else if (type == "network.eccentricity") { | ||
eccentricity.vec = igraph::eccentricity(network) | ||
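## Since core developers are expected to have a lower eccentricity, | ||
## we invert all non-zero values by subtracting them from the maximum | ||
## eccentricity. Values of zero are left unchanged; note that vertices | ||
## having the maximum eccentricity also end up with a value of zero. | ||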
indices = which(eccentricity.vec > 0) | ||
Comment on lines
+261
to
+263
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. From the description it is not clear what happens for zero values... |
||
eccentricity.vec[indices] = max(eccentricity.vec) - eccentricity.vec[indices] | ||
## Construct centrality dataframe | ||
centrality.dataframe = data.frame(author.name = names(eccentricity.vec), | ||
centrality = as.vector(eccentricity.vec)) | ||
} else if (type == "commit.count") { | ||
## Construct centrality dataframe | ||
centrality.dataframe = get.author.commit.count(proj.data) | ||
|
@@ -669,6 +703,146 @@ get.author.class.network.hierarchy = function(network, result.limit = NULL, | |
return(result) | ||
} | ||
|
||
## * Betweenness-based classification -------------------------------------- | ||
|
||
#' Classify authors into "core" and "peripheral" based on the betweenness-centrality of author vertices in the network | ||
#' and return the classification result. | ||
#' | ||
#' The details of the classification algorithm are explained in the documentation of \code{get.author.class.by.type}. | ||
#' | ||
#' @param network the network containing the authors to classify | ||
#' @param result.limit the maximum number of authors contained in the classification result. Only the top | ||
#' \code{result.limit} authors of the classification stack will be contained within the returned | ||
#' classification result. \code{NULL} means that all authors will be returned. [default: NULL] | ||
#' @param restrict.classification.to.authors a vector of author names. Only authors that are contained within this | ||
#' vector are to be classified. Authors that appear in the vector but are not | ||
#' part of the classification result (i.e., they are not present in the | ||
#' underlying data) will be added to it afterwards (with a centrality value | ||
#' of \code{NA}). \code{NULL} means that no restriction is made. | ||
#' [default: NULL] | ||
#' | ||
#' @return the classification result, that is, a list containing two named list members \code{core} and | ||
#' \code{peripheral}, each of which holding the authors classified as core or peripheral, respectively. Both | ||
#' entries in this list (\code{core} and \code{peripheral}) are dataframes containing the authors' names in the | ||
#' first column and their centrality values in the second column. | ||
#' | ||
#' @seealso get.author.class.by.type | ||
get.author.class.network.betweenness = function(network, result.limit = NULL, | ||
restrict.classification.to.authors = NULL) { | ||
Comment on lines
+730
to
+731
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Indentation wrong. (Also applies to some of the functions below.) |
||
logging::logdebug("get.author.class.network.betweenness: starting.") | ||
|
||
result = get.author.class.by.type(network = network, type = "network.betweenness", result.limit = result.limit, | ||
restrict.classification.to.authors = restrict.classification.to.authors) | ||
|
||
logging::logdebug("get.author.class.network.betweenness: finished.") | ||
return(result) | ||
} | ||
|
||
## * Closeness-based classification -------------------------------------- | ||
|
||
#' Classify authors into "core" and "peripheral" based on the closeness-centrality of author vertices in the network | ||
#' and return the classification result. | ||
#' | ||
#' The details of the classification algorithm are explained in the documentation of \code{get.author.class.by.type}. | ||
#' | ||
#' @param network the network containing the authors to classify | ||
#' @param result.limit the maximum number of authors contained in the classification result. Only the top | ||
#' \code{result.limit} authors of the classification stack will be contained within the returned | ||
#' classification result. \code{NULL} means that all authors will be returned. [default: NULL] | ||
#' @param restrict.classification.to.authors a vector of author names. Only authors that are contained within this | ||
#' vector are to be classified. Authors that appear in the vector but are not | ||
#' part of the classification result (i.e., they are not present in the | ||
#' underlying data) will be added to it afterwards (with a centrality value | ||
#' of \code{NA}). \code{NULL} means that no restriction is made. | ||
#' [default: NULL] | ||
#' | ||
#' @return the classification result, that is, a list containing two named list members \code{core} and | ||
#' \code{peripheral}, each of which holding the authors classified as core or peripheral, respectively. Both | ||
#' entries in this list (\code{core} and \code{peripheral}) are dataframes containing the authors' names in the | ||
#' first column and their centrality values in the second column. | ||
#' | ||
#' @seealso get.author.class.by.type | ||
get.author.class.network.closeness = function(network, result.limit = NULL, | ||
restrict.classification.to.authors = NULL) { | ||
logging::logdebug("get.author.class.network.closeness: starting.") | ||
|
||
result = get.author.class.by.type(network = network, type = "network.closeness", result.limit = result.limit, | ||
restrict.classification.to.authors = restrict.classification.to.authors) | ||
|
||
logging::logdebug("get.author.class.network.closeness: finished.") | ||
return(result) | ||
} | ||
|
||
## * Pagerank-based classification -------------------------------------- | ||
|
||
#' Classify authors into "core" and "peripheral" based on the pagerank-centrality of author vertices in the network | ||
#' and return the classification result. | ||
#' | ||
#' The details of the classification algorithm are explained in the documentation of \code{get.author.class.by.type}. | ||
#' | ||
#' @param network the network containing the authors to classify | ||
#' @param result.limit the maximum number of authors contained in the classification result. Only the top | ||
#' \code{result.limit} authors of the classification stack will be contained within the returned | ||
#' classification result. \code{NULL} means that all authors will be returned. [default: NULL] | ||
#' @param restrict.classification.to.authors a vector of author names. Only authors that are contained within this | ||
#' vector are to be classified. Authors that appear in the vector but are not | ||
#' part of the classification result (i.e., they are not present in the | ||
#' underlying data) will be added to it afterwards (with a centrality value | ||
#' of \code{NA}). \code{NULL} means that no restriction is made. | ||
#' [default: NULL] | ||
#' | ||
#' @return the classification result, that is, a list containing two named list members \code{core} and | ||
#' \code{peripheral}, each of which holding the authors classified as core or peripheral, respectively. Both | ||
#' entries in this list (\code{core} and \code{peripheral}) are dataframes containing the authors' names in the | ||
#' first column and their centrality values in the second column. | ||
#' | ||
#' @seealso get.author.class.by.type | ||
get.author.class.network.pagerank = function(network, result.limit = NULL, | ||
restrict.classification.to.authors = NULL) { | ||
logging::logdebug("get.author.class.network.pagerank: starting.") | ||
|
||
result = get.author.class.by.type(network = network, type = "network.pagerank", result.limit = result.limit, | ||
restrict.classification.to.authors = restrict.classification.to.authors) | ||
|
||
logging::logdebug("get.author.class.network.pagerank: finished.") | ||
return(result) | ||
} | ||
|
||
## * Eccentricity-based classification -------------------------------------- | ||
|
||
#' Classify authors into "core" and "peripheral" based on the eccentricity of author vertices in the network | ||
#' and return the classification result. | ||
#' | ||
#' The details of the classification algorithm are explained in the documentation of \code{get.author.class.by.type}. | ||
#' | ||
#' @param network the network containing the authors to classify | ||
#' @param result.limit the maximum number of authors contained in the classification result. Only the top | ||
#' \code{result.limit} authors of the classification stack will be contained within the returned | ||
#' classification result. \code{NULL} means that all authors will be returned. [default: NULL] | ||
#' @param restrict.classification.to.authors a vector of author names. Only authors that are contained within this | ||
#' vector are to be classified. Authors that appear in the vector but are not | ||
#' part of the classification result (i.e., they are not present in the | ||
#' underlying data) will be added to it afterwards (with a centrality value | ||
#' of \code{NA}). \code{NULL} means that no restriction is made. | ||
#' [default: NULL] | ||
#' | ||
#' @return the classification result, that is, a list containing two named list members \code{core} and | ||
#' \code{peripheral}, each of which holding the authors classified as core or peripheral, respectively. Both | ||
#' entries in this list (\code{core} and \code{peripheral}) are dataframes containing the authors' names in the | ||
#' first column and their eccentricity values in the second column. | ||
#' | ||
#' @seealso get.author.class.by.type | ||
get.author.class.network.eccentricity = function(network, result.limit = NULL, | ||
restrict.classification.to.authors = NULL) { | ||
logging::logdebug("get.author.class.network.eccentricity: starting.") | ||
|
||
result = get.author.class.by.type(network = network, type = "network.eccentricity", result.limit = result.limit, | ||
restrict.classification.to.authors = restrict.classification.to.authors) | ||
|
||
logging::logdebug("get.author.class.network.eccentricity: finished.") | ||
return(result) | ||
} | ||
|
||
## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / | ||
## Count-based classification --------------------------------------------- | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
metric ➡️ metrics
which ➡️ that
And there is no need to capitalize the metrics' names. But please put a comma before the final occurrence of "and".